revlog: regroup parsing code
Matt Mackall - r4971:3e6dae27 default
@@ -1,1292 +1,1297 @@
1 """
1 """
2 revlog.py - storage back-end for mercurial
2 revlog.py - storage back-end for mercurial
3
3
4 This provides efficient delta storage with O(1) retrieve and append
4 This provides efficient delta storage with O(1) retrieve and append
5 and O(changes) merge between branches
5 and O(changes) merge between branches
6
6
7 Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
7 Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
8
8
9 This software may be used and distributed according to the terms
9 This software may be used and distributed according to the terms
10 of the GNU General Public License, incorporated herein by reference.
10 of the GNU General Public License, incorporated herein by reference.
11 """
11 """
12
12
13 from node import *
13 from node import *
14 from i18n import _
14 from i18n import _
15 import binascii, changegroup, errno, ancestor, mdiff, os
15 import binascii, changegroup, errno, ancestor, mdiff, os
16 import sha, struct, util, zlib
16 import sha, struct, util, zlib
17
17
18 # revlog version strings
18 # revlog version strings
19 REVLOGV0 = 0
19 REVLOGV0 = 0
20 REVLOGNG = 1
20 REVLOGNG = 1
21
21
22 # revlog flags
22 # revlog flags
23 REVLOGNGINLINEDATA = (1 << 16)
23 REVLOGNGINLINEDATA = (1 << 16)
24 REVLOG_DEFAULT_FLAGS = REVLOGNGINLINEDATA
24 REVLOG_DEFAULT_FLAGS = REVLOGNGINLINEDATA
25
25
26 REVLOG_DEFAULT_FORMAT = REVLOGNG
26 REVLOG_DEFAULT_FORMAT = REVLOGNG
27 REVLOG_DEFAULT_VERSION = REVLOG_DEFAULT_FORMAT | REVLOG_DEFAULT_FLAGS
27 REVLOG_DEFAULT_VERSION = REVLOG_DEFAULT_FORMAT | REVLOG_DEFAULT_FLAGS
28
28
29 def hash(text, p1, p2):
29 def hash(text, p1, p2):
30 """generate a hash from the given text and its parent hashes
30 """generate a hash from the given text and its parent hashes
31
31
32 This hash combines both the current file contents and its history
32 This hash combines both the current file contents and its history
33 in a manner that makes it easy to distinguish nodes with the same
33 in a manner that makes it easy to distinguish nodes with the same
34 content in the revision graph.
34 content in the revision graph.
35 """
35 """
36 l = [p1, p2]
36 l = [p1, p2]
37 l.sort()
37 l.sort()
38 s = sha.new(l[0])
38 s = sha.new(l[0])
39 s.update(l[1])
39 s.update(l[1])
40 s.update(text)
40 s.update(text)
41 return s.digest()
41 return s.digest()
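For illustration only (not part of revlog.py): the node hash is insensitive to parent order, because the parents are sorted before hashing. A minimal standalone sketch, using hashlib in place of the now-deprecated sha module:

import hashlib

def node_hash(text, p1, p2):
    # sort the parents first, exactly as hash() above does
    a, b = sorted([p1, p2])
    s = hashlib.sha1(a)
    s.update(b)
    s.update(text)
    return s.digest()

nullid_ = b"\0" * 20  # stand-in for node.nullid
assert len(node_hash(b"hello\n", nullid_, nullid_)) == 20
assert node_hash(b"a", b"\1" * 20, b"\2" * 20) == \
       node_hash(b"a", b"\2" * 20, b"\1" * 20)  # parent order does not matter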

def compress(text):
    """ generate a possibly-compressed representation of text """
    if not text: return ("", text)
    if len(text) < 44:
        if text[0] == '\0': return ("", text)
        return ('u', text)
    bin = zlib.compress(text)
    if len(bin) > len(text):
        if text[0] == '\0': return ("", text)
        return ('u', text)
    return ("", bin)

def decompress(bin):
    """ decompress the given input """
    if not bin: return bin
    t = bin[0]
    if t == '\0': return bin
    if t == 'x': return zlib.decompress(bin)
    if t == 'u': return bin[1:]
    raise RevlogError(_("unknown compression type %r") % t)
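A usage sketch of the pair above (Python 2 idiom, matching this file): literal text travels behind a 'u' marker, or bare when it already starts with a NUL byte, while zlib streams are recognized by their leading 'x' byte:

def roundtrip(text):
    hdr, data = compress(text)
    return decompress(hdr + data)

assert roundtrip("short text") == "short text"  # under 44 bytes: stored as 'u' + text
assert roundtrip("y" * 1000) == "y" * 1000      # compressible: stored as a zlib stream
assert compress("y" * 1000)[0] == ""            # zlib output needs no marker byte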

indexformatv0 = ">4l20s20s20s"
v0shaoffset = 56
# index ng:
#  6 bytes: offset
#  2 bytes: flags
#  4 bytes: compressed length
#  4 bytes: uncompressed length
#  4 bytes: base rev
#  4 bytes: link rev
#  4 bytes: parent 1 rev
#  4 bytes: parent 2 rev
# 32 bytes: nodeid
indexformatng = ">Qiiiiii20s12x"
ngshaoffset = 32
versionformat = ">I"
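A quick consistency check on the formats just defined (illustrative, not in the original): the struct strings fix the record sizes, and the sha offsets point at the nodeid field inside each record:

import struct

assert struct.calcsize(indexformatv0) == 76  # 4*4 + 3*20 bytes per entry
assert struct.calcsize(indexformatng) == 64  # 8 + 6*4 + 20 + 12 bytes of padding
assert v0shaoffset == 4 * 4 + 2 * 20         # nodeid follows four longs and both parents
assert ngshaoffset == 8 + 6 * 4              # nodeid follows the Q and six ints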

class lazyparser(object):
    """
    this class avoids the need to parse the entirety of large indices
    """

    # lazyparser is not safe to use on windows if the win32 extensions
    # are not available. it keeps the file handle open, which makes it
    # impossible to break hardlinks on local cloned repos.
    safe_to_use = os.name != 'nt' or (not util.is_win_9x() and
                                      hasattr(util, 'win32api'))

    def __init__(self, dataf, size, indexformat, shaoffset):
        self.dataf = dataf
        self.format = indexformat
        self.s = struct.calcsize(indexformat)
        self.indexformat = indexformat
        self.datasize = size
        self.l = size/self.s
        self.index = [None] * self.l
        self.map = {nullid: nullrev}
        self.allmap = 0
        self.all = 0
        self.mapfind_count = 0
        self.shaoffset = shaoffset

    def loadmap(self):
        """
        during a commit, we need to make sure the rev being added is
        not a duplicate.  This requires loading the entire index,
        which is fairly slow.  loadmap can load up just the node map,
        which takes much less time.
        """
        if self.allmap: return
        end = self.datasize
        self.allmap = 1
        cur = 0
        count = 0
        blocksize = self.s * 256
        self.dataf.seek(0)
        while cur < end:
            data = self.dataf.read(blocksize)
            off = 0
            for x in xrange(256):
                n = data[off + self.shaoffset:off + self.shaoffset + 20]
                self.map[n] = count
                count += 1
                if count >= self.l:
                    break
                off += self.s
            cur += blocksize

    def loadblock(self, blockstart, blocksize, data=None):
        if self.all: return
        if data is None:
            self.dataf.seek(blockstart)
            if blockstart + blocksize > self.datasize:
                # the revlog may have grown since we've started running,
                # but we don't have space in self.index for more entries.
                # limit blocksize so that we don't get too much data.
                blocksize = max(self.datasize - blockstart, 0)
            data = self.dataf.read(blocksize)
        lend = len(data) / self.s
        i = blockstart / self.s
        off = 0
        # lazyindex supports __delitem__
        if lend > len(self.index) - i:
            lend = len(self.index) - i
        for x in xrange(lend):
            if self.index[i + x] == None:
                b = data[off : off + self.s]
                self.index[i + x] = b
                n = b[self.shaoffset:self.shaoffset + 20]
                self.map[n] = i + x
            off += self.s

    def findnode(self, node):
        """search backwards through the index file for a specific node"""
        if self.allmap: return None

        # hg log will cause many many searches for the manifest
        # nodes.  After we get called a few times, just load the whole
        # thing.
        if self.mapfind_count > 8:
            self.loadmap()
            if node in self.map:
                return node
            return None
        self.mapfind_count += 1
        last = self.l - 1
        while self.index[last] != None:
            if last == 0:
                self.all = 1
                self.allmap = 1
                return None
            last -= 1
        end = (last + 1) * self.s
        blocksize = self.s * 256
        while end >= 0:
            start = max(end - blocksize, 0)
            self.dataf.seek(start)
            data = self.dataf.read(end - start)
            findend = end - start
            while True:
                # we're searching backwards, so we have to make sure
                # we don't find a changeset where this node is a parent
                off = data.rfind(node, 0, findend)
                findend = off
                if off >= 0:
                    i = off / self.s
                    off = i * self.s
                    n = data[off + self.shaoffset:off + self.shaoffset + 20]
                    if n == node:
                        self.map[n] = i + start / self.s
                        return node
                else:
                    break
            end -= blocksize
        return None

    def loadindex(self, i=None, end=None):
        if self.all: return
        all = False
        if i == None:
            blockstart = 0
            blocksize = (512 / self.s) * self.s
            end = self.datasize
            all = True
        else:
            if end:
                blockstart = i * self.s
                end = end * self.s
                blocksize = end - blockstart
            else:
                blockstart = (i & ~63) * self.s
                blocksize = self.s * 64
                end = blockstart + blocksize
        while blockstart < end:
            self.loadblock(blockstart, blocksize)
            blockstart += blocksize
        if all: self.all = True

class lazyindex(object):
    """a lazy version of the index array"""
    def __init__(self, parser):
        self.p = parser
    def __len__(self):
        return len(self.p.index)
    def load(self, pos):
        if pos < 0:
            pos += len(self.p.index)
        self.p.loadindex(pos)
        return self.p.index[pos]
    def __getitem__(self, pos):
        ret = self.p.index[pos] or self.load(pos)
        if isinstance(ret, str):
            ret = struct.unpack(self.p.indexformat, ret)
        return ret
    def __setitem__(self, pos, item):
        self.p.index[pos] = item
    def __delitem__(self, pos):
        del self.p.index[pos]
    def append(self, e):
        self.p.index.append(e)

class lazymap(object):
    """a lazy version of the node map"""
    def __init__(self, parser):
        self.p = parser
    def load(self, key):
        n = self.p.findnode(key)
        if n == None:
            raise KeyError(key)
    def __contains__(self, key):
        if key in self.p.map:
            return True
        self.p.loadmap()
        return key in self.p.map
    def __iter__(self):
        yield nullid
        for i in xrange(self.p.l):
            ret = self.p.index[i]
            if not ret:
                self.p.loadindex(i)
                ret = self.p.index[i]
            if isinstance(ret, str):
                ret = struct.unpack(self.p.indexformat, ret)
            yield ret[-1]
    def __getitem__(self, key):
        try:
            return self.p.map[key]
        except KeyError:
            try:
                self.load(key)
                return self.p.map[key]
            except KeyError:
                raise KeyError("node " + hex(key))
    def __setitem__(self, key, val):
        self.p.map[key] = val
    def __delitem__(self, key):
        del self.p.map[key]

class RevlogError(Exception): pass
class LookupError(RevlogError): pass

def getoffset(q):
    if q & 0xFFFF:
        raise RevlogError(_('incompatible revision flag %x') % q)
    return int(q >> 16)

def gettype(q):
    return int(q & 0xFFFF)

def offset_type(offset, type):
    return long(long(offset) << 16 | type)
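A worked sketch of this packing (not part of the diff): the upper 48 bits of revlogng's first index field carry the data offset, the low 16 bits carry flags, and getoffset refuses entries whose flag bits it does not understand:

q = offset_type(12345, 0)
assert getoffset(q) == 12345 and gettype(q) == 0

q = offset_type(12345, 7)
assert gettype(q) == 7
try:
    getoffset(q)  # non-zero flag bits are rejected
    assert False, "expected RevlogError"
except RevlogError:
    pass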

class revlog(object):
    """
    the underlying revision storage object

    A revlog consists of two parts, an index and the revision data.

    The index is a file with a fixed record size containing
    information on each revision, including its nodeid (hash), the
    nodeids of its parents, the position and offset of its data within
    the data file, and the revision it's based on.  Finally, each entry
    contains a linkrev entry that can serve as a pointer to external
    data.

    The revision data itself is a linear collection of data chunks.
    Each chunk represents a revision and is usually represented as a
    delta against the previous chunk.  To bound lookup time, runs of
    deltas are limited to about 2 times the length of the original
    version data.  This makes retrieval of a version proportional to
    its size, or O(1) relative to the number of revisions.

    Both pieces of the revlog are written to in an append-only
    fashion, which means we never need to rewrite a file to insert or
    remove data, and can use some simple techniques to avoid the need
    for locking while reading.
    """
    def __init__(self, opener, indexfile):
        """
        create a revlog object

        opener is a function that abstracts the file opening operation
        and can be used to implement COW semantics or the like.
        """
        self.indexfile = indexfile
        self.datafile = indexfile[:-2] + ".d"
        self.opener = opener

        self.indexstat = None
        self.cache = None
        self.chunkcache = None
        self.defversion = REVLOG_DEFAULT_VERSION
        if hasattr(opener, "defversion"):
            self.defversion = opener.defversion
        if self.defversion & REVLOGNG:
            self.defversion |= REVLOGNGINLINEDATA
        self._load()

    def _load(self):
        v = self.defversion
        try:
            f = self.opener(self.indexfile)
            i = f.read(4)
            f.seek(0)
            if len(i) > 0:
                v = struct.unpack(versionformat, i)[0]
        except IOError, inst:
            if inst.errno != errno.ENOENT:
                raise
            i = ""
        else:
            try:
                st = util.fstat(f)
            except AttributeError, inst:
                st = None
            else:
                oldst = self.indexstat
                if (oldst and st.st_dev == oldst.st_dev
                    and st.st_ino == oldst.st_ino
                    and st.st_mtime == oldst.st_mtime
                    and st.st_ctime == oldst.st_ctime
                    and st.st_size == oldst.st_size):
                    return
                self.indexstat = st
        flags = v & ~0xFFFF
        fmt = v & 0xFFFF
        if fmt == REVLOGV0:
            if flags:
                raise RevlogError(_("index %s unknown flags %#04x for format v0")
                                  % (self.indexfile, flags >> 16))
        elif fmt == REVLOGNG:
            if flags & ~REVLOGNGINLINEDATA:
                raise RevlogError(_("index %s unknown flags %#04x for revlogng")
                                  % (self.indexfile, flags >> 16))
        else:
            raise RevlogError(_("index %s unknown format %d")
                              % (self.indexfile, fmt))
        self.version = v
-        if v == REVLOGV0:
+        self.nodemap = {nullid: nullrev}
+        self.index = []
+        self.indexformat = indexformatng
+        if self.version == REVLOGV0:
            self.indexformat = indexformatv0
-            shaoffset = v0shaoffset
-        else:
-            self.indexformat = indexformatng
-            shaoffset = ngshaoffset
-
        if i:
-            if (lazyparser.safe_to_use and not self._inline() and
-                st and st.st_size > 10000):
-                # big index, let's parse it on demand
-                parser = lazyparser(f, st.st_size, self.indexformat, shaoffset)
-                self.index = lazyindex(parser)
-                self.nodemap = lazymap(parser)
-            else:
-                self._parseindex(f, st)
-                if self.version != REVLOGV0:
-                    e = list(self.index[0])
-                    type = gettype(e[0])
-                    e[0] = offset_type(0, type)
-                    self.index[0] = e
-        else:
-            self.nodemap = {nullid: nullrev}
-            self.index = []
+            self._parseindex(f, st)

    def _parseindex(self, fp, st):
+        shaoffset = ngshaoffset
+        if self.version == REVLOGV0:
+            shaoffset = v0shaoffset
+
+        if (lazyparser.safe_to_use and not self._inline() and
+            st and st.st_size > 10000):
+            # big index, let's parse it on demand
+            parser = lazyparser(fp, st.st_size, self.indexformat, shaoffset)
+            self.index = lazyindex(parser)
+            self.nodemap = lazymap(parser)
+            if self.version != REVLOGV0:
+                e = list(self.index[0])
+                type = gettype(e[0])
+                e[0] = offset_type(0, type)
+                self.index[0] = e
+            return
+
        s = struct.calcsize(self.indexformat)
        self.index = []
        self.nodemap = {nullid: nullrev}
        inline = self._inline()
        n = 0
        leftover = None
        while True:
            if st:
                data = fp.read(65536)
            else:
                # hack for httprangereader, it doesn't do partial reads well
                data = fp.read()
            if not data:
                break
            if n == 0 and self._inline():
                # cache the first chunk
                self.chunkcache = (0, data)
            if leftover:
                data = leftover + data
                leftover = None
            off = 0
            l = len(data)
            while off < l:
                if l - off < s:
                    leftover = data[off:]
                    break
                cur = data[off:off + s]
                off += s
                e = struct.unpack(self.indexformat, cur)
                self.index.append(e)
                self.nodemap[e[-1]] = n
                n += 1
                if inline:
                    if e[1] < 0:
                        break
                    off += e[1]
                    if off > l:
                        # some things don't seek well, just read it
                        fp.read(off - l)
                        break
            if not st:
                break

+        if self.version != REVLOGV0:
+            e = list(self.index[0])
+            type = gettype(e[0])
+            e[0] = offset_type(0, type)
+            self.index[0] = e

    def _loadindex(self, start, end):
        """load a block of indexes all at once from the lazy parser"""
        if isinstance(self.index, lazyindex):
            self.index.p.loadindex(start, end)

    def _loadindexmap(self):
        """loads both the map and the index from the lazy parser"""
        if isinstance(self.index, lazyindex):
            p = self.index.p
            p.loadindex()
            self.nodemap = p.map

    def _loadmap(self):
        """loads the map from the lazy parser"""
        if isinstance(self.nodemap, lazymap):
            self.nodemap.p.loadmap()
            self.nodemap = self.nodemap.p.map

    def _inline(self): return self.version & REVLOGNGINLINEDATA

    def tip(self): return self.node(len(self.index) - 1)
    def count(self): return len(self.index)
    def node(self, rev):
        return rev == nullrev and nullid or self.index[rev][-1]
    def rev(self, node):
        try:
            return self.nodemap[node]
        except KeyError:
            raise LookupError(_('%s: no node %s') % (self.indexfile, hex(node)))
    def linkrev(self, node):
        return (node == nullid) and nullrev or self.index[self.rev(node)][-4]
    def parents(self, node):
        if node == nullid: return (nullid, nullid)
        r = self.rev(node)
        d = self.index[r][-3:-1]
        if self.version == REVLOGV0:
            return d
        return (self.node(d[0]), self.node(d[1]))
    def parentrevs(self, rev):
        if rev == nullrev:
            return (nullrev, nullrev)
        d = self.index[rev][-3:-1]
        if self.version == REVLOGV0:
            return (self.rev(d[0]), self.rev(d[1]))
        return d
    def start(self, rev):
        if rev == nullrev:
            return 0
        if self.version != REVLOGV0:
            return getoffset(self.index[rev][0])
        return self.index[rev][0]

    def end(self, rev): return self.start(rev) + self.length(rev)

    def size(self, rev):
        """return the length of the uncompressed text for a given revision"""
        if rev == nullrev:
            return 0
        l = -1
        if self.version != REVLOGV0:
            l = self.index[rev][2]
        if l >= 0:
            return l

        t = self.revision(self.node(rev))
        return len(t)

        # alternate implementation: the advantage of this code is that it
        # will be faster for a single revision, but the results are not
        # cached, so finding the size of every revision will be slower.
        """
        if self.cache and self.cache[1] == rev:
            return len(self.cache[2])

        base = self.base(rev)
        if self.cache and self.cache[1] >= base and self.cache[1] < rev:
            base = self.cache[1]
            text = self.cache[2]
        else:
            text = self.revision(self.node(base))

        l = len(text)
        for x in xrange(base + 1, rev + 1):
            l = mdiff.patchedsize(l, self.chunk(x))
        return l
        """

    def length(self, rev):
        if rev == nullrev:
            return 0
        else:
            return self.index[rev][1]
    def base(self, rev):
        if (rev == nullrev):
            return nullrev
        else:
            return self.index[rev][-5]

    def reachable(self, node, stop=None):
        """return a hash of all nodes ancestral to a given node, including
        the node itself, stopping when stop is matched"""
        reachable = {}
        visit = [node]
        reachable[node] = 1
        if stop:
            stopn = self.rev(stop)
        else:
            stopn = 0
        while visit:
            n = visit.pop(0)
            if n == stop:
                continue
            if n == nullid:
                continue
            for p in self.parents(n):
                if self.rev(p) < stopn:
                    continue
                if p not in reachable:
                    reachable[p] = 1
                    visit.append(p)
        return reachable

    def nodesbetween(self, roots=None, heads=None):
        """Return a tuple containing three elements. Elements 1 and 2 contain
        the final lists of bases and heads after all the unreachable ones
        have been pruned.  Element 0 contains a topologically sorted list of
        all nodes that satisfy these constraints:
        1. All nodes must be descended from a node in roots (the nodes on
        roots are considered descended from themselves).
        2. All nodes must also be ancestors of a node in heads (the nodes in
        heads are considered to be their own ancestors).

        If roots is unspecified, nullid is assumed as the only root.
        If heads is unspecified, it is taken to be the output of the
        heads method (i.e. a list of all nodes in the repository that
        have no children)."""
        nonodes = ([], [], [])
        if roots is not None:
            roots = list(roots)
            if not roots:
                return nonodes
            lowestrev = min([self.rev(n) for n in roots])
        else:
            roots = [nullid] # Everybody's a descendent of nullid
            lowestrev = nullrev
        if (lowestrev == nullrev) and (heads is None):
            # We want _all_ the nodes!
            return ([self.node(r) for r in xrange(0, self.count())],
                    [nullid], list(self.heads()))
        if heads is None:
            # All nodes are ancestors, so the latest ancestor is the last
            # node.
            highestrev = self.count() - 1
            # Set ancestors to None to signal that every node is an ancestor.
            ancestors = None
            # Set heads to an empty dictionary for later discovery of heads
            heads = {}
        else:
            heads = list(heads)
            if not heads:
                return nonodes
            ancestors = {}
            # Turn heads into a dictionary so we can remove 'fake' heads.
            # Also, later we will be using it to filter out the heads we can't
            # find from roots.
            heads = dict.fromkeys(heads, 0)
            # Start at the top and keep marking parents until we're done.
            nodestotag = heads.keys()
            # Remember where the top was so we can use it as a limit later.
            highestrev = max([self.rev(n) for n in nodestotag])
            while nodestotag:
                # grab a node to tag
                n = nodestotag.pop()
                # Never tag nullid
                if n == nullid:
                    continue
                # A node's revision number represents its place in a
                # topologically sorted list of nodes.
                r = self.rev(n)
                if r >= lowestrev:
                    if n not in ancestors:
                        # If we are possibly a descendent of one of the roots
                        # and we haven't already been marked as an ancestor
                        ancestors[n] = 1 # Mark as ancestor
                        # Add non-nullid parents to list of nodes to tag.
                        nodestotag.extend([p for p in self.parents(n) if
                                           p != nullid])
                    elif n in heads: # We've seen it before, is it a fake head?
                        # So it is, real heads should not be the ancestors of
                        # any other heads.
                        heads.pop(n)
            if not ancestors:
                return nonodes
            # Now that we have our set of ancestors, we want to remove any
            # roots that are not ancestors.

            # If one of the roots was nullid, everything is included anyway.
            if lowestrev > nullrev:
                # But, since we weren't, let's recompute the lowest rev to not
                # include roots that aren't ancestors.

                # Filter out roots that aren't ancestors of heads
                roots = [n for n in roots if n in ancestors]
                # Recompute the lowest revision
                if roots:
                    lowestrev = min([self.rev(n) for n in roots])
                else:
                    # No more roots?  Return empty list
                    return nonodes
            else:
                # We are descending from nullid, and don't need to care about
                # any other roots.
                lowestrev = nullrev
                roots = [nullid]
        # Transform our roots list into a 'set' (i.e. a dictionary where the
        # values don't matter).
        descendents = dict.fromkeys(roots, 1)
        # Also, keep the original roots so we can filter out roots that aren't
        # 'real' roots (i.e. are descended from other roots).
        roots = descendents.copy()
        # Our topologically sorted list of output nodes.
        orderedout = []
        # Don't start at nullid since we don't want nullid in our output list,
        # and if nullid shows up in descendents, empty parents will look like
        # they're descendents.
        for r in xrange(max(lowestrev, 0), highestrev + 1):
            n = self.node(r)
            isdescendent = False
            if lowestrev == nullrev: # Everybody is a descendent of nullid
                isdescendent = True
            elif n in descendents:
                # n is already a descendent
                isdescendent = True
                # This check only needs to be done here because all the roots
                # will start being marked as descendents before the loop.
                if n in roots:
                    # If n was a root, check if it's a 'real' root.
                    p = tuple(self.parents(n))
                    # If any of its parents are descendents, it's not a root.
                    if (p[0] in descendents) or (p[1] in descendents):
                        roots.pop(n)
            else:
                p = tuple(self.parents(n))
                # A node is a descendent if either of its parents are
                # descendents. (We seeded the dependents list with the roots
                # up there, remember?)
                if (p[0] in descendents) or (p[1] in descendents):
                    descendents[n] = 1
                    isdescendent = True
            if isdescendent and ((ancestors is None) or (n in ancestors)):
                # Only include nodes that are both descendents and ancestors.
                orderedout.append(n)
                if (ancestors is not None) and (n in heads):
                    # We're trying to figure out which heads are reachable
                    # from roots.
                    # Mark this head as having been reached
                    heads[n] = 1
                elif ancestors is None:
                    # Otherwise, we're trying to discover the heads.
                    # Assume this is a head because if it isn't, the next step
                    # will eventually remove it.
                    heads[n] = 1
                    # But, obviously its parents aren't.
                    for p in self.parents(n):
                        heads.pop(p, None)
        heads = [n for n in heads.iterkeys() if heads[n] != 0]
        roots = roots.keys()
        assert orderedout
        assert roots
        assert heads
        return (orderedout, roots, heads)
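A sketch of the degenerate call, reusing the hypothetical log from the constructor example: with no arguments the method short-circuits and reports every node, the null root, and the current heads:

allnodes, bases, heads = log.nodesbetween()
assert bases == [nullid]
assert len(allnodes) == log.count()
assert set(heads) == set(log.heads())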

    def heads(self, start=None, stop=None):
        """return the list of all nodes that have no children

        if start is specified, only heads that are descendants of
        start will be returned
        if stop is specified, it will consider all the revs from stop
        as if they had no children
        """
        if start is None:
            start = nullid
        if stop is None:
            stop = []
        stoprevs = dict.fromkeys([self.rev(n) for n in stop])
        startrev = self.rev(start)
        reachable = {startrev: 1}
        heads = {startrev: 1}

        parentrevs = self.parentrevs
        for r in xrange(startrev + 1, self.count()):
            for p in parentrevs(r):
                if p in reachable:
                    if r not in stoprevs:
                        reachable[r] = 1
                        heads[r] = 1
                    if p in heads and p not in stoprevs:
                        del heads[p]

        return [self.node(r) for r in heads]
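One property implied by the scan above (a sketch against the same hypothetical log): no later revision can name the tip as a parent, so the tip is always among the heads:

if log.count():
    assert log.tip() in log.heads()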

    def children(self, node):
        """find the children of a given node"""
        c = []
        p = self.rev(node)
        for r in range(p + 1, self.count()):
            prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
            if prevs:
                for pr in prevs:
                    if pr == p:
                        c.append(self.node(r))
            elif p == nullrev:
                c.append(self.node(r))
        return c

    def _match(self, id):
        if isinstance(id, (long, int)):
            # rev
            return self.node(id)
        if len(id) == 20:
            # possibly a binary node
            # odds of a binary node being all hex in ASCII are 1 in 10**25
            try:
                node = id
                r = self.rev(node) # quick search the index
                return node
            except LookupError:
                pass # may be partial hex id
        try:
            # str(rev)
            rev = int(id)
            if str(rev) != id: raise ValueError
            if rev < 0: rev = self.count() + rev
            if rev < 0 or rev >= self.count(): raise ValueError
            return self.node(rev)
        except (ValueError, OverflowError):
            pass
        if len(id) == 40:
            try:
                # a full hex nodeid?
                node = bin(id)
                r = self.rev(node)
                return node
            except TypeError:
                pass

    def _partialmatch(self, id):
        if len(id) < 40:
            try:
                # hex(node)[:...]
                bin_id = bin(id[:len(id) & ~1]) # grab an even number of digits
                node = None
                for n in self.nodemap:
                    if n.startswith(bin_id) and hex(n).startswith(id):
                        if node is not None:
                            raise LookupError(_("Ambiguous identifier"))
                        node = n
                if node is not None:
                    return node
            except TypeError:
                pass

    def lookup(self, id):
        """locate a node based on:
            - revision number or str(revision number)
            - nodeid or subset of hex nodeid
        """

        n = self._match(id)
        if n is not None:
            return n
        n = self._partialmatch(id)
        if n:
            return n

        raise LookupError(_("No match found"))
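The identifier forms in action, sketched against the same hypothetical, non-empty log:

n = log.lookup(0)                   # a plain revision number
assert log.lookup('0') == n         # str(revision number)
assert log.lookup(hex(n)) == n      # the full hex nodeid
assert log.lookup(hex(n)[:8]) == n  # a partial hex id, if unambiguous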

    def cmp(self, node, text):
        """compare text with a given file revision"""
        p1, p2 = self.parents(node)
        return hash(text, p1, p2) != node

    def diff(self, a, b):
        """return a delta between two revisions"""
        return mdiff.textdiff(a, b)

    def patches(self, t, pl):
        """apply a list of patches to a string"""
        return mdiff.patches(t, pl)

    def chunk(self, rev, df=None, cachelen=4096):
        start, length = self.start(rev), self.length(rev)
        inline = self._inline()
        if inline:
            start += (rev + 1) * struct.calcsize(self.indexformat)
        end = start + length
        def loadcache(df):
            cache_length = max(cachelen, length) # 4k
            if not df:
                if inline:
                    df = self.opener(self.indexfile)
                else:
                    df = self.opener(self.datafile)
            df.seek(start)
            self.chunkcache = (start, df.read(cache_length))

        if not self.chunkcache:
            loadcache(df)

        cache_start = self.chunkcache[0]
        cache_end = cache_start + len(self.chunkcache[1])
        if start >= cache_start and end <= cache_end:
            # it is cached
            offset = start - cache_start
        else:
            loadcache(df)
            offset = 0

        #def checkchunk():
        #    df = self.opener(self.datafile)
        #    df.seek(start)
        #    return df.read(length)
        #assert s == checkchunk()
        return decompress(self.chunkcache[1][offset:offset + length])
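A worked example of the inline adjustment above, with hypothetical chunk lengths: in an inline revlog the index records and data chunks interleave in the .i file, so rev r's chunk sits (r + 1) records past its nominal offset:

s = 64             # struct.calcsize(indexformatng)
start = 10 + 20    # nominal offset: lengths of chunks 0 and 1
assert start + (2 + 1) * s == 222  # rev 2's data begins after three index records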

    def delta(self, node):
        """return or calculate a delta between a node and its predecessor"""
        r = self.rev(node)
        return self.revdiff(r - 1, r)

    def revdiff(self, rev1, rev2):
        """return or calculate a delta between two revisions"""
        b1 = self.base(rev1)
        b2 = self.base(rev2)
        if b1 == b2 and rev1 + 1 == rev2:
            return self.chunk(rev2)
        else:
            return self.diff(self.revision(self.node(rev1)),
                             self.revision(self.node(rev2)))

    def revision(self, node):
        """return an uncompressed revision of a given node"""
        if node == nullid: return ""
        if self.cache and self.cache[0] == node: return self.cache[2]

        # look up what we need to read
        text = None
        rev = self.rev(node)
        base = self.base(rev)

        if self._inline():
            # we probably have the whole chunk cached
            df = None
        else:
            df = self.opener(self.datafile)

        # do we have useful data cached?
        if self.cache and self.cache[1] >= base and self.cache[1] < rev:
            base = self.cache[1]
            text = self.cache[2]
            self._loadindex(base, rev + 1)
        else:
            self._loadindex(base, rev + 1)
            text = self.chunk(base, df=df)

        bins = []
        for r in xrange(base + 1, rev + 1):
            bins.append(self.chunk(r, df=df))

        text = self.patches(text, bins)

        p1, p2 = self.parents(node)
        if node != hash(text, p1, p2):
            raise RevlogError(_("integrity check failed on %s:%d")
                              % (self.datafile, rev))

        self.cache = (node, rev, text)
        return text

    def checkinlinesize(self, tr, fp=None):
        if not self._inline():
            return
        if not fp:
            fp = self.opener(self.indexfile, 'r')
            fp.seek(0, 2)
        size = fp.tell()
        if size < 131072:
            return
        trinfo = tr.find(self.indexfile)
        if trinfo == None:
            raise RevlogError(_("%s not found in the transaction")
                              % self.indexfile)

        trindex = trinfo[2]
        dataoff = self.start(trindex)

        tr.add(self.datafile, dataoff)
        df = self.opener(self.datafile, 'w')
        calc = struct.calcsize(self.indexformat)
        for r in xrange(self.count()):
            start = self.start(r) + (r + 1) * calc
            length = self.length(r)
            fp.seek(start)
            d = fp.read(length)
            df.write(d)
        fp.close()
        df.close()
        fp = self.opener(self.indexfile, 'w', atomictemp=True)
        self.version &= ~(REVLOGNGINLINEDATA)
        if self.count():
            x = self.index[0]
            e = struct.pack(self.indexformat, *x)[4:]
            l = struct.pack(versionformat, self.version)
            fp.write(l)
            fp.write(e)

        for i in xrange(1, self.count()):
            x = self.index[i]
            e = struct.pack(self.indexformat, *x)
            fp.write(e)

        # if we don't call rename, the temp file will never replace the
        # real index
        fp.rename()

        tr.replace(self.indexfile, trindex * calc)
        self.chunkcache = None
979
984
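    # Layout sketch (informal): an inline revlog interleaves each index
    # entry with its data, so the byte offset of entry r's data is
    #
    #   self.start(r) + (r + 1) * struct.calcsize(self.indexformat)
    #
    # e.g. with 64-byte entries, revision 0's data starts at byte 64 and
    # revision 1's at self.start(1) + 128. Once the file passes 128KB
    # (131072 bytes), checkinlinesize() splits it into separate index
    # and data files.
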
    def addrevision(self, text, transaction, link, p1=None, p2=None, d=None):
        """add a revision to the log

        text - the revision data to add
        transaction - the transaction object used for rollback
        link - the linkrev data to add
        p1, p2 - the parent nodeids of the revision
        d - an optional precomputed delta
        """
        if not self._inline():
            dfh = self.opener(self.datafile, "a")
        else:
            dfh = None
        ifh = self.opener(self.indexfile, "a+")
        return self._addrevision(text, transaction, link, p1, p2, d, ifh, dfh)

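    # Usage sketch (hypothetical opener and transaction objects, not
    # defined here): callers append revisions inside a transaction so a
    # failure can roll the files back to their recorded offsets:
    #
    #   tr = transaction(...)           # journal-backed rollback object
    #   node = rl.addrevision(text, tr, linkrev, p1, p2)
    #   tr.close()
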
    def _addrevision(self, text, transaction, link, p1, p2, d, ifh, dfh):
        if text is None: text = ""
        if p1 is None: p1 = self.tip()
        if p2 is None: p2 = nullid

        node = hash(text, p1, p2)

        if node in self.nodemap:
            return node

        n = self.count()
        t = n - 1

        if n:
            base = self.base(t)
            start = self.start(base)
            end = self.end(t)
            if not d:
                prev = self.revision(self.tip())
                d = self.diff(prev, text)
            data = compress(d)
            l = len(data[1]) + len(data[0])
            dist = end - start + l

        # full versions are inserted when the needed deltas
        # become comparable to the uncompressed text
        if not n or dist > len(text) * 2:
            data = compress(text)
            l = len(data[1]) + len(data[0])
            base = n
        else:
            base = self.base(t)

        offset = 0
        if t >= 0:
            offset = self.end(t)

        if self.version == REVLOGV0:
            e = (offset, l, base, link, p1, p2, node)
        else:
            e = (offset_type(offset, 0), l, len(text),
                 base, link, self.rev(p1), self.rev(p2), node)

        self.index.append(e)
        self.nodemap[node] = n
        entry = struct.pack(self.indexformat, *e)

        if not self._inline():
            transaction.add(self.datafile, offset)
            transaction.add(self.indexfile, n * len(entry))
            if data[0]:
                dfh.write(data[0])
            dfh.write(data[1])
            dfh.flush()
        else:
            ifh.seek(0, 2)
            transaction.add(self.indexfile, ifh.tell(), self.count() - 1)

        if len(self.index) == 1 and self.version != REVLOGV0:
            l = struct.pack(versionformat, self.version)
            ifh.write(l)
            entry = entry[4:]

        ifh.write(entry)

        if self._inline():
            ifh.write(data[0])
            ifh.write(data[1])
            self.checkinlinesize(transaction, ifh)

        self.cache = (node, n, text)
        return node

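    # Worked example of the snapshot heuristic above (numbers invented):
    # if the current chain's base snapshot starts at offset 0, the last
    # delta ends at offset 9000, and the new compressed delta is 500
    # bytes, then dist = 9000 - 0 + 500 = 9500. For a 4000-byte text,
    # 9500 > 4000 * 2, so a fresh full version is stored and the delta
    # chain is cut; otherwise the delta is appended to the chain.
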
    def ancestor(self, a, b):
        """calculate the least common ancestor of nodes a and b"""

        def parents(rev):
            return [p for p in self.parentrevs(rev) if p != nullrev]

        c = ancestor.ancestor(self.rev(a), self.rev(b), parents)
        if c is None:
            return nullid

        return self.node(c)

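    # The ancestor module only needs a callback mapping a revision to
    # its parent revisions, so the same search works on any DAG. A toy
    # sketch (hypothetical graph, not repository data):
    #
    #   graph = {0: [], 1: [0], 2: [0], 3: [1, 2]}
    #   c = ancestor.ancestor(1, 2, lambda rev: graph[rev])  # -> 0
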
    def group(self, nodelist, lookup, infocollect=None):
        """calculate a delta group

        Given a list of changeset revs, return a set of deltas and
        metadata corresponding to nodes. The first delta is
        parent(nodes[0]) -> nodes[0]; the receiver is guaranteed to
        have this parent, as it has all history before these
        changesets. The delta parent is always parents[0].
        """
        revs = [self.rev(n) for n in nodelist]

        # if we don't have any revisions touched by these changesets, bail
        if not revs:
            yield changegroup.closechunk()
            return

        # add the parent of the first rev
        p = self.parents(self.node(revs[0]))[0]
        revs.insert(0, self.rev(p))

        # build deltas
        for i in xrange(0, len(revs) - 1):
            a, b = revs[i], revs[i + 1]
            nb = self.node(b)

            if infocollect is not None:
                infocollect(nb)

            d = self.revdiff(a, b)
            p = self.parents(nb)
            meta = nb + p[0] + p[1] + lookup(nb)
            yield changegroup.genchunk("%s%s" % (meta, d))

        yield changegroup.closechunk()

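    # Wire-format sketch: the payload of each chunk yielded above
    # (genchunk adds the stream framing) is an 80-byte header of four
    # 20-byte nodes followed by the delta, which is exactly what
    # addgroup() unpacks on the receiving side:
    #
    #   node, p1, p2, cs = struct.unpack("20s20s20s20s", chunk[:80])
    #   delta = chunk[80:]
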
    def addgroup(self, revs, linkmapper, transaction, unique=0):
        """
        add a delta group

        given a set of deltas, add them to the revision log. the
        first delta is against its parent, which should be in our
        log, the rest are against the previous delta.
        """

        # track the base of the current delta log
        r = self.count()
        t = r - 1
        node = None

        base = prev = nullrev
        start = end = textlen = 0
        if r:
            end = self.end(t)

        ifh = self.opener(self.indexfile, "a+")
        ifh.seek(0, 2)
        transaction.add(self.indexfile, ifh.tell(), self.count())
        if self._inline():
            dfh = None
        else:
            transaction.add(self.datafile, end)
            dfh = self.opener(self.datafile, "a")

        # loop through our set of deltas
        chain = None
        for chunk in revs:
            node, p1, p2, cs = struct.unpack("20s20s20s20s", chunk[:80])
            link = linkmapper(cs)
            if node in self.nodemap:
                # this can happen if two branches make the same change
                # if unique:
                #    raise RevlogError(_("already have %s") % hex(node[:4]))
                chain = node
                continue
            delta = chunk[80:]

            for p in (p1, p2):
                if not p in self.nodemap:
                    raise LookupError(_("unknown parent %s") % short(p))

            if not chain:
                # retrieve the parent revision of the delta chain
                chain = p1
                if not chain in self.nodemap:
                    raise LookupError(_("unknown base %s") % short(chain))

            # full versions are inserted when the needed deltas become
            # comparable to the uncompressed text or when the previous
            # version is not the one we have a delta against. We use
            # the size of the previous full rev as a proxy for the
            # current size.

            if chain == prev:
                tempd = compress(delta)
                cdelta = tempd[0] + tempd[1]
                textlen = mdiff.patchedsize(textlen, delta)

            if chain != prev or (end - start + len(cdelta)) > textlen * 2:
                # flush our writes here so we can read it in revision
                if dfh:
                    dfh.flush()
                ifh.flush()
                text = self.revision(chain)
                text = self.patches(text, [delta])
                chk = self._addrevision(text, transaction, link, p1, p2, None,
                                        ifh, dfh)
                if not dfh and not self._inline():
                    # addrevision switched from inline to conventional
                    # reopen the index
                    dfh = self.opener(self.datafile, "a")
                    ifh = self.opener(self.indexfile, "a")
                if chk != node:
                    raise RevlogError(_("consistency error adding group"))
                textlen = len(text)
            else:
                if self.version == REVLOGV0:
                    e = (end, len(cdelta), base, link, p1, p2, node)
                else:
                    e = (offset_type(end, 0), len(cdelta), textlen, base,
                         link, self.rev(p1), self.rev(p2), node)
                self.index.append(e)
                self.nodemap[node] = r
                if self._inline():
                    ifh.write(struct.pack(self.indexformat, *e))
                    ifh.write(cdelta)
                    self.checkinlinesize(transaction, ifh)
                    if not self._inline():
                        dfh = self.opener(self.datafile, "a")
                        ifh = self.opener(self.indexfile, "a")
                else:
                    dfh.write(cdelta)
                    ifh.write(struct.pack(self.indexformat, *e))

            t, r, chain, prev = r, r + 1, node, node
            base = self.base(t)
            start = self.start(base)
            end = self.end(t)

        return node

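    # Application sketch (informal): because group() emits each delta
    # against the previous node, the receiver only needs to track a
    # single `chain` node, roughly:
    #
    #   chain = None
    #   for node, p1, p2, delta in chunks:   # hypothetical unpacking
    #       base = chain or p1               # first delta: vs its parent
    #       # ...store delta against base, or rebuild a full text...
    #       chain = node
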
    def strip(self, rev, minlink):
        if self.count() == 0 or rev >= self.count():
            return

        if isinstance(self.index, lazyindex):
            self._loadindexmap()

        # When stripping away a revision, we need to make sure it
        # does not actually belong to an older changeset.
        # The minlink parameter defines the oldest revision
        # we're allowed to strip away.
        while minlink > self.index[rev][-4]:
            rev += 1
            if rev >= self.count():
                return

        # first truncate the files on disk
        end = self.start(rev)
        if not self._inline():
            df = self.opener(self.datafile, "a")
            df.truncate(end)
            end = rev * struct.calcsize(self.indexformat)
        else:
            end += rev * struct.calcsize(self.indexformat)

        indexf = self.opener(self.indexfile, "a")
        indexf.truncate(end)

        # then reset internal state in memory to forget those revisions
        self.cache = None
        self.chunkcache = None
        for x in xrange(rev, self.count()):
            del self.nodemap[self.node(x)]

        del self.index[rev:]

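    # Truncation sketch (numbers invented): stripping from rev 10 in a
    # split revlog truncates the data file at self.start(10) and the
    # index at 10 * entry_size. In an inline revlog both live in one
    # file, so the cut point is self.start(10) + 10 * entry_size, the
    # position of revision 10's own index entry, which all earlier
    # entries and data precede.
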
    def checksize(self):
        expected = 0
        if self.count():
            expected = self.end(self.count() - 1)

        try:
            f = self.opener(self.datafile)
            f.seek(0, 2)
            actual = f.tell()
            dd = actual - expected
        except IOError, inst:
            if inst.errno != errno.ENOENT:
                raise
            dd = 0

        try:
            f = self.opener(self.indexfile)
            f.seek(0, 2)
            actual = f.tell()
            s = struct.calcsize(self.indexformat)
            i = actual / s
            di = actual - (i * s)
            if self._inline():
                databytes = 0
                for r in xrange(self.count()):
                    databytes += self.length(r)
                dd = 0
                di = actual - self.count() * s - databytes
        except IOError, inst:
            if inst.errno != errno.ENOENT:
                raise
            di = 0

        return (dd, di)
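
    # Interpretation sketch: checksize() reports the size discrepancy
    # (actual minus expected bytes) of the on-disk files, e.g.
    #
    #   dd, di = rl.checksize()   # hypothetical revlog instance
    #   # dd: data-file discrepancy, di: index-file discrepancy
    #
    # (0, 0) means the sizes are consistent; verify-style callers treat
    # anything else as possible truncation or trailing garbage.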