New lazy index code for revlogs...
mason@suse.com
r2079:ee96ca27 default
@@ -1,1101 +1,1206 @@
1 """
1 """
2 revlog.py - storage back-end for mercurial
2 revlog.py - storage back-end for mercurial
3
3
4 This provides efficient delta storage with O(1) retrieve and append
4 This provides efficient delta storage with O(1) retrieve and append
5 and O(changes) merge between branches
5 and O(changes) merge between branches
6
6
7 Copyright 2005 Matt Mackall <mpm@selenic.com>
7 Copyright 2005 Matt Mackall <mpm@selenic.com>
8
8
9 This software may be used and distributed according to the terms
9 This software may be used and distributed according to the terms
10 of the GNU General Public License, incorporated herein by reference.
10 of the GNU General Public License, incorporated herein by reference.
11 """
11 """
12
12
13 from node import *
13 from node import *
14 from i18n import gettext as _
14 from i18n import gettext as _
15 from demandload import demandload
15 from demandload import demandload
16 demandload(globals(), "binascii changegroup errno heapq mdiff os")
16 demandload(globals(), "binascii changegroup errno heapq mdiff os")
17 demandload(globals(), "sha struct zlib")
17 demandload(globals(), "sha struct zlib")
18
18
19 # revlog version strings
19 # revlog version strings
20 REVLOGV0 = 0
20 REVLOGV0 = 0
21 REVLOGNG = 1
21 REVLOGNG = 1
22
22
23 # revlog flags
23 # revlog flags
24 REVLOGNGINLINEDATA = (1 << 16)
24 REVLOGNGINLINEDATA = (1 << 16)
25
25
26 def flagstr(flag):
26 def flagstr(flag):
27 if flag == "inline":
27 if flag == "inline":
28 return REVLOGNGINLINEDATA
28 return REVLOGNGINLINEDATA
29 raise RevlogError(_("unknown revlog flag %s" % flag))
29 raise RevlogError(_("unknown revlog flag %s" % flag))
30
30
31 def hash(text, p1, p2):
31 def hash(text, p1, p2):
32 """generate a hash from the given text and its parent hashes
32 """generate a hash from the given text and its parent hashes
33
33
34 This hash combines both the current file contents and its history
34 This hash combines both the current file contents and its history
35 in a manner that makes it easy to distinguish nodes with the same
35 in a manner that makes it easy to distinguish nodes with the same
36 content in the revision graph.
36 content in the revision graph.
37 """
37 """
38 l = [p1, p2]
38 l = [p1, p2]
39 l.sort()
39 l.sort()
40 s = sha.new(l[0])
40 s = sha.new(l[0])
41 s.update(l[1])
41 s.update(l[1])
42 s.update(text)
42 s.update(text)
43 return s.digest()
43 return s.digest()
44
44
45 def compress(text):
45 def compress(text):
46 """ generate a possibly-compressed representation of text """
46 """ generate a possibly-compressed representation of text """
47 if not text: return ("", text)
47 if not text: return ("", text)
48 if len(text) < 44:
48 if len(text) < 44:
49 if text[0] == '\0': return ("", text)
49 if text[0] == '\0': return ("", text)
50 return ('u', text)
50 return ('u', text)
51 bin = zlib.compress(text)
51 bin = zlib.compress(text)
52 if len(bin) > len(text):
52 if len(bin) > len(text):
53 if text[0] == '\0': return ("", text)
53 if text[0] == '\0': return ("", text)
54 return ('u', text)
54 return ('u', text)
55 return ("", bin)
55 return ("", bin)
56
56
57 def decompress(bin):
57 def decompress(bin):
58 """ decompress the given input """
58 """ decompress the given input """
59 if not bin: return bin
59 if not bin: return bin
60 t = bin[0]
60 t = bin[0]
61 if t == '\0': return bin
61 if t == '\0': return bin
62 if t == 'x': return zlib.decompress(bin)
62 if t == 'x': return zlib.decompress(bin)
63 if t == 'u': return bin[1:]
63 if t == 'u': return bin[1:]
64 raise RevlogError(_("unknown compression type %r") % t)
64 raise RevlogError(_("unknown compression type %r") % t)
65
65
66 indexformatv0 = ">4l20s20s20s"
66 indexformatv0 = ">4l20s20s20s"
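+# byte offset of the 20-byte nodeid inside a packed v0 index entry:
+# four 4-byte ints plus the two 20-byte parents come first (16 + 40 = 56)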
+v0shaoffset = 56
 # index ng:
 # 6 bytes offset
 # 2 bytes flags
 # 4 bytes compressed length
 # 4 bytes uncompressed length
 # 4 bytes: base rev
 # 4 bytes link rev
 # 4 bytes parent 1 rev
 # 4 bytes parent 2 rev
 # 32 bytes: nodeid
 indexformatng = ">Qiiiiii20s12x"
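+# in the ng format the nodeid starts 32 bytes in: one 8-byte
+# offset/flags word plus six 4-byte ints (8 + 24 = 32)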
+ngshaoffset = 32
 versionformat = ">i"
 
 class lazyparser(object):
     """
     this class avoids the need to parse the entirety of large indices
-
-    By default we parse and load 1000 entries at a time.
-
-    If no position is specified, we load the whole index, and replace
-    the lazy objects in revlog with the underlying objects for
-    efficiency in cases where we look at most of the nodes.
     """
-    def __init__(self, data, revlog, indexformat):
-        self.data = data
+    def __init__(self, dataf, size, indexformat, shaoffset):
+        self.dataf = dataf
+        self.format = indexformat
         self.s = struct.calcsize(indexformat)
         self.indexformat = indexformat
-        self.l = len(data)/self.s
+        self.datasize = size
+        self.l = size/self.s
         self.index = [None] * self.l
         self.map = {nullid: -1}
+        self.allmap = 0
         self.all = 0
-        self.revlog = revlog
+        self.mapfind_count = 0
+        self.shaoffset = shaoffset
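+        # all: every index entry is parsed; allmap: every nodeid is in
+        # self.map; mapfind_count: node lookups served so far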
 
-    def load(self, pos=None):
+    def loadmap(self):
+        """
+        during a commit, we need to make sure the rev being added is
+        not a duplicate. This requires loading the entire index,
+        which is fairly slow. loadmap can load up just the node map,
+        which takes much less time.
+        """
+        if self.allmap: return
+        start = 0
+        end = self.datasize
+        self.allmap = 1
+        cur = 0
+        count = 0
+        blocksize = self.s * 256
+        self.dataf.seek(0)
+        while cur < end:
+            data = self.dataf.read(blocksize)
+            off = 0
+            for x in xrange(256):
+                n = data[off + self.shaoffset:off + self.shaoffset + 20]
+                self.map[n] = count
+                count += 1
+                if count >= self.l:
+                    break
+                off += self.s
+            cur += blocksize
+
+    def loadblock(self, blockstart, blocksize, data=None):
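+        # unpack one contiguous byte range of the index, filling in only
+        # the entries that have not been parsed yet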
         if self.all: return
-        if pos is not None:
-            block = pos / 1000
-            i = block * 1000
-            end = min(self.l, i + 1000)
-        else:
-            self.all = 1
-            i = 0
-            end = self.l
-            self.revlog.index = self.index
-            self.revlog.nodemap = self.map
+        if data is None:
+            self.dataf.seek(blockstart)
+            data = self.dataf.read(blocksize)
+        lend = len(data) / self.s
+        i = blockstart / self.s
+        off = 0
+        for x in xrange(lend):
+            if self.index[i + x] == None:
+                b = data[off : off + self.s]
+                e = struct.unpack(self.format, b)
+                self.index[i + x] = e
+                self.map[e[-1]] = i + x
+            off += self.s
+
+    def findnode(self, node):
+        """search backwards through the index file for a specific node"""
+        if self.allmap: return None
+
+        # hg log will cause many many searches for the manifest
+        # nodes. After we get called a few times, just load the whole
+        # thing.
+        if self.mapfind_count > 8:
+            self.loadmap()
+            if node in self.map:
+                return node
+            return None
+        self.mapfind_count += 1
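+        # skip entries that are already parsed (and therefore already
+        # present in self.map) so we only search unparsed territory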
+        last = self.l - 1
+        while self.index[last] != None:
+            if last == 0:
+                self.all = 1
+                self.allmap = 1
+                return None
+            last -= 1
+        end = (last + 1) * self.s
+        blocksize = self.s * 256
+        while end >= 0:
+            start = max(end - blocksize, 0)
+            self.dataf.seek(start)
+            data = self.dataf.read(end - start)
+            findend = end - start
+            while True:
+                # we're searching backwards, so we have to make sure
+                # we don't find a changeset where this node is a parent
+                off = data.rfind(node, 0, findend)
+                findend = off
+                if off >= 0:
+                    i = off / self.s
+                    off = i * self.s
+                    n = data[off + self.shaoffset:off + self.shaoffset + 20]
+                    if n == node:
+                        self.map[n] = i + start / self.s
+                        return node
+                else:
+                    break
+            end -= blocksize
+        return None
 
-        while i < end:
-            if not self.index[i]:
-                d = self.data[i * self.s: (i + 1) * self.s]
-                e = struct.unpack(self.indexformat, d)
-                self.index[i] = e
-                self.map[e[-1]] = i
-            i += 1
+    def loadindex(self, i=None, end=None):
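+        # three ways in: no arguments parses the whole index in 512-byte
+        # chunks; i and end parse that entry range; i alone parses a
+        # 64-entry window around i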
+        if self.all: return
+        all = False
+        if i == None:
+            blockstart = 0
+            blocksize = (512 / self.s) * self.s
+            end = self.datasize
+            all = True
+        else:
+            if end:
+                blockstart = i * self.s
+                end = end * self.s
+                blocksize = end - blockstart
+            else:
+                blockstart = (i & ~(32)) * self.s
+                blocksize = self.s * 64
+                end = blockstart + blocksize
+        while blockstart < end:
+            self.loadblock(blockstart, blocksize)
+            blockstart += blocksize
+        if all: self.all = True
 
 class lazyindex(object):
     """a lazy version of the index array"""
     def __init__(self, parser):
         self.p = parser
     def __len__(self):
         return len(self.p.index)
     def load(self, pos):
         if pos < 0:
             pos += len(self.p.index)
-        self.p.load(pos)
+        self.p.loadindex(pos)
         return self.p.index[pos]
     def __getitem__(self, pos):
         return self.p.index[pos] or self.load(pos)
     def __setitem__(self, pos, item):
         self.p.index[pos] = item
     def __delitem__(self, pos):
         del self.p.index[pos]
     def append(self, e):
         self.p.index.append(e)
 
 class lazymap(object):
     """a lazy version of the node map"""
     def __init__(self, parser):
         self.p = parser
     def load(self, key):
-        if self.p.all: return
-        n = self.p.data.find(key)
-        if n < 0:
+        n = self.p.findnode(key)
+        if n == None:
             raise KeyError(key)
-        pos = n / self.p.s
-        self.p.load(pos)
     def __contains__(self, key):
-        self.p.load()
+        if key in self.p.map:
+            return True
+        self.p.loadmap()
         return key in self.p.map
     def __iter__(self):
         yield nullid
         for i in xrange(self.p.l):
             try:
                 yield self.p.index[i][-1]
             except:
-                self.p.load(i)
+                self.p.loadindex(i)
                 yield self.p.index[i][-1]
     def __getitem__(self, key):
         try:
             return self.p.map[key]
         except KeyError:
             try:
                 self.load(key)
                 return self.p.map[key]
             except KeyError:
                 raise KeyError("node " + hex(key))
     def __setitem__(self, key, val):
         self.p.map[key] = val
     def __delitem__(self, key):
         del self.p.map[key]
 
 class RevlogError(Exception): pass
 
 class revlog(object):
     """
     the underlying revision storage object
 
     A revlog consists of two parts, an index and the revision data.
 
     The index is a file with a fixed record size containing
     information on each revision, including its nodeid (hash), the
     nodeids of its parents, the position and offset of its data within
     the data file, and the revision it's based on. Finally, each entry
     contains a linkrev entry that can serve as a pointer to external
     data.
 
     The revision data itself is a linear collection of data chunks.
     Each chunk represents a revision and is usually represented as a
     delta against the previous chunk. To bound lookup time, runs of
     deltas are limited to about 2 times the length of the original
     version data. This makes retrieval of a version proportional to
     its size, or O(1) relative to the number of revisions.
 
     Both pieces of the revlog are written to in an append-only
     fashion, which means we never need to rewrite a file to insert or
     remove data, and can use some simple techniques to avoid the need
     for locking while reading.
     """
     def __init__(self, opener, indexfile, datafile, defversion=0):
         """
         create a revlog object
 
         opener is a function that abstracts the file opening operation
         and can be used to implement COW semantics or the like.
         """
         self.indexfile = indexfile
         self.datafile = datafile
         self.opener = opener
 
         self.indexstat = None
         self.cache = None
         self.chunkcache = None
         self.defversion = defversion
         self.load()
 
     def load(self):
         v = self.defversion
         try:
             f = self.opener(self.indexfile)
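+            # read just the 4-byte version header and rewind; a large
+            # index stays on disk for the lazy parser to page in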
-            i = f.read()
+            i = f.read(4)
+            f.seek(0)
         except IOError, inst:
             if inst.errno != errno.ENOENT:
                 raise
             i = ""
         else:
             try:
                 st = os.fstat(f.fileno())
             except AttributeError, inst:
                 st = None
             else:
                 oldst = self.indexstat
                 if (oldst and st.st_dev == oldst.st_dev
                     and st.st_ino == oldst.st_ino
                     and st.st_mtime == oldst.st_mtime
                     and st.st_ctime == oldst.st_ctime):
                     return
                 self.indexstat = st
         if len(i) > 0:
-            v = struct.unpack(versionformat, i[:4])[0]
+            v = struct.unpack(versionformat, i)[0]
             flags = v & ~0xFFFF
             fmt = v & 0xFFFF
             if fmt == 0:
                 if flags:
                     raise RevlogError(_("index %s invalid flags %x for format v0" %
                                        (self.indexfile, flags)))
             elif fmt == REVLOGNG:
                 if flags & ~REVLOGNGINLINEDATA:
                     raise RevlogError(_("index %s invalid flags %x for revlogng" %
                                        (self.indexfile, flags)))
             else:
                 raise RevlogError(_("index %s invalid format %d" %
                                    (self.indexfile, fmt)))
         self.version = v
         if v == 0:
             self.indexformat = indexformatv0
+            shaoffset = v0shaoffset
         else:
             self.indexformat = indexformatng
+            shaoffset = ngshaoffset
 
         if i:
             if not self.inlinedata() and st and st.st_size > 10000:
                 # big index, let's parse it on demand
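+                # hand the lazy parser the open index file and its size
+                # rather than the file contents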
-                parser = lazyparser(i, self, self.indexformat)
+                parser = lazyparser(f, st.st_size, self.indexformat, shaoffset)
                 self.index = lazyindex(parser)
                 self.nodemap = lazymap(parser)
             else:
+                i = f.read()
                 self.parseindex(i)
                 if self.inlinedata():
                     # we've already got the entire data file read in, save it
                     # in the chunk data
                     self.chunkcache = (0, i)
                 if self.version != 0:
                     e = list(self.index[0])
                     type = self.ngtype(e[0])
                     e[0] = self.offset_type(0, type)
                     self.index[0] = e
         else:
             self.nodemap = { nullid: -1}
             self.index = []
 
 
     def parseindex(self, data):
         s = struct.calcsize(self.indexformat)
         l = len(data)
         self.index = []
         self.nodemap = {nullid: -1}
         inline = self.inlinedata()
         off = 0
         n = 0
         while off < l:
             e = struct.unpack(self.indexformat, data[off:off + s])
             self.index.append(e)
             self.nodemap[e[-1]] = n
             n += 1
             off += s
             if inline:
                 off += e[1]
 
     def ngoffset(self, q):
         if q & 0xFFFF:
             raise RevlogError(_('%s: incompatible revision flag %x') %
                               (self.indexfile, type))
         return long(q >> 16)
 
     def ngtype(self, q):
         return int(q & 0xFFFF)
 
     def offset_type(self, offset, type):
         return long(long(offset) << 16 | type)
 
+    def loadindex(self, start, end):
+        """load a block of indexes all at once from the lazy parser"""
+        if isinstance(self.index, lazyindex):
+            self.index.p.loadindex(start, end)
+
     def loadindexmap(self):
         """loads both the map and the index from the lazy parser"""
         if isinstance(self.index, lazyindex):
             p = self.index.p
-            p.load()
+            p.loadindex()
+            self.nodemap = p.map
+
+    def loadmap(self):
+        """loads the map from the lazy parser"""
+        if isinstance(self.nodemap, lazymap):
+            self.nodemap.p.loadmap()
+            self.nodemap = self.nodemap.p.map
 
     def inlinedata(self): return self.version & REVLOGNGINLINEDATA
     def tip(self): return self.node(len(self.index) - 1)
     def count(self): return len(self.index)
     def node(self, rev):
         return (rev < 0) and nullid or self.index[rev][-1]
     def rev(self, node):
         try:
             return self.nodemap[node]
         except KeyError:
             raise RevlogError(_('%s: no node %s') % (self.indexfile, hex(node)))
     def linkrev(self, node): return self.index[self.rev(node)][-4]
     def parents(self, node):
         if node == nullid: return (nullid, nullid)
         r = self.rev(node)
         d = self.index[r][-3:-1]
         if self.version == 0:
             return d
         return [ self.node(x) for x in d ]
     def start(self, rev):
         if rev < 0:
             return -1
         if self.version != 0:
             return self.ngoffset(self.index[rev][0])
         return self.index[rev][0]
 
     def end(self, rev): return self.start(rev) + self.length(rev)
 
     def size(self, rev):
         """return the length of the uncompressed text for a given revision"""
         l = -1
         if self.version != 0:
             l = self.index[rev][2]
             if l >= 0:
                 return l
 
         t = self.revision(self.node(rev))
         return len(t)
 
         # alternate implementation. The advantage to this code is it
         # will be faster for a single revision. But, the results are not
         # cached, so finding the size of every revision will be slower.
         """
         if self.cache and self.cache[1] == rev:
             return len(self.cache[2])
 
         base = self.base(rev)
         if self.cache and self.cache[1] >= base and self.cache[1] < rev:
             base = self.cache[1]
             text = self.cache[2]
         else:
             text = self.revision(self.node(base))
 
         l = len(text)
         for x in xrange(base + 1, rev + 1):
             l = mdiff.patchedsize(l, self.chunk(x))
         return l
         """
 
     def length(self, rev):
         if rev < 0:
             return 0
         else:
             return self.index[rev][1]
     def base(self, rev): return (rev < 0) and rev or self.index[rev][-5]
 
     def reachable(self, rev, stop=None):
         reachable = {}
         visit = [rev]
         reachable[rev] = 1
         if stop:
             stopn = self.rev(stop)
         else:
             stopn = 0
         while visit:
             n = visit.pop(0)
             if n == stop:
                 continue
             if n == nullid:
                 continue
             for p in self.parents(n):
                 if self.rev(p) < stopn:
                     continue
                 if p not in reachable:
                     reachable[p] = 1
                     visit.append(p)
         return reachable
 
     def nodesbetween(self, roots=None, heads=None):
         """Return a tuple containing three elements. Elements 1 and 2 contain
         a final list of bases and heads after all the unreachable ones have been
         pruned. Element 0 contains a topologically sorted list of all
 
         nodes that satisfy these constraints:
         1. All nodes must be descended from a node in roots (the nodes on
         roots are considered descended from themselves).
         2. All nodes must also be ancestors of a node in heads (the nodes in
         heads are considered to be their own ancestors).
 
         If roots is unspecified, nullid is assumed as the only root.
         If heads is unspecified, it is taken to be the output of the
         heads method (i.e. a list of all nodes in the repository that
         have no children)."""
         nonodes = ([], [], [])
         if roots is not None:
             roots = list(roots)
             if not roots:
                 return nonodes
             lowestrev = min([self.rev(n) for n in roots])
         else:
             roots = [nullid] # Everybody's a descendent of nullid
             lowestrev = -1
         if (lowestrev == -1) and (heads is None):
             # We want _all_ the nodes!
             return ([self.node(r) for r in xrange(0, self.count())],
                     [nullid], list(self.heads()))
         if heads is None:
             # All nodes are ancestors, so the latest ancestor is the last
             # node.
             highestrev = self.count() - 1
             # Set ancestors to None to signal that every node is an ancestor.
             ancestors = None
             # Set heads to an empty dictionary for later discovery of heads
             heads = {}
         else:
             heads = list(heads)
             if not heads:
                 return nonodes
             ancestors = {}
             # Start at the top and keep marking parents until we're done.
             nodestotag = heads[:]
             # Turn heads into a dictionary so we can remove 'fake' heads.
             # Also, later we will be using it to filter out the heads we can't
             # find from roots.
             heads = dict.fromkeys(heads, 0)
             # Remember where the top was so we can use it as a limit later.
             highestrev = max([self.rev(n) for n in nodestotag])
             while nodestotag:
                 # grab a node to tag
                 n = nodestotag.pop()
                 # Never tag nullid
                 if n == nullid:
                     continue
                 # A node's revision number represents its place in a
                 # topologically sorted list of nodes.
                 r = self.rev(n)
                 if r >= lowestrev:
                     if n not in ancestors:
                         # If we are possibly a descendent of one of the roots
                         # and we haven't already been marked as an ancestor
                         ancestors[n] = 1 # Mark as ancestor
                         # Add non-nullid parents to list of nodes to tag.
                         nodestotag.extend([p for p in self.parents(n) if
                                            p != nullid])
                     elif n in heads: # We've seen it before, is it a fake head?
                         # So it is, real heads should not be the ancestors of
                         # any other heads.
                         heads.pop(n)
             if not ancestors:
                 return nonodes
             # Now that we have our set of ancestors, we want to remove any
             # roots that are not ancestors.
 
             # If one of the roots was nullid, everything is included anyway.
             if lowestrev > -1:
                 # But, since we weren't, let's recompute the lowest rev to not
                 # include roots that aren't ancestors.
 
                 # Filter out roots that aren't ancestors of heads
                 roots = [n for n in roots if n in ancestors]
                 # Recompute the lowest revision
                 if roots:
                     lowestrev = min([self.rev(n) for n in roots])
                 else:
                     # No more roots? Return empty list
                     return nonodes
             else:
                 # We are descending from nullid, and don't need to care about
                 # any other roots.
                 lowestrev = -1
                 roots = [nullid]
         # Transform our roots list into a 'set' (i.e. a dictionary where the
         # values don't matter.
         descendents = dict.fromkeys(roots, 1)
         # Also, keep the original roots so we can filter out roots that aren't
         # 'real' roots (i.e. are descended from other roots).
         roots = descendents.copy()
         # Our topologically sorted list of output nodes.
         orderedout = []
         # Don't start at nullid since we don't want nullid in our output list,
         # and if nullid shows up in descendents, empty parents will look like
         # they're descendents.
         for r in xrange(max(lowestrev, 0), highestrev + 1):
             n = self.node(r)
             isdescendent = False
             if lowestrev == -1: # Everybody is a descendent of nullid
                 isdescendent = True
             elif n in descendents:
                 # n is already a descendent
                 isdescendent = True
                 # This check only needs to be done here because all the roots
                 # will start being marked as descendents before the loop.
                 if n in roots:
                     # If n was a root, check if it's a 'real' root.
                     p = tuple(self.parents(n))
                     # If any of its parents are descendents, it's not a root.
                     if (p[0] in descendents) or (p[1] in descendents):
                         roots.pop(n)
             else:
                 p = tuple(self.parents(n))
                 # A node is a descendent if either of its parents are
                 # descendents. (We seeded the dependents list with the roots
                 # up there, remember?)
                 if (p[0] in descendents) or (p[1] in descendents):
                     descendents[n] = 1
                     isdescendent = True
             if isdescendent and ((ancestors is None) or (n in ancestors)):
                 # Only include nodes that are both descendents and ancestors.
                 orderedout.append(n)
                 if (ancestors is not None) and (n in heads):
                     # We're trying to figure out which heads are reachable
                     # from roots.
                     # Mark this head as having been reached
                     heads[n] = 1
                 elif ancestors is None:
                     # Otherwise, we're trying to discover the heads.
                     # Assume this is a head because if it isn't, the next step
                     # will eventually remove it.
                     heads[n] = 1
                     # But, obviously its parents aren't.
                     for p in self.parents(n):
                         heads.pop(p, None)
         heads = [n for n in heads.iterkeys() if heads[n] != 0]
         roots = roots.keys()
         assert orderedout
         assert roots
         assert heads
         return (orderedout, roots, heads)
 
     def heads(self, start=None):
         """return the list of all nodes that have no children
 
         if start is specified, only heads that are descendants of
         start will be returned
 
         """
         if start is None:
             start = nullid
         reachable = {start: 1}
         heads = {start: 1}
         startrev = self.rev(start)
 
         for r in xrange(startrev + 1, self.count()):
             n = self.node(r)
             for pn in self.parents(n):
                 if pn in reachable:
                     reachable[n] = 1
                     heads[n] = 1
                 if pn in heads:
                     del heads[pn]
         return heads.keys()
 
     def children(self, node):
         """find the children of a given node"""
         c = []
         p = self.rev(node)
         for r in range(p + 1, self.count()):
             n = self.node(r)
             for pn in self.parents(n):
                 if pn == node:
                     c.append(n)
                     continue
                 elif pn == nullid:
                     continue
         return c
 
     def lookup(self, id):
         """locate a node based on revision number or subset of hex nodeid"""
         try:
             rev = int(id)
             if str(rev) != id: raise ValueError
             if rev < 0: rev = self.count() + rev
             if rev < 0 or rev >= self.count(): raise ValueError
             return self.node(rev)
         except (ValueError, OverflowError):
             c = []
             for n in self.nodemap:
                 if hex(n).startswith(id):
                     c.append(n)
             if len(c) > 1: raise RevlogError(_("Ambiguous identifier"))
             if len(c) < 1: raise RevlogError(_("No match found"))
             return c[0]
 
         return None
 
     def diff(self, a, b):
         """return a delta between two revisions"""
         return mdiff.textdiff(a, b)
 
     def patches(self, t, pl):
         """apply a list of patches to a string"""
         return mdiff.patches(t, pl)
 
     def chunk(self, rev, df=None, cachelen=4096):
         start, length = self.start(rev), self.length(rev)
         inline = self.inlinedata()
         if inline:
             start += (rev + 1) * struct.calcsize(self.indexformat)
         end = start + length
         def loadcache(df):
             cache_length = max(cachelen, length) # 4k
             if not df:
                 if inline:
                     df = self.opener(self.indexfile)
                 else:
                     df = self.opener(self.datafile)
             df.seek(start)
             self.chunkcache = (start, df.read(cache_length))
 
         if not self.chunkcache:
             loadcache(df)
 
         cache_start = self.chunkcache[0]
         cache_end = cache_start + len(self.chunkcache[1])
         if start >= cache_start and end <= cache_end:
             # it is cached
             offset = start - cache_start
         else:
             loadcache(df)
             offset = 0
 
         #def checkchunk():
         #    df = self.opener(self.datafile)
         #    df.seek(start)
         #    return df.read(length)
         #assert s == checkchunk()
         return decompress(self.chunkcache[1][offset:offset + length])
 
     def delta(self, node):
         """return or calculate a delta between a node and its predecessor"""
         r = self.rev(node)
         return self.revdiff(r - 1, r)
 
     def revdiff(self, rev1, rev2):
         """return or calculate a delta between two revisions"""
         b1 = self.base(rev1)
         b2 = self.base(rev2)
         if b1 == b2 and rev1 + 1 == rev2:
             return self.chunk(rev2)
         else:
             return self.diff(self.revision(self.node(rev1)),
                              self.revision(self.node(rev2)))
 
     def revision(self, node):
         """return an uncompressed revision of a given node"""
         if node == nullid: return ""
         if self.cache and self.cache[0] == node: return self.cache[2]
 
         # look up what we need to read
         text = None
         rev = self.rev(node)
         base = self.base(rev)
 
         if self.inlinedata():
             # we probably have the whole chunk cached
             df = None
         else:
             df = self.opener(self.datafile)
 
         # do we have useful data cached?
         if self.cache and self.cache[1] >= base and self.cache[1] < rev:
             base = self.cache[1]
             text = self.cache[2]
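+            # make sure every index entry in the delta chain is parsed
+            # before we start reading chunks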
+            self.loadindex(base, rev + 1)
         else:
+            self.loadindex(base, rev + 1)
             text = self.chunk(base, df=df)
695
800
696 bins = []
801 bins = []
697 for r in xrange(base + 1, rev + 1):
802 for r in xrange(base + 1, rev + 1):
698 bins.append(self.chunk(r, df=df))
803 bins.append(self.chunk(r, df=df))
699
804
700 text = self.patches(text, bins)
805 text = self.patches(text, bins)
701
806
702 p1, p2 = self.parents(node)
807 p1, p2 = self.parents(node)
703 if node != hash(text, p1, p2):
808 if node != hash(text, p1, p2):
704 raise RevlogError(_("integrity check failed on %s:%d")
809 raise RevlogError(_("integrity check failed on %s:%d")
705 % (self.datafile, rev))
810 % (self.datafile, rev))
706
811
707 self.cache = (node, rev, text)
812 self.cache = (node, rev, text)
708 return text
813 return text
709
814
710 def checkinlinesize(self, tr, fp=None):
815 def checkinlinesize(self, tr, fp=None):
711 if not self.inlinedata():
816 if not self.inlinedata():
712 return
817 return
713 if not fp:
818 if not fp:
714 fp = self.opener(self.indexfile, 'r')
819 fp = self.opener(self.indexfile, 'r')
715 size = fp.tell()
820 size = fp.tell()
716 if size < 131072:
821 if size < 131072:
717 return
822 return
718 tr.add(self.datafile, 0)
823 tr.add(self.datafile, 0)
719 df = self.opener(self.datafile, 'w')
824 df = self.opener(self.datafile, 'w')
720 calc = struct.calcsize(self.indexformat)
825 calc = struct.calcsize(self.indexformat)
721 for r in xrange(self.count()):
826 for r in xrange(self.count()):
722 start = self.start(r) + (r + 1) * calc
827 start = self.start(r) + (r + 1) * calc
723 length = self.length(r)
828 length = self.length(r)
724 fp.seek(start)
829 fp.seek(start)
725 d = fp.read(length)
830 d = fp.read(length)
726 df.write(d)
831 df.write(d)
727 fp.close()
832 fp.close()
728 df.close()
833 df.close()
729 fp = self.opener(self.indexfile, 'w', atomictemp=True)
834 fp = self.opener(self.indexfile, 'w', atomictemp=True)
730 self.version &= ~(REVLOGNGINLINEDATA)
835 self.version &= ~(REVLOGNGINLINEDATA)
731 if self.count():
836 if self.count():
732 x = self.index[0]
837 x = self.index[0]
733 e = struct.pack(self.indexformat, *x)[4:]
838 e = struct.pack(self.indexformat, *x)[4:]
734 l = struct.pack(versionformat, self.version)
839 l = struct.pack(versionformat, self.version)
735 fp.write(l)
840 fp.write(l)
736 fp.write(e)
841 fp.write(e)
737
842
738 for i in xrange(1, self.count()):
843 for i in xrange(1, self.count()):
739 x = self.index[i]
844 x = self.index[i]
740 e = struct.pack(self.indexformat, *x)
845 e = struct.pack(self.indexformat, *x)
741 fp.write(e)
846 fp.write(e)
742
847
743 # if we don't call rename, the temp file will never replace the
848 # if we don't call rename, the temp file will never replace the
744 # real index
849 # real index
745 fp.rename()
850 fp.rename()
746 self.chunkcache = None
851 self.chunkcache = None
747
852
748 def addrevision(self, text, transaction, link, p1=None, p2=None, d=None):
853 def addrevision(self, text, transaction, link, p1=None, p2=None, d=None):
749 """add a revision to the log
854 """add a revision to the log
750
855
751 text - the revision data to add
856 text - the revision data to add
752 transaction - the transaction object used for rollback
857 transaction - the transaction object used for rollback
753 link - the linkrev data to add
858 link - the linkrev data to add
754 p1, p2 - the parent nodeids of the revision
859 p1, p2 - the parent nodeids of the revision
755 d - an optional precomputed delta
860 d - an optional precomputed delta
756 """
861 """
757 if text is None: text = ""
862 if text is None: text = ""
758 if p1 is None: p1 = self.tip()
863 if p1 is None: p1 = self.tip()
759 if p2 is None: p2 = nullid
864 if p2 is None: p2 = nullid
760
865
761 node = hash(text, p1, p2)
866 node = hash(text, p1, p2)
762
867
763 if node in self.nodemap:
868 if node in self.nodemap:
764 return node
869 return node
765
870
766 n = self.count()
871 n = self.count()
767 t = n - 1
872 t = n - 1
768
873
769 if n:
874 if n:
770 base = self.base(t)
875 base = self.base(t)
771 start = self.start(base)
876 start = self.start(base)
772 end = self.end(t)
877 end = self.end(t)
773 if not d:
878 if not d:
774 prev = self.revision(self.tip())
879 prev = self.revision(self.tip())
775 d = self.diff(prev, str(text))
880 d = self.diff(prev, str(text))
776 data = compress(d)
881 data = compress(d)
777 l = len(data[1]) + len(data[0])
882 l = len(data[1]) + len(data[0])
778 dist = end - start + l
883 dist = end - start + l
779
884
780 # full versions are inserted when the needed deltas
885 # full versions are inserted when the needed deltas
781 # become comparable to the uncompressed text
886 # become comparable to the uncompressed text
782 if not n or dist > len(text) * 2:
887 if not n or dist > len(text) * 2:
783 data = compress(text)
888 data = compress(text)
784 l = len(data[1]) + len(data[0])
889 l = len(data[1]) + len(data[0])
785 base = n
890 base = n
786 else:
891 else:
787 base = self.base(t)
892 base = self.base(t)
788
893
789 offset = 0
894 offset = 0
790 if t >= 0:
895 if t >= 0:
791 offset = self.end(t)
896 offset = self.end(t)
792
897
793 if self.version == 0:
898 if self.version == 0:
794 e = (offset, l, base, link, p1, p2, node)
899 e = (offset, l, base, link, p1, p2, node)
795 else:
900 else:
796 e = (self.offset_type(offset, 0), l, len(text),
901 e = (self.offset_type(offset, 0), l, len(text),
797 base, link, self.rev(p1), self.rev(p2), node)
902 base, link, self.rev(p1), self.rev(p2), node)
798
903
799 self.index.append(e)
904 self.index.append(e)
800 self.nodemap[node] = n
905 self.nodemap[node] = n
801 entry = struct.pack(self.indexformat, *e)
906 entry = struct.pack(self.indexformat, *e)
802
907
803 if not self.inlinedata():
908 if not self.inlinedata():
804 transaction.add(self.datafile, offset)
909 transaction.add(self.datafile, offset)
805 transaction.add(self.indexfile, n * len(entry))
910 transaction.add(self.indexfile, n * len(entry))
806 f = self.opener(self.datafile, "a")
911 f = self.opener(self.datafile, "a")
807 if data[0]:
912 if data[0]:
808 f.write(data[0])
913 f.write(data[0])
809 f.write(data[1])
914 f.write(data[1])
810 f = self.opener(self.indexfile, "a")
915 f = self.opener(self.indexfile, "a")
811 else:
916 else:
812 f = self.opener(self.indexfile, "a+")
917 f = self.opener(self.indexfile, "a+")
813 f.seek(0, 2)
918 f.seek(0, 2)
814 transaction.add(self.indexfile, f.tell())
919 transaction.add(self.indexfile, f.tell())
815
920
816 if len(self.index) == 1 and self.version != 0:
921 if len(self.index) == 1 and self.version != 0:
817 l = struct.pack(versionformat, self.version)
922 l = struct.pack(versionformat, self.version)
818 f.write(l)
923 f.write(l)
819 entry = entry[4:]
924 entry = entry[4:]
820
925
821 f.write(entry)
926 f.write(entry)
822
927
823 if self.inlinedata():
928 if self.inlinedata():
824 f.write(data[0])
929 f.write(data[0])
825 f.write(data[1])
930 f.write(data[1])
826 self.checkinlinesize(transaction, f)
931 self.checkinlinesize(transaction, f)
827
932
828 self.cache = (node, n, text)
933 self.cache = (node, n, text)
829 return node
934 return node
830
935
831 def ancestor(self, a, b):
936 def ancestor(self, a, b):
832 """calculate the least common ancestor of nodes a and b"""
937 """calculate the least common ancestor of nodes a and b"""
833 # calculate the distance of every node from root
938 # calculate the distance of every node from root
834 dist = {nullid: 0}
939 dist = {nullid: 0}
835 for i in xrange(self.count()):
940 for i in xrange(self.count()):
836 n = self.node(i)
941 n = self.node(i)
837 p1, p2 = self.parents(n)
942 p1, p2 = self.parents(n)
838 dist[n] = max(dist[p1], dist[p2]) + 1
943 dist[n] = max(dist[p1], dist[p2]) + 1
839
944
840 # traverse ancestors in order of decreasing distance from root
945 # traverse ancestors in order of decreasing distance from root
841 def ancestors(node):
946 def ancestors(node):
842 # we store negative distances because heap returns smallest member
947 # we store negative distances because heap returns smallest member
843 h = [(-dist[node], node)]
948 h = [(-dist[node], node)]
844 seen = {}
949 seen = {}
845 while h:
950 while h:
846 d, n = heapq.heappop(h)
951 d, n = heapq.heappop(h)
847 if n not in seen:
952 if n not in seen:
848 seen[n] = 1
953 seen[n] = 1
849 yield (-d, n)
954 yield (-d, n)
850 for p in self.parents(n):
955 for p in self.parents(n):
851 heapq.heappush(h, (-dist[p], p))
956 heapq.heappush(h, (-dist[p], p))
852
957
853 def generations(node):
958 def generations(node):
854 sg, s = None, {}
959 sg, s = None, {}
855 for g,n in ancestors(node):
960 for g,n in ancestors(node):
856 if g != sg:
961 if g != sg:
857 if sg:
962 if sg:
858 yield sg, s
963 yield sg, s
859 sg, s = g, {n:1}
964 sg, s = g, {n:1}
860 else:
965 else:
861 s[n] = 1
966 s[n] = 1
862 yield sg, s
967 yield sg, s
863
968
864 x = generations(a)
969 x = generations(a)
865 y = generations(b)
970 y = generations(b)
866 gx = x.next()
971 gx = x.next()
867 gy = y.next()
972 gy = y.next()
868
973
869 # increment each ancestor list until it is closer to root than
974 # increment each ancestor list until it is closer to root than
870 # the other, or they match
975 # the other, or they match
871 while 1:
976 while 1:
872 #print "ancestor gen %s %s" % (gx[0], gy[0])
977 #print "ancestor gen %s %s" % (gx[0], gy[0])
873 if gx[0] == gy[0]:
978 if gx[0] == gy[0]:
874 # find the intersection
979 # find the intersection
875 i = [ n for n in gx[1] if n in gy[1] ]
980 i = [ n for n in gx[1] if n in gy[1] ]
876 if i:
981 if i:
877 return i[0]
982 return i[0]
878 else:
983 else:
879 #print "next"
984 #print "next"
880 gy = y.next()
985 gy = y.next()
881 gx = x.next()
986 gx = x.next()
882 elif gx[0] < gy[0]:
987 elif gx[0] < gy[0]:
883 #print "next y"
988 #print "next y"
884 gy = y.next()
989 gy = y.next()
885 else:
990 else:
886 #print "next x"
991 #print "next x"
887 gx = x.next()
992 gx = x.next()
888
993
889 def group(self, nodelist, lookup, infocollect=None):
994 def group(self, nodelist, lookup, infocollect=None):
890 """calculate a delta group
995 """calculate a delta group
891
996
892 Given a list of changeset revs, return a set of deltas and
997 Given a list of changeset revs, return a set of deltas and
893 metadata corresponding to nodes. the first delta is
998 metadata corresponding to nodes. the first delta is
894 parent(nodes[0]) -> nodes[0] the receiver is guaranteed to
999 parent(nodes[0]) -> nodes[0] the receiver is guaranteed to
895 have this parent as it has all history before these
1000 have this parent as it has all history before these
896 changesets. parent is parent[0]
1001 changesets. parent is parent[0]
897 """
1002 """
898 revs = [self.rev(n) for n in nodelist]
1003 revs = [self.rev(n) for n in nodelist]
899
1004
900 # if we don't have any revisions touched by these changesets, bail
1005 # if we don't have any revisions touched by these changesets, bail
901 if not revs:
1006 if not revs:
902 yield changegroup.closechunk()
1007 yield changegroup.closechunk()
903 return
1008 return
904
1009
905 # add the parent of the first rev
1010 # add the parent of the first rev
906 p = self.parents(self.node(revs[0]))[0]
1011 p = self.parents(self.node(revs[0]))[0]
907 revs.insert(0, self.rev(p))
1012 revs.insert(0, self.rev(p))
908
1013
909 # build deltas
1014 # build deltas
910 for d in xrange(0, len(revs) - 1):
1015 for d in xrange(0, len(revs) - 1):
911 a, b = revs[d], revs[d + 1]
1016 a, b = revs[d], revs[d + 1]
912 nb = self.node(b)
1017 nb = self.node(b)
913
1018
914 if infocollect is not None:
1019 if infocollect is not None:
915 infocollect(nb)
1020 infocollect(nb)
916
1021
917 d = self.revdiff(a, b)
1022 d = self.revdiff(a, b)
918 p = self.parents(nb)
1023 p = self.parents(nb)
919 meta = nb + p[0] + p[1] + lookup(nb)
1024 meta = nb + p[0] + p[1] + lookup(nb)
920 yield changegroup.genchunk("%s%s" % (meta, d))
1025 yield changegroup.genchunk("%s%s" % (meta, d))
921
1026
922 yield changegroup.closechunk()
1027 yield changegroup.closechunk()
923
1028
    def addgroup(self, revs, linkmapper, transaction, unique=0):
        """
        add a delta group

        Given a set of deltas, add them to the revision log.  The
        first delta is against its parent, which should be in our
        log; the rest are against the previous delta.
        """

        # track the base of the current delta log
        r = self.count()
        t = r - 1
        node = None

        base = prev = -1
        start = end = textlen = 0
        if r:
            end = self.end(t)

        ifh = self.opener(self.indexfile, "a+")
        ifh.seek(0, 2)
        transaction.add(self.indexfile, ifh.tell())
        if self.inlinedata():
            dfh = None
        else:
            transaction.add(self.datafile, end)
            dfh = self.opener(self.datafile, "a")

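        # For an inline revlog (REVLOGNGINLINEDATA) the delta data is
        # interleaved with the index entries in the index file itself,
        # so there is no separate data file to write until
        # checkinlinesize() below decides the revlog has outgrown the
        # inline format.
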
        # loop through our set of deltas
        chain = None
        for chunk in revs:
            node, p1, p2, cs = struct.unpack("20s20s20s20s", chunk[:80])
            link = linkmapper(cs)
            if node in self.nodemap:
                # this can happen if two branches make the same change
                # if unique:
                #    raise RevlogError(_("already have %s") % hex(node[:4]))
                chain = node
                continue
            delta = chunk[80:]

            for p in (p1, p2):
                if p not in self.nodemap:
                    raise RevlogError(_("unknown parent %s") % short(p))

            if not chain:
                # retrieve the parent revision of the delta chain
                chain = p1
                if chain not in self.nodemap:
                    raise RevlogError(_("unknown base %s") % short(chain))

            # full versions are inserted when the needed deltas become
            # comparable to the uncompressed text or when the previous
            # version is not the one we have a delta against.  We use
            # the size of the previous full rev as a proxy for the
            # current size.

            if chain == prev:
                tempd = compress(delta)
                cdelta = tempd[0] + tempd[1]
                textlen = mdiff.patchedsize(textlen, delta)

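            # Worked example of the cutoff below (illustrative numbers):
            # if the patched text would be textlen = 1000 bytes and the
            # delta chain already spans end - start = 1900 bytes on
            # disk, a 150-byte compressed delta brings the chain to
            # 2050 bytes, which exceeds 2 * 1000, so a full version is
            # stored instead of extending the chain.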
            if chain != prev or (end - start + len(cdelta)) > textlen * 2:
                # flush our writes here so we can read the current
                # revision back in self.revision()
                if dfh:
                    dfh.flush()
                ifh.flush()
                text = self.revision(chain)
                text = self.patches(text, [delta])
                chk = self.addrevision(text, transaction, link, p1, p2)
                if chk != node:
                    raise RevlogError(_("consistency error adding group"))
                textlen = len(text)
            else:
                if self.version == 0:
                    e = (end, len(cdelta), base, link, p1, p2, node)
                else:
                    e = (self.offset_type(end, 0), len(cdelta), textlen, base,
                         link, self.rev(p1), self.rev(p2), node)
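                # A RevlogNG index entry packs, in order: the data
                # offset combined with the flag bits, the compressed
                # length, the uncompressed length, the delta-base rev,
                # the changelog link rev, both parents as revs, and the
                # node hash.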
                self.index.append(e)
                self.nodemap[node] = r
                if self.inlinedata():
                    ifh.write(struct.pack(self.indexformat, *e))
                    ifh.write(cdelta)
                    self.checkinlinesize(transaction, ifh)
                    if not self.inlinedata():
                        dfh = self.opener(self.datafile, "a")
                        ifh = self.opener(self.indexfile, "a")
                else:
                    if not dfh:
                        # addrevision switched from inline to conventional
                        # reopen the index
                        dfh = self.opener(self.datafile, "a")
                        ifh = self.opener(self.indexfile, "a")
                    dfh.write(cdelta)
                    ifh.write(struct.pack(self.indexformat, *e))

            t, r, chain, prev = r, r + 1, node, node
            base = self.base(t)
            start = self.start(base)
            end = self.end(t)

        if node is None:
            raise RevlogError(_("group to be added is empty"))
        return node

    def strip(self, rev, minlink):
        if self.count() == 0 or rev >= self.count():
            return

        if isinstance(self.index, lazyindex):
            self.loadindexmap()

        # When stripping away a revision, we need to make sure it
        # does not actually belong to an older changeset.
        # The minlink parameter defines the oldest revision
        # we're allowed to strip away.
        while minlink > self.index[rev][-4]:
            rev += 1
            if rev >= self.count():
                return

        # first truncate the files on disk
        end = self.start(rev)
        if not self.inlinedata():
            df = self.opener(self.datafile, "a")
            df.truncate(end)
            end = rev * struct.calcsize(self.indexformat)
        else:
            end += rev * struct.calcsize(self.indexformat)
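
        # Example of the inline arithmetic above (the entry size depends
        # on the index format; struct.calcsize gives it exactly):
        # keeping revs 0..rev-1 of an inline revlog means keeping rev
        # index entries plus start(rev) bytes of interleaved data, so
        # the file is truncated at start(rev) + rev * entry_size.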

        indexf = self.opener(self.indexfile, "a")
        indexf.truncate(end)

        # then reset internal state in memory to forget those revisions
        self.cache = None
        self.chunkcache = None
        for x in xrange(rev, self.count()):
            del self.nodemap[self.node(x)]

        del self.index[rev:]

    def checksize(self):
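        # Returns a pair (dd, di): dd is how many bytes the data file
        # holds beyond what the index accounts for, and di is the
        # number of stray bytes in the index file (trailing partial
        # entries, or, for inline revlogs, any mismatch against the
        # expected interleaved size).  (0, 0) means the sizes check out.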
        expected = 0
        if self.count():
            expected = self.end(self.count() - 1)

        try:
            f = self.opener(self.datafile)
            f.seek(0, 2)
            actual = f.tell()
            dd = actual - expected
        except IOError, inst:
            if inst.errno != errno.ENOENT:
                raise
            dd = 0

        try:
            f = self.opener(self.indexfile)
            f.seek(0, 2)
            actual = f.tell()
            s = struct.calcsize(self.indexformat)
            i = actual / s
            di = actual - (i * s)
            if self.inlinedata():
                databytes = 0
                for r in xrange(self.count()):
                    databytes += self.length(r)
                dd = 0
                di = actual - self.count() * s - databytes
        except IOError, inst:
            if inst.errno != errno.ENOENT:
                raise
            di = 0

        return (dd, di)