Fix inlined revlogs to seek to eof after opening "a+"

mason@suse.com
r2077:4d0700ae default
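The change itself is two one-line additions, one in addrevision() and one in addgroup(). A file opened in "a+" mode always writes at end-of-file, but its initial position, and therefore what tell() reports before the first write, is implementation-defined for stdio-backed file objects and can be 0. Both call sites record f.tell() via transaction.add() as the offset to truncate back to on rollback, so a spurious 0 would mark the entire inlined index for truncation. Seeking to end-of-file before calling tell() makes the recorded offset the real file size. A minimal sketch of the failure mode (the file name is hypothetical, not part of the changeset):

import os

# Create a demo file with 100 bytes of existing content.
f = open("demo.i", "wb")
f.write("x" * 100)
f.close()

f = open("demo.i", "a+")
print f.tell()        # implementation-defined: stdio may report 0 here

f.seek(0, 2)          # the fix: move to end-of-file explicitly
print f.tell()        # now reliably 100, the true size

f.close()
os.remove("demo.i")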
@@ -1,1066 +1,1068 @@
 """
 revlog.py - storage back-end for mercurial

 This provides efficient delta storage with O(1) retrieve and append
 and O(changes) merge between branches

 Copyright 2005 Matt Mackall <mpm@selenic.com>

 This software may be used and distributed according to the terms
 of the GNU General Public License, incorporated herein by reference.
 """

 from node import *
 from i18n import gettext as _
 from demandload import demandload
 demandload(globals(), "binascii changegroup errno heapq mdiff os")
 demandload(globals(), "sha struct zlib")

 # revlog version strings
 REVLOGV0 = 0
 REVLOGNG = 1

 # revlog flags
 REVLOGNGINLINEDATA = (1 << 16)

 def flagstr(flag):
     if flag == "inline":
         return REVLOGNGINLINEDATA
     raise RevlogError(_("unknown revlog flag %s" % flag))

 def hash(text, p1, p2):
     """generate a hash from the given text and its parent hashes

     This hash combines both the current file contents and its history
     in a manner that makes it easy to distinguish nodes with the same
     content in the revision graph.
     """
     l = [p1, p2]
     l.sort()
     s = sha.new(l[0])
     s.update(l[1])
     s.update(text)
     return s.digest()

 def compress(text):
     """ generate a possibly-compressed representation of text """
     if not text: return ("", text)
     if len(text) < 44:
         if text[0] == '\0': return ("", text)
         return ('u', text)
     bin = zlib.compress(text)
     if len(bin) > len(text):
         if text[0] == '\0': return ("", text)
         return ('u', text)
     return ("", bin)

 def decompress(bin):
     """ decompress the given input """
     if not bin: return bin
     t = bin[0]
     if t == '\0': return bin
     if t == 'x': return zlib.decompress(bin)
     if t == 'u': return bin[1:]
     raise RevlogError(_("unknown compression type %r") % t)

 indexformatv0 = ">4l20s20s20s"
 # index ng:
 #  6 bytes offset
 #  2 bytes flags
 #  4 bytes compressed length
 #  4 bytes uncompressed length
 #  4 bytes: base rev
 #  4 bytes link rev
 #  4 bytes parent 1 rev
 #  4 bytes parent 2 rev
 # 32 bytes: nodeid
 indexformatng = ">Qiiiiii20s12x"
 versionformat = ">i"

 class lazyparser(object):
     """
     this class avoids the need to parse the entirety of large indices

     By default we parse and load 1000 entries at a time.

     If no position is specified, we load the whole index, and replace
     the lazy objects in revlog with the underlying objects for
     efficiency in cases where we look at most of the nodes.
     """
     def __init__(self, data, revlog, indexformat):
         self.data = data
         self.s = struct.calcsize(indexformat)
         self.indexformat = indexformat
         self.l = len(data)/self.s
         self.index = [None] * self.l
         self.map = {nullid: -1}
         self.all = 0
         self.revlog = revlog

     def load(self, pos=None):
         if self.all: return
         if pos is not None:
             block = pos / 1000
             i = block * 1000
             end = min(self.l, i + 1000)
         else:
             self.all = 1
             i = 0
             end = self.l
             self.revlog.index = self.index
             self.revlog.nodemap = self.map

         while i < end:
             if not self.index[i]:
                 d = self.data[i * self.s: (i + 1) * self.s]
                 e = struct.unpack(self.indexformat, d)
                 self.index[i] = e
                 self.map[e[-1]] = i
             i += 1

 class lazyindex(object):
     """a lazy version of the index array"""
     def __init__(self, parser):
         self.p = parser
     def __len__(self):
         return len(self.p.index)
     def load(self, pos):
         if pos < 0:
             pos += len(self.p.index)
         self.p.load(pos)
         return self.p.index[pos]
     def __getitem__(self, pos):
         return self.p.index[pos] or self.load(pos)
     def __setitem__(self, pos, item):
         self.p.index[pos] = item
     def __delitem__(self, pos):
         del self.p.index[pos]
     def append(self, e):
         self.p.index.append(e)

 class lazymap(object):
     """a lazy version of the node map"""
     def __init__(self, parser):
         self.p = parser
     def load(self, key):
         if self.p.all: return
         n = self.p.data.find(key)
         if n < 0:
             raise KeyError(key)
         pos = n / self.p.s
         self.p.load(pos)
     def __contains__(self, key):
         self.p.load()
         return key in self.p.map
     def __iter__(self):
         yield nullid
         for i in xrange(self.p.l):
             try:
                 yield self.p.index[i][-1]
             except:
                 self.p.load(i)
                 yield self.p.index[i][-1]
     def __getitem__(self, key):
         try:
             return self.p.map[key]
         except KeyError:
             try:
                 self.load(key)
                 return self.p.map[key]
             except KeyError:
                 raise KeyError("node " + hex(key))
     def __setitem__(self, key, val):
         self.p.map[key] = val
     def __delitem__(self, key):
         del self.p.map[key]

 class RevlogError(Exception): pass

 class revlog(object):
     """
     the underlying revision storage object

     A revlog consists of two parts, an index and the revision data.

     The index is a file with a fixed record size containing
     information on each revision, includings its nodeid (hash), the
     nodeids of its parents, the position and offset of its data within
     the data file, and the revision it's based on. Finally, each entry
     contains a linkrev entry that can serve as a pointer to external
     data.

     The revision data itself is a linear collection of data chunks.
     Each chunk represents a revision and is usually represented as a
     delta against the previous chunk. To bound lookup time, runs of
     deltas are limited to about 2 times the length of the original
     version data. This makes retrieval of a version proportional to
     its size, or O(1) relative to the number of revisions.

     Both pieces of the revlog are written to in an append-only
     fashion, which means we never need to rewrite a file to insert or
     remove data, and can use some simple techniques to avoid the need
     for locking while reading.
     """
     def __init__(self, opener, indexfile, datafile, defversion=0):
         """
         create a revlog object

         opener is a function that abstracts the file opening operation
         and can be used to implement COW semantics or the like.
         """
         self.indexfile = indexfile
         self.datafile = datafile
         self.opener = opener

         self.indexstat = None
         self.cache = None
         self.chunkcache = None
         self.defversion = defversion
         self.load()

     def load(self):
         v = self.defversion
         try:
             f = self.opener(self.indexfile)
             i = f.read()
         except IOError, inst:
             if inst.errno != errno.ENOENT:
                 raise
             i = ""
         else:
             try:
                 st = os.fstat(f.fileno())
             except AttributeError, inst:
                 st = None
             else:
                 oldst = self.indexstat
                 if (oldst and st.st_dev == oldst.st_dev
                     and st.st_ino == oldst.st_ino
                     and st.st_mtime == oldst.st_mtime
                     and st.st_ctime == oldst.st_ctime):
                     return
                 self.indexstat = st
         if len(i) > 0:
             v = struct.unpack(versionformat, i[:4])[0]
         flags = v & ~0xFFFF
         fmt = v & 0xFFFF
         if fmt == 0:
             if flags:
                 raise RevlogError(_("index %s invalid flags %x for format v0" %
                                     (self.indexfile, flags)))
         elif fmt == REVLOGNG:
             if flags & ~REVLOGNGINLINEDATA:
                 raise RevlogError(_("index %s invalid flags %x for revlogng" %
                                     (self.indexfile, flags)))
         else:
             raise RevlogError(_("index %s invalid format %d" %
                                 (self.indexfile, fmt)))
         self.version = v
         if v == 0:
             self.indexformat = indexformatv0
         else:
             self.indexformat = indexformatng

         if i:
             if not self.inlinedata() and st and st.st_size > 10000:
                 # big index, let's parse it on demand
                 parser = lazyparser(i, self, self.indexformat)
                 self.index = lazyindex(parser)
                 self.nodemap = lazymap(parser)
             else:
                 self.parseindex(i)
             if self.inlinedata():
                 # we've already got the entire data file read in, save it
                 # in the chunk data
                 self.chunkcache = (0, i)
             if self.version != 0:
                 e = list(self.index[0])
                 type = self.ngtype(e[0])
                 e[0] = self.offset_type(0, type)
                 self.index[0] = e
         else:
             self.nodemap = { nullid: -1}
             self.index = []


     def parseindex(self, data):
         s = struct.calcsize(self.indexformat)
         l = len(data)
         self.index = []
         self.nodemap = {nullid: -1}
         inline = self.inlinedata()
         off = 0
         n = 0
         while off < l:
             e = struct.unpack(self.indexformat, data[off:off + s])
             self.index.append(e)
             self.nodemap[e[-1]] = n
             n += 1
             off += s
             if inline:
                 off += e[1]

     def ngoffset(self, q):
         if q & 0xFFFF:
             raise RevlogError(_('%s: incompatible revision flag %x') %
                               (self.indexfile, type))
         return long(q >> 16)

     def ngtype(self, q):
         return int(q & 0xFFFF)

     def offset_type(self, offset, type):
         return long(long(offset) << 16 | type)

     def loadindexmap(self):
         """loads both the map and the index from the lazy parser"""
         if isinstance(self.index, lazyindex):
             p = self.index.p
             p.load()

     def inlinedata(self): return self.version & REVLOGNGINLINEDATA
     def tip(self): return self.node(len(self.index) - 1)
     def count(self): return len(self.index)
     def node(self, rev):
         return (rev < 0) and nullid or self.index[rev][-1]
     def rev(self, node):
         try:
             return self.nodemap[node]
         except KeyError:
             raise RevlogError(_('%s: no node %s') % (self.indexfile, hex(node)))
     def linkrev(self, node): return self.index[self.rev(node)][-4]
     def parents(self, node):
         if node == nullid: return (nullid, nullid)
         r = self.rev(node)
         d = self.index[r][-3:-1]
         if self.version == 0:
             return d
         return [ self.node(x) for x in d ]
     def start(self, rev):
         if rev < 0:
             return -1
         if self.version != 0:
             return self.ngoffset(self.index[rev][0])
         return self.index[rev][0]
     def end(self, rev): return self.start(rev) + self.length(rev)

     def length(self, rev):
         if rev < 0:
             return 0
         else:
             return self.index[rev][1]
     def base(self, rev): return (rev < 0) and rev or self.index[rev][-5]

     def reachable(self, rev, stop=None):
         reachable = {}
         visit = [rev]
         reachable[rev] = 1
         if stop:
             stopn = self.rev(stop)
         else:
             stopn = 0
         while visit:
             n = visit.pop(0)
             if n == stop:
                 continue
             if n == nullid:
                 continue
             for p in self.parents(n):
                 if self.rev(p) < stopn:
                     continue
                 if p not in reachable:
                     reachable[p] = 1
                     visit.append(p)
         return reachable

     def nodesbetween(self, roots=None, heads=None):
         """Return a tuple containing three elements. Elements 1 and 2 contain
         a final list bases and heads after all the unreachable ones have been
         pruned.  Element 0 contains a topologically sorted list of all

         nodes that satisfy these constraints:
         1. All nodes must be descended from a node in roots (the nodes on
            roots are considered descended from themselves).
         2. All nodes must also be ancestors of a node in heads (the nodes in
            heads are considered to be their own ancestors).

         If roots is unspecified, nullid is assumed as the only root.
         If heads is unspecified, it is taken to be the output of the
         heads method (i.e. a list of all nodes in the repository that
         have no children)."""
         nonodes = ([], [], [])
         if roots is not None:
             roots = list(roots)
             if not roots:
                 return nonodes
             lowestrev = min([self.rev(n) for n in roots])
         else:
             roots = [nullid] # Everybody's a descendent of nullid
             lowestrev = -1
         if (lowestrev == -1) and (heads is None):
             # We want _all_ the nodes!
             return ([self.node(r) for r in xrange(0, self.count())],
                     [nullid], list(self.heads()))
         if heads is None:
             # All nodes are ancestors, so the latest ancestor is the last
             # node.
             highestrev = self.count() - 1
             # Set ancestors to None to signal that every node is an ancestor.
             ancestors = None
             # Set heads to an empty dictionary for later discovery of heads
             heads = {}
         else:
             heads = list(heads)
             if not heads:
                 return nonodes
             ancestors = {}
             # Start at the top and keep marking parents until we're done.
             nodestotag = heads[:]
             # Turn heads into a dictionary so we can remove 'fake' heads.
             # Also, later we will be using it to filter out the heads we can't
             # find from roots.
             heads = dict.fromkeys(heads, 0)
             # Remember where the top was so we can use it as a limit later.
             highestrev = max([self.rev(n) for n in nodestotag])
             while nodestotag:
                 # grab a node to tag
                 n = nodestotag.pop()
                 # Never tag nullid
                 if n == nullid:
                     continue
                 # A node's revision number represents its place in a
                 # topologically sorted list of nodes.
                 r = self.rev(n)
                 if r >= lowestrev:
                     if n not in ancestors:
                         # If we are possibly a descendent of one of the roots
                         # and we haven't already been marked as an ancestor
                         ancestors[n] = 1 # Mark as ancestor
                         # Add non-nullid parents to list of nodes to tag.
                         nodestotag.extend([p for p in self.parents(n) if
                                            p != nullid])
                     elif n in heads: # We've seen it before, is it a fake head?
                         # So it is, real heads should not be the ancestors of
                         # any other heads.
                         heads.pop(n)
             if not ancestors:
                 return nonodes
             # Now that we have our set of ancestors, we want to remove any
             # roots that are not ancestors.

             # If one of the roots was nullid, everything is included anyway.
             if lowestrev > -1:
                 # But, since we weren't, let's recompute the lowest rev to not
                 # include roots that aren't ancestors.

                 # Filter out roots that aren't ancestors of heads
                 roots = [n for n in roots if n in ancestors]
                 # Recompute the lowest revision
                 if roots:
                     lowestrev = min([self.rev(n) for n in roots])
                 else:
                     # No more roots?  Return empty list
                     return nonodes
             else:
                 # We are descending from nullid, and don't need to care about
                 # any other roots.
                 lowestrev = -1
                 roots = [nullid]
         # Transform our roots list into a 'set' (i.e. a dictionary where the
         # values don't matter.
         descendents = dict.fromkeys(roots, 1)
         # Also, keep the original roots so we can filter out roots that aren't
         # 'real' roots (i.e. are descended from other roots).
         roots = descendents.copy()
         # Our topologically sorted list of output nodes.
         orderedout = []
         # Don't start at nullid since we don't want nullid in our output list,
         # and if nullid shows up in descedents, empty parents will look like
         # they're descendents.
         for r in xrange(max(lowestrev, 0), highestrev + 1):
             n = self.node(r)
             isdescendent = False
             if lowestrev == -1: # Everybody is a descendent of nullid
                 isdescendent = True
             elif n in descendents:
                 # n is already a descendent
                 isdescendent = True
                 # This check only needs to be done here because all the roots
                 # will start being marked is descendents before the loop.
                 if n in roots:
                     # If n was a root, check if it's a 'real' root.
                     p = tuple(self.parents(n))
                     # If any of its parents are descendents, it's not a root.
                     if (p[0] in descendents) or (p[1] in descendents):
                         roots.pop(n)
             else:
                 p = tuple(self.parents(n))
                 # A node is a descendent if either of its parents are
                 # descendents.  (We seeded the dependents list with the roots
                 # up there, remember?)
                 if (p[0] in descendents) or (p[1] in descendents):
                     descendents[n] = 1
                     isdescendent = True
             if isdescendent and ((ancestors is None) or (n in ancestors)):
                 # Only include nodes that are both descendents and ancestors.
                 orderedout.append(n)
                 if (ancestors is not None) and (n in heads):
                     # We're trying to figure out which heads are reachable
                     # from roots.
                     # Mark this head as having been reached
                     heads[n] = 1
                 elif ancestors is None:
                     # Otherwise, we're trying to discover the heads.
                     # Assume this is a head because if it isn't, the next step
                     # will eventually remove it.
                     heads[n] = 1
                     # But, obviously its parents aren't.
                     for p in self.parents(n):
                         heads.pop(p, None)
         heads = [n for n in heads.iterkeys() if heads[n] != 0]
         roots = roots.keys()
         assert orderedout
         assert roots
         assert heads
         return (orderedout, roots, heads)

     def heads(self, start=None):
         """return the list of all nodes that have no children

         if start is specified, only heads that are descendants of
         start will be returned

         """
         if start is None:
             start = nullid
         reachable = {start: 1}
         heads = {start: 1}
         startrev = self.rev(start)

         for r in xrange(startrev + 1, self.count()):
             n = self.node(r)
             for pn in self.parents(n):
                 if pn in reachable:
                     reachable[n] = 1
                     heads[n] = 1
                 if pn in heads:
                     del heads[pn]
         return heads.keys()

     def children(self, node):
         """find the children of a given node"""
         c = []
         p = self.rev(node)
         for r in range(p + 1, self.count()):
             n = self.node(r)
             for pn in self.parents(n):
                 if pn == node:
                     c.append(n)
                     continue
                 elif pn == nullid:
                     continue
         return c

     def lookup(self, id):
         """locate a node based on revision number or subset of hex nodeid"""
         try:
             rev = int(id)
             if str(rev) != id: raise ValueError
             if rev < 0: rev = self.count() + rev
             if rev < 0 or rev >= self.count(): raise ValueError
             return self.node(rev)
         except (ValueError, OverflowError):
             c = []
             for n in self.nodemap:
                 if hex(n).startswith(id):
                     c.append(n)
             if len(c) > 1: raise RevlogError(_("Ambiguous identifier"))
             if len(c) < 1: raise RevlogError(_("No match found"))
             return c[0]

         return None

     def diff(self, a, b):
         """return a delta between two revisions"""
         return mdiff.textdiff(a, b)

     def patches(self, t, pl):
         """apply a list of patches to a string"""
         return mdiff.patches(t, pl)

     def chunk(self, rev, df=None, cachelen=4096):
         start, length = self.start(rev), self.length(rev)
         inline = self.inlinedata()
         if inline:
             start += (rev + 1) * struct.calcsize(self.indexformat)
         end = start + length
         def loadcache(df):
             cache_length = max(cachelen, length) # 4k
             if not df:
                 if inline:
                     df = self.opener(self.indexfile)
                 else:
                     df = self.opener(self.datafile)
             df.seek(start)
             self.chunkcache = (start, df.read(cache_length))

         if not self.chunkcache:
             loadcache(df)

         cache_start = self.chunkcache[0]
         cache_end = cache_start + len(self.chunkcache[1])
         if start >= cache_start and end <= cache_end:
             # it is cached
             offset = start - cache_start
         else:
             loadcache(df)
             offset = 0

         #def checkchunk():
         #    df = self.opener(self.datafile)
         #    df.seek(start)
         #    return df.read(length)
         #assert s == checkchunk()
         return decompress(self.chunkcache[1][offset:offset + length])

     def delta(self, node):
         """return or calculate a delta between a node and its predecessor"""
         r = self.rev(node)
         return self.revdiff(r - 1, r)

     def revdiff(self, rev1, rev2):
         """return or calculate a delta between two revisions"""
         b1 = self.base(rev1)
         b2 = self.base(rev2)
         if b1 == b2 and rev1 + 1 == rev2:
             return self.chunk(rev2)
         else:
             return self.diff(self.revision(self.node(rev1)),
                              self.revision(self.node(rev2)))

     def revision(self, node):
         """return an uncompressed revision of a given"""
         if node == nullid: return ""
         if self.cache and self.cache[0] == node: return self.cache[2]

         # look up what we need to read
         text = None
         rev = self.rev(node)
         base = self.base(rev)

         if self.inlinedata():
             # we probably have the whole chunk cached
             df = None
         else:
             df = self.opener(self.datafile)

         # do we have useful data cached?
         if self.cache and self.cache[1] >= base and self.cache[1] < rev:
             base = self.cache[1]
             text = self.cache[2]
         else:
             text = self.chunk(base, df=df)

         bins = []
         for r in xrange(base + 1, rev + 1):
             bins.append(self.chunk(r, df=df))

         text = self.patches(text, bins)

         p1, p2 = self.parents(node)
         if node != hash(text, p1, p2):
             raise RevlogError(_("integrity check failed on %s:%d")
                               % (self.datafile, rev))

         self.cache = (node, rev, text)
         return text

     def checkinlinesize(self, tr, fp=None):
         if not self.inlinedata():
             return
         if not fp:
             fp = self.opener(self.indexfile, 'r')
         size = fp.tell()
         if size < 131072:
             return
         tr.add(self.datafile, 0)
         df = self.opener(self.datafile, 'w')
         calc = struct.calcsize(self.indexformat)
         for r in xrange(self.count()):
             start = self.start(r) + (r + 1) * calc
             length = self.length(r)
             fp.seek(start)
             d = fp.read(length)
             df.write(d)
         fp.close()
         df.close()
         fp = self.opener(self.indexfile, 'w', atomictemp=True)
         self.version &= ~(REVLOGNGINLINEDATA)
         if self.count():
             x = self.index[0]
             e = struct.pack(self.indexformat, *x)[4:]
             l = struct.pack(versionformat, self.version)
             fp.write(l)
             fp.write(e)

         for i in xrange(1, self.count()):
             x = self.index[i]
             e = struct.pack(self.indexformat, *x)
             fp.write(e)

         # if we don't call rename, the temp file will never replace the
         # real index
         fp.rename()
         self.chunkcache = None

     def addrevision(self, text, transaction, link, p1=None, p2=None, d=None):
         """add a revision to the log

         text - the revision data to add
         transaction - the transaction object used for rollback
         link - the linkrev data to add
         p1, p2 - the parent nodeids of the revision
         d - an optional precomputed delta
         """
         if text is None: text = ""
         if p1 is None: p1 = self.tip()
         if p2 is None: p2 = nullid

         node = hash(text, p1, p2)

         if node in self.nodemap:
             return node

         n = self.count()
         t = n - 1

         if n:
             base = self.base(t)
             start = self.start(base)
             end = self.end(t)
             if not d:
                 prev = self.revision(self.tip())
                 d = self.diff(prev, str(text))
             data = compress(d)
             l = len(data[1]) + len(data[0])
             dist = end - start + l

         # full versions are inserted when the needed deltas
         # become comparable to the uncompressed text
         if not n or dist > len(text) * 2:
             data = compress(text)
             l = len(data[1]) + len(data[0])
             base = n
         else:
             base = self.base(t)

         offset = 0
         if t >= 0:
             offset = self.end(t)

         if self.version == 0:
             e = (offset, l, base, link, p1, p2, node)
         else:
             e = (self.offset_type(offset, 0), l, len(text),
                  base, link, self.rev(p1), self.rev(p2), node)

         self.index.append(e)
         self.nodemap[node] = n
         entry = struct.pack(self.indexformat, *e)

         if not self.inlinedata():
             transaction.add(self.datafile, offset)
             transaction.add(self.indexfile, n * len(entry))
             f = self.opener(self.datafile, "a")
             if data[0]:
                 f.write(data[0])
             f.write(data[1])
             f = self.opener(self.indexfile, "a")
         else:
             f = self.opener(self.indexfile, "a+")
+            f.seek(0, 2)
             transaction.add(self.indexfile, f.tell())

         if len(self.index) == 1 and self.version != 0:
             l = struct.pack(versionformat, self.version)
             f.write(l)
             entry = entry[4:]

         f.write(entry)

         if self.inlinedata():
             f.write(data[0])
             f.write(data[1])
             self.checkinlinesize(transaction, f)

         self.cache = (node, n, text)
         return node

     def ancestor(self, a, b):
         """calculate the least common ancestor of nodes a and b"""
         # calculate the distance of every node from root
         dist = {nullid: 0}
         for i in xrange(self.count()):
             n = self.node(i)
             p1, p2 = self.parents(n)
             dist[n] = max(dist[p1], dist[p2]) + 1

         # traverse ancestors in order of decreasing distance from root
         def ancestors(node):
             # we store negative distances because heap returns smallest member
             h = [(-dist[node], node)]
             seen = {}
             while h:
                 d, n = heapq.heappop(h)
                 if n not in seen:
                     seen[n] = 1
                     yield (-d, n)
                     for p in self.parents(n):
                         heapq.heappush(h, (-dist[p], p))

         def generations(node):
             sg, s = None, {}
             for g,n in ancestors(node):
                 if g != sg:
                     if sg:
                         yield sg, s
                     sg, s = g, {n:1}
                 else:
                     s[n] = 1
             yield sg, s

         x = generations(a)
         y = generations(b)
         gx = x.next()
         gy = y.next()

         # increment each ancestor list until it is closer to root than
         # the other, or they match
         while 1:
             #print "ancestor gen %s %s" % (gx[0], gy[0])
             if gx[0] == gy[0]:
                 # find the intersection
                 i = [ n for n in gx[1] if n in gy[1] ]
                 if i:
                     return i[0]
                 else:
                     #print "next"
                     gy = y.next()
                     gx = x.next()
             elif gx[0] < gy[0]:
                 #print "next y"
                 gy = y.next()
             else:
                 #print "next x"
                 gx = x.next()

     def group(self, nodelist, lookup, infocollect=None):
         """calculate a delta group

         Given a list of changeset revs, return a set of deltas and
         metadata corresponding to nodes. the first delta is
         parent(nodes[0]) -> nodes[0] the receiver is guaranteed to
         have this parent as it has all history before these
         changesets. parent is parent[0]
         """
         revs = [self.rev(n) for n in nodelist]

         # if we don't have any revisions touched by these changesets, bail
         if not revs:
             yield changegroup.closechunk()
             return

         # add the parent of the first rev
         p = self.parents(self.node(revs[0]))[0]
         revs.insert(0, self.rev(p))

         # build deltas
         for d in xrange(0, len(revs) - 1):
             a, b = revs[d], revs[d + 1]
             nb = self.node(b)

             if infocollect is not None:
                 infocollect(nb)

             d = self.revdiff(a, b)
             p = self.parents(nb)
             meta = nb + p[0] + p[1] + lookup(nb)
             yield changegroup.genchunk("%s%s" % (meta, d))

         yield changegroup.closechunk()

     def addgroup(self, revs, linkmapper, transaction, unique=0):
         """
         add a delta group

         given a set of deltas, add them to the revision log. the
         first delta is against its parent, which should be in our
         log, the rest are against the previous delta.
         """

         #track the base of the current delta log
         r = self.count()
         t = r - 1
         node = None

         base = prev = -1
         start = end = measure = 0
         if r:
             end = self.end(t)

         ifh = self.opener(self.indexfile, "a+")
+        ifh.seek(0, 2)
         transaction.add(self.indexfile, ifh.tell())
         if self.inlinedata():
             dfh = None
         else:
             transaction.add(self.datafile, end)
             dfh = self.opener(self.datafile, "a")

         # loop through our set of deltas
         chain = None
         for chunk in revs:
             node, p1, p2, cs = struct.unpack("20s20s20s20s", chunk[:80])
             link = linkmapper(cs)
             if node in self.nodemap:
                 # this can happen if two branches make the same change
                 # if unique:
                 #    raise RevlogError(_("already have %s") % hex(node[:4]))
                 chain = node
                 continue
             delta = chunk[80:]

             for p in (p1, p2):
                 if not p in self.nodemap:
                     raise RevlogError(_("unknown parent %s") % short(p1))

             if not chain:
                 # retrieve the parent revision of the delta chain
                 chain = p1
                 if not chain in self.nodemap:
                     raise RevlogError(_("unknown base %s") % short(chain[:4]))

             # full versions are inserted when the needed deltas become
             # comparable to the uncompressed text or when the previous
             # version is not the one we have a delta against. We use
             # the size of the previous full rev as a proxy for the
             # current size.

             if chain == prev:
                 tempd = compress(delta)
                 cdelta = tempd[0] + tempd[1]

             if chain != prev or (end - start + len(cdelta)) > measure * 2:
                 # flush our writes here so we can read it in revision
                 if dfh:
                     dfh.flush()
                 ifh.flush()
                 text = self.revision(chain)
                 text = self.patches(text, [delta])
                 chk = self.addrevision(text, transaction, link, p1, p2)
                 if chk != node:
                     raise RevlogError(_("consistency error adding group"))
                 measure = len(text)
             else:
                 if self.version == 0:
                     e = (end, len(cdelta), base, link, p1, p2, node)
                 else:
                     e = (self.offset_type(end, 0), len(cdelta), -1, base,
                          link, self.rev(p1), self.rev(p2), node)
                 self.index.append(e)
                 self.nodemap[node] = r
                 if self.inlinedata():
                     ifh.write(struct.pack(self.indexformat, *e))
                     ifh.write(cdelta)
                     self.checkinlinesize(transaction, ifh)
                     if not self.inlinedata():
                         dfh = self.opener(self.datafile, "a")
                         ifh = self.opener(self.indexfile, "a")
                 else:
                     if not dfh:
                         # addrevision switched from inline to conventional
                         # reopen the index
                         dfh = self.opener(self.datafile, "a")
                         ifh = self.opener(self.indexfile, "a")
                     dfh.write(cdelta)
                     ifh.write(struct.pack(self.indexformat, *e))

             t, r, chain, prev = r, r + 1, node, node
             base = self.base(t)
             start = self.start(base)
             end = self.end(t)

         if node is None:
             raise RevlogError(_("group to be added is empty"))
         return node

     def strip(self, rev, minlink):
         if self.count() == 0 or rev >= self.count():
             return

         if isinstance(self.index, lazyindex):
             self.loadindexmap()

         # When stripping away a revision, we need to make sure it
         # does not actually belong to an older changeset.
         # The minlink parameter defines the oldest revision
         # we're allowed to strip away.
         while minlink > self.index[rev][-4]:
             rev += 1
             if rev >= self.count():
                 return

         # first truncate the files on disk
         end = self.start(rev)
         if not self.inlinedata():
             df = self.opener(self.datafile, "a")
             df.truncate(end)
             end = rev * struct.calcsize(self.indexformat)
         else:
             end += rev * struct.calcsize(self.indexformat)

         indexf = self.opener(self.indexfile, "a")
         indexf.truncate(end)

         # then reset internal state in memory to forget those revisions
         self.cache = None
         self.chunkcache = None
         for x in xrange(rev, self.count()):
             del self.nodemap[self.node(x)]

         del self.index[rev:]

     def checksize(self):
         expected = 0
         if self.count():
             expected = self.end(self.count() - 1)

         try:
             f = self.opener(self.datafile)
             f.seek(0, 2)
             actual = f.tell()
             dd = actual - expected
         except IOError, inst:
             if inst.errno != errno.ENOENT:
                 raise
             dd = 0

         try:
             f = self.opener(self.indexfile)
             f.seek(0, 2)
             actual = f.tell()
             s = struct.calcsize(self.indexformat)
             i = actual / s
             di = actual - (i * s)
             if self.inlinedata():
                 databytes = 0
                 for r in xrange(self.count()):
                     databytes += self.length(r)
                 dd = 0
                 di = actual - self.count() * s - databytes
         except IOError, inst:
             if inst.errno != errno.ENOENT:
                 raise
             di = 0

         return (dd, di)

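For context on what those appends are writing: an inlined revlog keeps each revision's compressed data in the .i file immediately after its fixed-size index entry, which is why chunk() and checkinlinesize() above offset every data position by (rev + 1) index-entry widths. A small sketch of that offset arithmetic, using the revlogng entry format from the top of the file (illustration only, not code from the changeset):

import struct

indexformatng = ">Qiiiiii20s12x"   # v1 entry format quoted above

def inline_data_offset(start, rev):
    # Inline layout: entry 0, data 0, entry 1, data 1, ...
    # so revision rev's data follows rev + 1 fixed-size entries
    # in addition to all earlier revisions' data (start).
    return start + (rev + 1) * struct.calcsize(indexformatng)

print struct.calcsize(indexformatng)   # 64 bytes per entry
print inline_data_offset(0, 0)         # revision 0's data begins at byte 64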