revlog: faster hash computation when one of the parent nodes is null...
Nicolas Dumazet
r7883:c63c30ae default
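The idea behind the change: per the comment in the patch, a revision with one null parent always carries nullid in p2, and nullid (20 zero bytes) also sorts before any other node. So instead of constructing a fresh SHA-1 and feeding it the constant nullid on every such call, a sha(nullid) state is computed once at module load and cloned with .copy(), which duplicates an already-initialized hash state more cheaply than re-hashing the constant. Below is a minimal, self-contained sketch of the idea with a rough timing comparison; hashlib.sha1 stands in for mercurial's util.sha1, and hash_old, hash_new and bench are illustrative names, not part of the patch.

# illustrative sketch only: hashlib.sha1 stands in for util.sha1,
# and nullid is defined locally as in mercurial.node
import time
from hashlib import sha1

nullid = "\0" * 20          # the null node: 20 zero bytes
nullhash = sha1(nullid)     # hash state computed once, up front

def hash_old(text, p1, p2):
    # before the patch: always feed both (sorted) parents to a fresh sha1
    l = [p1, p2]
    l.sort()
    s = sha1(l[0])
    s.update(l[1])
    s.update(text)
    return s.digest()

def hash_new(text, p1, p2):
    # after the patch: when p2 is null, clone the precomputed state
    # and hash only the non-null parent
    if p2 == nullid:
        s = nullhash.copy()
        s.update(p1)
    else:
        l = [p1, p2]
        l.sort()
        s = sha1(l[0])
        s.update(l[1])
    s.update(text)
    return s.digest()

p1 = sha1("parent").digest()
# nullid is the smallest possible 20-byte string, so sorting puts it
# first and both functions hash the same byte sequence
assert hash_old("text", p1, nullid) == hash_new("text", p1, nullid)

def bench(f, n=100000):
    # crude wall-clock loop; absolute numbers vary by machine
    t0 = time.time()
    for _ in xrange(n):
        f("some text", p1, nullid)
    return time.time() - t0

print "old: %.3fs  new: %.3fs" % (bench(hash_old), bench(hash_new))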
@@ -1,1361 +1,1370 @@
1 """
1 """
2 revlog.py - storage back-end for mercurial
2 revlog.py - storage back-end for mercurial
3
3
4 This provides efficient delta storage with O(1) retrieve and append
4 This provides efficient delta storage with O(1) retrieve and append
5 and O(changes) merge between branches
5 and O(changes) merge between branches
6
6
7 Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
7 Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
8
8
9 This software may be used and distributed according to the terms
9 This software may be used and distributed according to the terms
10 of the GNU General Public License, incorporated herein by reference.
10 of the GNU General Public License, incorporated herein by reference.
11 """
11 """
12
12
13 # import stuff from node for others to import from revlog
13 # import stuff from node for others to import from revlog
14 from node import bin, hex, nullid, nullrev, short #@UnusedImport
14 from node import bin, hex, nullid, nullrev, short #@UnusedImport
15 from i18n import _
15 from i18n import _
16 import changegroup, errno, ancestor, mdiff, parsers
16 import changegroup, errno, ancestor, mdiff, parsers
17 import struct, util, zlib, error
17 import struct, util, zlib, error
18
18
19 _pack = struct.pack
19 _pack = struct.pack
20 _unpack = struct.unpack
20 _unpack = struct.unpack
21 _compress = zlib.compress
21 _compress = zlib.compress
22 _decompress = zlib.decompress
22 _decompress = zlib.decompress
23 _sha = util.sha1
23 _sha = util.sha1
24
24
25 # revlog flags
25 # revlog flags
26 REVLOGV0 = 0
26 REVLOGV0 = 0
27 REVLOGNG = 1
27 REVLOGNG = 1
28 REVLOGNGINLINEDATA = (1 << 16)
28 REVLOGNGINLINEDATA = (1 << 16)
29 REVLOG_DEFAULT_FLAGS = REVLOGNGINLINEDATA
29 REVLOG_DEFAULT_FLAGS = REVLOGNGINLINEDATA
30 REVLOG_DEFAULT_FORMAT = REVLOGNG
30 REVLOG_DEFAULT_FORMAT = REVLOGNG
31 REVLOG_DEFAULT_VERSION = REVLOG_DEFAULT_FORMAT | REVLOG_DEFAULT_FLAGS
31 REVLOG_DEFAULT_VERSION = REVLOG_DEFAULT_FORMAT | REVLOG_DEFAULT_FLAGS
32
32
33 RevlogError = error.RevlogError
33 RevlogError = error.RevlogError
34 LookupError = error.LookupError
34 LookupError = error.LookupError
35
35
36 def getoffset(q):
36 def getoffset(q):
37 return int(q >> 16)
37 return int(q >> 16)
38
38
39 def gettype(q):
39 def gettype(q):
40 return int(q & 0xFFFF)
40 return int(q & 0xFFFF)
41
41
42 def offset_type(offset, type):
42 def offset_type(offset, type):
43 return long(long(offset) << 16 | type)
43 return long(long(offset) << 16 | type)

+nullhash = _sha(nullid)
+
def hash(text, p1, p2):
    """generate a hash from the given text and its parent hashes

    This hash combines both the current file contents and its history
    in a manner that makes it easy to distinguish nodes with the same
    content in the revision graph.
    """
-    l = [p1, p2]
-    l.sort()
-    s = _sha(l[0])
-    s.update(l[1])
+    # As of now, if one of the parent nodes is null, p2 is null
+    if p2 == nullid:
+        # deep copy of a hash is faster than creating one
+        s = nullhash.copy()
+        s.update(p1)
+    else:
+        # none of the parent nodes are nullid
+        l = [p1, p2]
+        l.sort()
+        s = _sha(l[0])
+        s.update(l[1])
    s.update(text)
    return s.digest()

def compress(text):
    """ generate a possibly-compressed representation of text """
    if not text:
        return ("", text)
    l = len(text)
    bin = None
    if l < 44:
        pass
    elif l > 1000000:
        # zlib makes an internal copy, thus doubling memory usage for
        # large files, so let's do this in pieces
        z = zlib.compressobj()
        p = []
        pos = 0
        while pos < l:
            pos2 = pos + 2**20
            p.append(z.compress(text[pos:pos2]))
            pos = pos2
        p.append(z.flush())
        if sum(map(len, p)) < l:
            bin = "".join(p)
    else:
        bin = _compress(text)
    if bin is None or len(bin) > l:
        if text[0] == '\0':
            return ("", text)
        return ('u', text)
    return ("", bin)

def decompress(bin):
    """ decompress the given input """
    if not bin:
        return bin
    t = bin[0]
    if t == '\0':
        return bin
    if t == 'x':
        return _decompress(bin)
    if t == 'u':
        return bin[1:]
    raise RevlogError(_("unknown compression type %r") % t)

class lazyparser(object):
    """
    this class avoids the need to parse the entirety of large indices
    """

    # lazyparser is not safe to use on windows if win32 extensions not
    # available. it keeps a file handle open, which makes it not possible
    # to break hardlinks on local cloned repos.

    def __init__(self, dataf, size):
        self.dataf = dataf
        self.s = struct.calcsize(indexformatng)
        self.datasize = size
        self.l = size/self.s
        self.index = [None] * self.l
        self.map = {nullid: nullrev}
        self.allmap = 0
        self.all = 0
        self.mapfind_count = 0

    def loadmap(self):
        """
        during a commit, we need to make sure the rev being added is
        not a duplicate. This requires loading the entire index,
        which is fairly slow. loadmap can load up just the node map,
        which takes much less time.
        """
        if self.allmap:
            return
        end = self.datasize
        self.allmap = 1
        cur = 0
        count = 0
        blocksize = self.s * 256
        self.dataf.seek(0)
        while cur < end:
            data = self.dataf.read(blocksize)
            off = 0
            for x in xrange(256):
                n = data[off + ngshaoffset:off + ngshaoffset + 20]
                self.map[n] = count
                count += 1
                if count >= self.l:
                    break
                off += self.s
            cur += blocksize

    def loadblock(self, blockstart, blocksize, data=None):
        if self.all:
            return
        if data is None:
            self.dataf.seek(blockstart)
            if blockstart + blocksize > self.datasize:
                # the revlog may have grown since we've started running,
                # but we don't have space in self.index for more entries.
                # limit blocksize so that we don't get too much data.
                blocksize = max(self.datasize - blockstart, 0)
            data = self.dataf.read(blocksize)
        lend = len(data) / self.s
        i = blockstart / self.s
        off = 0
        # lazyindex supports __delitem__
        if lend > len(self.index) - i:
            lend = len(self.index) - i
        for x in xrange(lend):
            if self.index[i + x] == None:
                b = data[off : off + self.s]
                self.index[i + x] = b
                n = b[ngshaoffset:ngshaoffset + 20]
                self.map[n] = i + x
            off += self.s

    def findnode(self, node):
        """search backwards through the index file for a specific node"""
        if self.allmap:
            return None

        # hg log will cause many many searches for the manifest
        # nodes. After we get called a few times, just load the whole
        # thing.
        if self.mapfind_count > 8:
            self.loadmap()
            if node in self.map:
                return node
            return None
        self.mapfind_count += 1
        last = self.l - 1
        while self.index[last] != None:
            if last == 0:
                self.all = 1
                self.allmap = 1
                return None
            last -= 1
        end = (last + 1) * self.s
        blocksize = self.s * 256
        while end >= 0:
            start = max(end - blocksize, 0)
            self.dataf.seek(start)
            data = self.dataf.read(end - start)
            findend = end - start
            while True:
                # we're searching backwards, so we have to make sure
                # we don't find a changeset where this node is a parent
                off = data.find(node, 0, findend)
                findend = off
                if off >= 0:
                    i = off / self.s
                    off = i * self.s
                    n = data[off + ngshaoffset:off + ngshaoffset + 20]
                    if n == node:
                        self.map[n] = i + start / self.s
                        return node
                else:
                    break
            end -= blocksize
        return None

    def loadindex(self, i=None, end=None):
        if self.all:
            return
        all = False
        if i == None:
            blockstart = 0
            blocksize = (65536 / self.s) * self.s
            end = self.datasize
            all = True
        else:
            if end:
                blockstart = i * self.s
                end = end * self.s
                blocksize = end - blockstart
            else:
                blockstart = (i & ~1023) * self.s
                blocksize = self.s * 1024
                end = blockstart + blocksize
        while blockstart < end:
            self.loadblock(blockstart, blocksize)
            blockstart += blocksize
        if all:
            self.all = True

class lazyindex(object):
    """a lazy version of the index array"""
    def __init__(self, parser):
        self.p = parser
    def __len__(self):
        return len(self.p.index)
    def load(self, pos):
        if pos < 0:
            pos += len(self.p.index)
        self.p.loadindex(pos)
        return self.p.index[pos]
    def __getitem__(self, pos):
        return _unpack(indexformatng, self.p.index[pos] or self.load(pos))
    def __setitem__(self, pos, item):
        self.p.index[pos] = _pack(indexformatng, *item)
    def __delitem__(self, pos):
        del self.p.index[pos]
    def insert(self, pos, e):
        self.p.index.insert(pos, _pack(indexformatng, *e))
    def append(self, e):
        self.p.index.append(_pack(indexformatng, *e))

class lazymap(object):
    """a lazy version of the node map"""
    def __init__(self, parser):
        self.p = parser
    def load(self, key):
        n = self.p.findnode(key)
        if n == None:
            raise KeyError(key)
    def __contains__(self, key):
        if key in self.p.map:
            return True
        self.p.loadmap()
        return key in self.p.map
    def __iter__(self):
        yield nullid
        for i in xrange(self.p.l):
            ret = self.p.index[i]
            if not ret:
                self.p.loadindex(i)
                ret = self.p.index[i]
            if isinstance(ret, str):
                ret = _unpack(indexformatng, ret)
            yield ret[7]
    def __getitem__(self, key):
        try:
            return self.p.map[key]
        except KeyError:
            try:
                self.load(key)
                return self.p.map[key]
            except KeyError:
                raise KeyError("node " + hex(key))
    def __setitem__(self, key, val):
        self.p.map[key] = val
    def __delitem__(self, key):
        del self.p.map[key]

indexformatv0 = ">4l20s20s20s"
v0shaoffset = 56

class revlogoldio(object):
    def __init__(self):
        self.size = struct.calcsize(indexformatv0)

    def parseindex(self, fp, inline):
        s = self.size
        index = []
        nodemap = {nullid: nullrev}
        n = off = 0
        data = fp.read()
        l = len(data)
        while off + s <= l:
            cur = data[off:off + s]
            off += s
            e = _unpack(indexformatv0, cur)
            # transform to revlogv1 format
            e2 = (offset_type(e[0], 0), e[1], -1, e[2], e[3],
                  nodemap.get(e[4], nullrev), nodemap.get(e[5], nullrev), e[6])
            index.append(e2)
            nodemap[e[6]] = n
            n += 1

        return index, nodemap, None

    def packentry(self, entry, node, version, rev):
        e2 = (getoffset(entry[0]), entry[1], entry[3], entry[4],
              node(entry[5]), node(entry[6]), entry[7])
        return _pack(indexformatv0, *e2)

# index ng:
# 6 bytes offset
# 2 bytes flags
# 4 bytes compressed length
# 4 bytes uncompressed length
# 4 bytes: base rev
# 4 bytes link rev
# 4 bytes parent 1 rev
# 4 bytes parent 2 rev
# 32 bytes: nodeid
indexformatng = ">Qiiiiii20s12x"
ngshaoffset = 32
versionformat = ">I"

class revlogio(object):
    def __init__(self):
        self.size = struct.calcsize(indexformatng)

    def parseindex(self, fp, inline):
        try:
            size = util.fstat(fp).st_size
        except AttributeError:
            size = 0

        if util.openhardlinks() and not inline and size > 1000000:
            # big index, let's parse it on demand
            parser = lazyparser(fp, size)
            index = lazyindex(parser)
            nodemap = lazymap(parser)
            e = list(index[0])
            type = gettype(e[0])
            e[0] = offset_type(0, type)
            index[0] = e
            return index, nodemap, None

        data = fp.read()
        # call the C implementation to parse the index data
        index, nodemap, cache = parsers.parse_index(data, inline)
        return index, nodemap, cache

    def packentry(self, entry, node, version, rev):
        p = _pack(indexformatng, *entry)
        if rev == 0:
            p = _pack(versionformat, version) + p[4:]
        return p

class revlog(object):
    """
    the underlying revision storage object

    A revlog consists of two parts, an index and the revision data.

    The index is a file with a fixed record size containing
    information on each revision, including its nodeid (hash), the
    nodeids of its parents, the position and offset of its data within
    the data file, and the revision it's based on. Finally, each entry
    contains a linkrev entry that can serve as a pointer to external
    data.

    The revision data itself is a linear collection of data chunks.
    Each chunk represents a revision and is usually represented as a
    delta against the previous chunk. To bound lookup time, runs of
    deltas are limited to about 2 times the length of the original
    version data. This makes retrieval of a version proportional to
    its size, or O(1) relative to the number of revisions.

    Both pieces of the revlog are written to in an append-only
    fashion, which means we never need to rewrite a file to insert or
    remove data, and can use some simple techniques to avoid the need
    for locking while reading.
    """
    def __init__(self, opener, indexfile):
        """
        create a revlog object

        opener is a function that abstracts the file opening operation
        and can be used to implement COW semantics or the like.
        """
        self.indexfile = indexfile
        self.datafile = indexfile[:-2] + ".d"
        self.opener = opener
        self._cache = None
        self._chunkcache = None
        self.nodemap = {nullid: nullrev}
        self.index = []

        v = REVLOG_DEFAULT_VERSION
        if hasattr(opener, "defversion"):
            v = opener.defversion
            if v & REVLOGNG:
                v |= REVLOGNGINLINEDATA

        i = ""
        try:
            f = self.opener(self.indexfile)
            i = f.read(4)
            f.seek(0)
            if len(i) > 0:
                v = struct.unpack(versionformat, i)[0]
        except IOError, inst:
            if inst.errno != errno.ENOENT:
                raise

        self.version = v
        self._inline = v & REVLOGNGINLINEDATA
        flags = v & ~0xFFFF
        fmt = v & 0xFFFF
        if fmt == REVLOGV0 and flags:
            raise RevlogError(_("index %s unknown flags %#04x for format v0")
                              % (self.indexfile, flags >> 16))
        elif fmt == REVLOGNG and flags & ~REVLOGNGINLINEDATA:
            raise RevlogError(_("index %s unknown flags %#04x for revlogng")
                              % (self.indexfile, flags >> 16))
        elif fmt > REVLOGNG:
            raise RevlogError(_("index %s unknown format %d")
                              % (self.indexfile, fmt))

        self._io = revlogio()
        if self.version == REVLOGV0:
            self._io = revlogoldio()
        if i:
            d = self._io.parseindex(f, self._inline)
            self.index, self.nodemap, self._chunkcache = d

        # add the magic null revision at -1 (if it hasn't been done already)
        if (self.index == [] or isinstance(self.index, lazyindex) or
            self.index[-1][7] != nullid):
            self.index.append((0, 0, 0, -1, -1, -1, -1, nullid))

    def _loadindex(self, start, end):
        """load a block of indexes all at once from the lazy parser"""
        if isinstance(self.index, lazyindex):
            self.index.p.loadindex(start, end)

    def _loadindexmap(self):
        """loads both the map and the index from the lazy parser"""
        if isinstance(self.index, lazyindex):
            p = self.index.p
            p.loadindex()
            self.nodemap = p.map

    def _loadmap(self):
        """loads the map from the lazy parser"""
        if isinstance(self.nodemap, lazymap):
            self.nodemap.p.loadmap()
            self.nodemap = self.nodemap.p.map

    def tip(self):
        return self.node(len(self.index) - 2)
    def __len__(self):
        return len(self.index) - 1
    def __iter__(self):
        for i in xrange(len(self)):
            yield i
    def rev(self, node):
        try:
            return self.nodemap[node]
        except KeyError:
            raise LookupError(node, self.indexfile, _('no node'))
    def node(self, rev):
        return self.index[rev][7]
    def linkrev(self, rev):
        return self.index[rev][4]
    def parents(self, node):
        i = self.index
        d = i[self.rev(node)]
        return i[d[5]][7], i[d[6]][7] # map revisions to nodes inline
    def parentrevs(self, rev):
        return self.index[rev][5:7]
    def start(self, rev):
        return int(self.index[rev][0] >> 16)
    def end(self, rev):
        return self.start(rev) + self.length(rev)
    def length(self, rev):
        return self.index[rev][1]
    def base(self, rev):
        return self.index[rev][3]

    def size(self, rev):
        """return the length of the uncompressed text for a given revision"""
        l = self.index[rev][2]
        if l >= 0:
            return l

        t = self.revision(self.node(rev))
        return len(t)

        # alternate implementation. The advantage of this code is that it
        # will be faster for a single revision. But, the results are not
        # cached, so finding the size of every revision will be slower.
        """
        if self.cache and self.cache[1] == rev:
            return len(self.cache[2])

        base = self.base(rev)
        if self.cache and self.cache[1] >= base and self.cache[1] < rev:
            base = self.cache[1]
            text = self.cache[2]
        else:
            text = self.revision(self.node(base))

        l = len(text)
        for x in xrange(base + 1, rev + 1):
            l = mdiff.patchedsize(l, self.chunk(x))
        return l
        """

    def reachable(self, node, stop=None):
        """return a hash of all nodes ancestral to a given node, including
        the node itself, stopping when stop is matched"""
        reachable = {}
        visit = [node]
        reachable[node] = 1
        if stop:
            stopn = self.rev(stop)
        else:
            stopn = 0
        while visit:
            n = visit.pop(0)
            if n == stop:
                continue
            if n == nullid:
                continue
            for p in self.parents(n):
                if self.rev(p) < stopn:
                    continue
                if p not in reachable:
                    reachable[p] = 1
                    visit.append(p)
        return reachable

    def ancestors(self, *revs):
        'Generate the ancestors of revs using a breadth-first visit'
        visit = list(revs)
        seen = util.set([nullrev])
        while visit:
            for parent in self.parentrevs(visit.pop(0)):
                if parent not in seen:
                    visit.append(parent)
                    seen.add(parent)
                    yield parent

    def descendants(self, *revs):
        'Generate the descendants of revs in topological order'
        seen = util.set(revs)
        for i in xrange(min(revs) + 1, len(self)):
            for x in self.parentrevs(i):
                if x != nullrev and x in seen:
                    seen.add(i)
                    yield i
                    break

    def findmissing(self, common=None, heads=None):
        '''
        returns the topologically sorted list of nodes from the set:
        missing = (ancestors(heads) \ ancestors(common))

        where ancestors() is the set of ancestors from heads, heads included

        if heads is None, the heads of the revlog are used
        if common is None, nullid is assumed to be a common node
        '''
        if common is None:
            common = [nullid]
        if heads is None:
            heads = self.heads()

        common = [self.rev(n) for n in common]
        heads = [self.rev(n) for n in heads]

        # we want the ancestors, but inclusive
        has = dict.fromkeys(self.ancestors(*common))
        has[nullrev] = None
        for r in common:
            has[r] = None

        # take all ancestors from heads that aren't in has
        missing = {}
        visit = [r for r in heads if r not in has]
        while visit:
            r = visit.pop(0)
            if r in missing:
                continue
            else:
                missing[r] = None
                for p in self.parentrevs(r):
                    if p not in has:
                        visit.append(p)
        missing = missing.keys()
        missing.sort()
        return [self.node(r) for r in missing]

    def nodesbetween(self, roots=None, heads=None):
        """Return a tuple containing three elements. Elements 1 and 2 contain
        a final list bases and heads after all the unreachable ones have been
        pruned. Element 0 contains a topologically sorted list of all

        nodes that satisfy these constraints:
        1. All nodes must be descended from a node in roots (the nodes on
        roots are considered descended from themselves).
        2. All nodes must also be ancestors of a node in heads (the nodes in
        heads are considered to be their own ancestors).

        If roots is unspecified, nullid is assumed as the only root.
        If heads is unspecified, it is taken to be the output of the
        heads method (i.e. a list of all nodes in the repository that
        have no children)."""
        nonodes = ([], [], [])
        if roots is not None:
            roots = list(roots)
            if not roots:
                return nonodes
            lowestrev = min([self.rev(n) for n in roots])
        else:
            roots = [nullid] # Everybody's a descendent of nullid
            lowestrev = nullrev
        if (lowestrev == nullrev) and (heads is None):
            # We want _all_ the nodes!
            return ([self.node(r) for r in self], [nullid], list(self.heads()))
        if heads is None:
            # All nodes are ancestors, so the latest ancestor is the last
            # node.
            highestrev = len(self) - 1
            # Set ancestors to None to signal that every node is an ancestor.
            ancestors = None
            # Set heads to an empty dictionary for later discovery of heads
            heads = {}
        else:
            heads = list(heads)
            if not heads:
                return nonodes
            ancestors = {}
            # Turn heads into a dictionary so we can remove 'fake' heads.
            # Also, later we will be using it to filter out the heads we can't
            # find from roots.
            heads = dict.fromkeys(heads, 0)
            # Start at the top and keep marking parents until we're done.
            nodestotag = heads.keys()
            # Remember where the top was so we can use it as a limit later.
            highestrev = max([self.rev(n) for n in nodestotag])
            while nodestotag:
                # grab a node to tag
                n = nodestotag.pop()
                # Never tag nullid
                if n == nullid:
                    continue
                # A node's revision number represents its place in a
                # topologically sorted list of nodes.
                r = self.rev(n)
                if r >= lowestrev:
                    if n not in ancestors:
                        # If we are possibly a descendent of one of the roots
                        # and we haven't already been marked as an ancestor
                        ancestors[n] = 1 # Mark as ancestor
                        # Add non-nullid parents to list of nodes to tag.
                        nodestotag.extend([p for p in self.parents(n) if
                                           p != nullid])
                    elif n in heads: # We've seen it before, is it a fake head?
                        # So it is, real heads should not be the ancestors of
                        # any other heads.
                        heads.pop(n)
            if not ancestors:
                return nonodes
            # Now that we have our set of ancestors, we want to remove any
            # roots that are not ancestors.

            # If one of the roots was nullid, everything is included anyway.
            if lowestrev > nullrev:
                # But, since we weren't, let's recompute the lowest rev to not
                # include roots that aren't ancestors.

                # Filter out roots that aren't ancestors of heads
                roots = [n for n in roots if n in ancestors]
                # Recompute the lowest revision
                if roots:
                    lowestrev = min([self.rev(n) for n in roots])
                else:
                    # No more roots? Return empty list
                    return nonodes
            else:
                # We are descending from nullid, and don't need to care about
                # any other roots.
                lowestrev = nullrev
                roots = [nullid]
        # Transform our roots list into a 'set' (i.e. a dictionary where the
        # values don't matter).
        descendents = dict.fromkeys(roots, 1)
        # Also, keep the original roots so we can filter out roots that aren't
        # 'real' roots (i.e. are descended from other roots).
        roots = descendents.copy()
        # Our topologically sorted list of output nodes.
        orderedout = []
        # Don't start at nullid since we don't want nullid in our output list,
        # and if nullid shows up in descendents, empty parents will look like
        # they're descendents.
        for r in xrange(max(lowestrev, 0), highestrev + 1):
            n = self.node(r)
            isdescendent = False
            if lowestrev == nullrev: # Everybody is a descendent of nullid
                isdescendent = True
            elif n in descendents:
                # n is already a descendent
                isdescendent = True
                # This check only needs to be done here because all the roots
                # will start being marked as descendents before the loop.
                if n in roots:
                    # If n was a root, check if it's a 'real' root.
                    p = tuple(self.parents(n))
                    # If any of its parents are descendents, it's not a root.
                    if (p[0] in descendents) or (p[1] in descendents):
                        roots.pop(n)
            else:
                p = tuple(self.parents(n))
                # A node is a descendent if either of its parents are
                # descendents. (We seeded the descendents list with the roots
                # up there, remember?)
                if (p[0] in descendents) or (p[1] in descendents):
                    descendents[n] = 1
                    isdescendent = True
            if isdescendent and ((ancestors is None) or (n in ancestors)):
                # Only include nodes that are both descendents and ancestors.
                orderedout.append(n)
                if (ancestors is not None) and (n in heads):
                    # We're trying to figure out which heads are reachable
                    # from roots.
                    # Mark this head as having been reached
                    heads[n] = 1
                elif ancestors is None:
                    # Otherwise, we're trying to discover the heads.
                    # Assume this is a head because if it isn't, the next step
                    # will eventually remove it.
                    heads[n] = 1
                    # But, obviously its parents aren't.
                    for p in self.parents(n):
                        heads.pop(p, None)
        heads = [n for n in heads.iterkeys() if heads[n] != 0]
        roots = roots.keys()
        assert orderedout
        assert roots
        assert heads
        return (orderedout, roots, heads)
774
783
775 def heads(self, start=None, stop=None):
784 def heads(self, start=None, stop=None):
776 """return the list of all nodes that have no children
785 """return the list of all nodes that have no children
777
786
778 if start is specified, only heads that are descendants of
787 if start is specified, only heads that are descendants of
779 start will be returned
788 start will be returned
780 if stop is specified, it will consider all the revs from stop
789 if stop is specified, it will consider all the revs from stop
781 as if they had no children
790 as if they had no children
782 """
791 """
783 if start is None and stop is None:
792 if start is None and stop is None:
784 count = len(self)
793 count = len(self)
785 if not count:
794 if not count:
786 return [nullid]
795 return [nullid]
787 ishead = [1] * (count + 1)
796 ishead = [1] * (count + 1)
788 index = self.index
797 index = self.index
789 for r in xrange(count):
798 for r in xrange(count):
790 e = index[r]
799 e = index[r]
791 ishead[e[5]] = ishead[e[6]] = 0
800 ishead[e[5]] = ishead[e[6]] = 0
792 return [self.node(r) for r in xrange(count) if ishead[r]]
801 return [self.node(r) for r in xrange(count) if ishead[r]]
793
802
794 if start is None:
803 if start is None:
795 start = nullid
804 start = nullid
796 if stop is None:
805 if stop is None:
797 stop = []
806 stop = []
798 stoprevs = dict.fromkeys([self.rev(n) for n in stop])
807 stoprevs = dict.fromkeys([self.rev(n) for n in stop])
799 startrev = self.rev(start)
808 startrev = self.rev(start)
800 reachable = {startrev: 1}
809 reachable = {startrev: 1}
801 heads = {startrev: 1}
810 heads = {startrev: 1}
802
811
803 parentrevs = self.parentrevs
812 parentrevs = self.parentrevs
804 for r in xrange(startrev + 1, len(self)):
813 for r in xrange(startrev + 1, len(self)):
805 for p in parentrevs(r):
814 for p in parentrevs(r):
806 if p in reachable:
815 if p in reachable:
807 if r not in stoprevs:
816 if r not in stoprevs:
808 reachable[r] = 1
817 reachable[r] = 1
809 heads[r] = 1
818 heads[r] = 1
810 if p in heads and p not in stoprevs:
819 if p in heads and p not in stoprevs:
811 del heads[p]
820 del heads[p]
812
821
813 return [self.node(r) for r in heads]
822 return [self.node(r) for r in heads]
814
823
815 def children(self, node):
824 def children(self, node):
816 """find the children of a given node"""
825 """find the children of a given node"""
817 c = []
826 c = []
818 p = self.rev(node)
827 p = self.rev(node)
819 for r in range(p + 1, len(self)):
828 for r in range(p + 1, len(self)):
820 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
829 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
821 if prevs:
830 if prevs:
822 for pr in prevs:
831 for pr in prevs:
823 if pr == p:
832 if pr == p:
824 c.append(self.node(r))
833 c.append(self.node(r))
825 elif p == nullrev:
834 elif p == nullrev:
826 c.append(self.node(r))
835 c.append(self.node(r))
827 return c
836 return c
828
837
829 def _match(self, id):
838 def _match(self, id):
830 if isinstance(id, (long, int)):
839 if isinstance(id, (long, int)):
831 # rev
840 # rev
832 return self.node(id)
841 return self.node(id)
833 if len(id) == 20:
842 if len(id) == 20:
834 # possibly a binary node
843 # possibly a binary node
835 # odds of a binary node being all hex in ASCII are 1 in 10**25
844 # odds of a binary node being all hex in ASCII are 1 in 10**25
836 try:
845 try:
837 node = id
846 node = id
838 self.rev(node) # quick search the index
847 self.rev(node) # quick search the index
839 return node
848 return node
840 except LookupError:
849 except LookupError:
841 pass # may be partial hex id
850 pass # may be partial hex id
842 try:
851 try:
843 # str(rev)
852 # str(rev)
844 rev = int(id)
853 rev = int(id)
845 if str(rev) != id:
854 if str(rev) != id:
846 raise ValueError
855 raise ValueError
847 if rev < 0:
856 if rev < 0:
848 rev = len(self) + rev
857 rev = len(self) + rev
849 if rev < 0 or rev >= len(self):
858 if rev < 0 or rev >= len(self):
850 raise ValueError
859 raise ValueError
851 return self.node(rev)
860 return self.node(rev)
852 except (ValueError, OverflowError):
861 except (ValueError, OverflowError):
853 pass
862 pass
854 if len(id) == 40:
863 if len(id) == 40:
855 try:
864 try:
856 # a full hex nodeid?
865 # a full hex nodeid?
857 node = bin(id)
866 node = bin(id)
858 self.rev(node)
867 self.rev(node)
859 return node
868 return node
860 except (TypeError, LookupError):
869 except (TypeError, LookupError):
861 pass
870 pass
862
871
863 def _partialmatch(self, id):
872 def _partialmatch(self, id):
864 if len(id) < 40:
873 if len(id) < 40:
865 try:
874 try:
866 # hex(node)[:...]
875 # hex(node)[:...]
867 l = len(id) / 2 # grab an even number of digits
876 l = len(id) / 2 # grab an even number of digits
868 bin_id = bin(id[:l*2])
877 bin_id = bin(id[:l*2])
869 nl = [n for n in self.nodemap if n[:l] == bin_id]
878 nl = [n for n in self.nodemap if n[:l] == bin_id]
870 nl = [n for n in nl if hex(n).startswith(id)]
879 nl = [n for n in nl if hex(n).startswith(id)]
871 if len(nl) > 0:
880 if len(nl) > 0:
872 if len(nl) == 1:
881 if len(nl) == 1:
873 return nl[0]
882 return nl[0]
874 raise LookupError(id, self.indexfile,
883 raise LookupError(id, self.indexfile,
875 _('ambiguous identifier'))
884 _('ambiguous identifier'))
876 return None
885 return None
877 except TypeError:
886 except TypeError:
878 pass
887 pass
879
888
880 def lookup(self, id):
889 def lookup(self, id):
881 """locate a node based on:
890 """locate a node based on:
882 - revision number or str(revision number)
891 - revision number or str(revision number)
883 - nodeid or subset of hex nodeid
892 - nodeid or subset of hex nodeid
884 """
893 """
885 n = self._match(id)
894 n = self._match(id)
886 if n is not None:
895 if n is not None:
887 return n
896 return n
888 n = self._partialmatch(id)
897 n = self._partialmatch(id)
889 if n:
898 if n:
890 return n
899 return n
891
900
892 raise LookupError(id, self.indexfile, _('no match found'))
901 raise LookupError(id, self.indexfile, _('no match found'))
893
902
894 def cmp(self, node, text):
903 def cmp(self, node, text):
895 """compare text with a given file revision"""
904 """compare text with a given file revision"""
896 p1, p2 = self.parents(node)
905 p1, p2 = self.parents(node)
897 return hash(text, p1, p2) != node
906 return hash(text, p1, p2) != node
898
907
899 def chunk(self, rev, df=None):
908 def chunk(self, rev, df=None):
900 def loadcache(df):
909 def loadcache(df):
901 if not df:
910 if not df:
902 if self._inline:
911 if self._inline:
903 df = self.opener(self.indexfile)
912 df = self.opener(self.indexfile)
904 else:
913 else:
905 df = self.opener(self.datafile)
914 df = self.opener(self.datafile)
906 df.seek(start)
915 df.seek(start)
907 self._chunkcache = (start, df.read(cache_length))
916 self._chunkcache = (start, df.read(cache_length))
908
917
909 start, length = self.start(rev), self.length(rev)
918 start, length = self.start(rev), self.length(rev)
910 if self._inline:
919 if self._inline:
911 start += (rev + 1) * self._io.size
920 start += (rev + 1) * self._io.size
912 end = start + length
921 end = start + length
913
922
914 offset = 0
923 offset = 0
915 if not self._chunkcache:
924 if not self._chunkcache:
916 cache_length = max(65536, length)
925 cache_length = max(65536, length)
917 loadcache(df)
926 loadcache(df)
918 else:
927 else:
919 cache_start = self._chunkcache[0]
928 cache_start = self._chunkcache[0]
920 cache_length = len(self._chunkcache[1])
929 cache_length = len(self._chunkcache[1])
921 cache_end = cache_start + cache_length
930 cache_end = cache_start + cache_length
922 if start >= cache_start and end <= cache_end:
931 if start >= cache_start and end <= cache_end:
923 # it is cached
932 # it is cached
924 offset = start - cache_start
933 offset = start - cache_start
925 else:
934 else:
926 cache_length = max(65536, length)
935 cache_length = max(65536, length)
927 loadcache(df)
936 loadcache(df)
928
937
929 # avoid copying large chunks
938 # avoid copying large chunks
930 c = self._chunkcache[1]
939 c = self._chunkcache[1]
931 if cache_length != length:
940 if cache_length != length:
932 c = c[offset:offset + length]
941 c = c[offset:offset + length]
933
942
934 return decompress(c)
943 return decompress(c)
935
944
936 def revdiff(self, rev1, rev2):
945 def revdiff(self, rev1, rev2):
937 """return or calculate a delta between two revisions"""
946 """return or calculate a delta between two revisions"""
938 if rev1 + 1 == rev2 and self.base(rev1) == self.base(rev2):
947 if rev1 + 1 == rev2 and self.base(rev1) == self.base(rev2):
939 return self.chunk(rev2)
948 return self.chunk(rev2)
940
949
941 return mdiff.textdiff(self.revision(self.node(rev1)),
950 return mdiff.textdiff(self.revision(self.node(rev1)),
942 self.revision(self.node(rev2)))
951 self.revision(self.node(rev2)))
943
952
    def revision(self, node):
        """return an uncompressed revision of a given node"""
        if node == nullid:
            return ""
        if self._cache and self._cache[0] == node:
            return str(self._cache[2])

        # look up what we need to read
        text = None
        rev = self.rev(node)
        base = self.base(rev)

        # check rev flags
        if self.index[rev][0] & 0xFFFF:
            raise RevlogError(_('incompatible revision flag %x') %
                              (self.index[rev][0] & 0xFFFF))

        df = None

        # do we have useful data cached?
        if self._cache and self._cache[1] >= base and self._cache[1] < rev:
            base = self._cache[1]
            text = str(self._cache[2])
            self._loadindex(base, rev + 1)
            if not self._inline and rev > base + 1:
                df = self.opener(self.datafile)
        else:
            self._loadindex(base, rev + 1)
            if not self._inline and rev > base:
                df = self.opener(self.datafile)
            text = self.chunk(base, df=df)

        bins = [self.chunk(r, df) for r in xrange(base + 1, rev + 1)]
        text = mdiff.patches(text, bins)
        p1, p2 = self.parents(node)
        if node != hash(text, p1, p2):
            raise RevlogError(_("integrity check failed on %s:%d")
                              % (self.datafile, rev))

        self._cache = (node, rev, text)
        return text

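    # Sketch of the delta-chain reconstruction above (illustrative only):
    # with base == 3 and rev == 6, the code is equivalent to
    #
    #     text = self.chunk(3)                   # full snapshot at the base
    #     for r in (4, 5, 6):
    #         text = apply_delta(text, self.chunk(r))
    #
    # except that mdiff.patches() applies all deltas in a single call, and
    # the result is verified against the recorded node before being cached.
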
    def checkinlinesize(self, tr, fp=None):
        if not self._inline:
            return
        if not fp:
            fp = self.opener(self.indexfile, 'r')
            fp.seek(0, 2)
        size = fp.tell()
        if size < 131072:
            return
        trinfo = tr.find(self.indexfile)
        if trinfo == None:
            raise RevlogError(_("%s not found in the transaction")
                              % self.indexfile)

        trindex = trinfo[2]
        dataoff = self.start(trindex)

        tr.add(self.datafile, dataoff)
        df = self.opener(self.datafile, 'w')
        try:
            calc = self._io.size
            for r in self:
                start = self.start(r) + (r + 1) * calc
                length = self.length(r)
                fp.seek(start)
                d = fp.read(length)
                df.write(d)
        finally:
            df.close()

        fp.close()
        fp = self.opener(self.indexfile, 'w', atomictemp=True)
        self.version &= ~(REVLOGNGINLINEDATA)
        self._inline = False
        for i in self:
            e = self._io.packentry(self.index[i], self.node, self.version, i)
            fp.write(e)

        # if we don't call rename, the temp file will never replace the
        # real index
        fp.rename()

        tr.replace(self.indexfile, trindex * calc)
        self._chunkcache = None

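    # Migration note (added commentary): once an inline revlog's .i file
    # passes 128KB (131072 bytes), the interleaved data chunks are copied
    # out to a separate .d file, the index entries are rewritten without
    # the inline flag, and the atomictemp rename makes the switch visible
    # in one step. A hypothetical layout before and after:
    #
    #     foo.i (inline):   [entry 0][data 0][entry 1][data 1]...
    #     foo.i + foo.d:    [entry 0][entry 1]...  /  [data 0][data 1]...
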
    def addrevision(self, text, transaction, link, p1, p2, d=None):
        """add a revision to the log

        text - the revision data to add
        transaction - the transaction object used for rollback
        link - the linkrev data to add
        p1, p2 - the parent nodeids of the revision
        d - an optional precomputed delta
        """
        dfh = None
        if not self._inline:
            dfh = self.opener(self.datafile, "a")
        ifh = self.opener(self.indexfile, "a+")
        try:
            return self._addrevision(text, transaction, link, p1, p2, d, ifh, dfh)
        finally:
            if dfh:
                dfh.close()
            ifh.close()

    def _addrevision(self, text, transaction, link, p1, p2, d, ifh, dfh):
        node = hash(text, p1, p2)
        if node in self.nodemap:
            return node

        curr = len(self)
        prev = curr - 1
        base = self.base(prev)
        offset = self.end(prev)

        if curr:
            if not d:
                ptext = self.revision(self.node(prev))
                d = mdiff.textdiff(ptext, text)
            data = compress(d)
            l = len(data[1]) + len(data[0])
            dist = l + offset - self.start(base)

        # full versions are inserted when the needed deltas
        # become comparable to the uncompressed text
        if not curr or dist > len(text) * 2:
            data = compress(text)
            l = len(data[1]) + len(data[0])
            base = curr

        e = (offset_type(offset, 0), l, len(text),
             base, link, self.rev(p1), self.rev(p2), node)
        self.index.insert(-1, e)
        self.nodemap[node] = curr

        entry = self._io.packentry(e, self.node, self.version, curr)
        if not self._inline:
            transaction.add(self.datafile, offset)
            transaction.add(self.indexfile, curr * len(entry))
            if data[0]:
                dfh.write(data[0])
            dfh.write(data[1])
            dfh.flush()
            ifh.write(entry)
        else:
            offset += curr * self._io.size
            transaction.add(self.indexfile, offset, curr)
            ifh.write(entry)
            ifh.write(data[0])
            ifh.write(data[1])
            self.checkinlinesize(transaction, ifh)

        self._cache = (node, curr, text)
        return node

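    # Worked example of the snapshot heuristic above (illustrative numbers):
    # if the chain since the last full version spans dist == 5000 bytes of
    # compressed data but the new text is only len(text) == 2000 bytes, then
    # 5000 > 2000 * 2 and a fresh full version is stored instead of another
    # delta. This bounds reconstruction cost: reading any revision never has
    # to traverse much more than twice its own size in chain data.
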
    def ancestor(self, a, b):
        """calculate the least common ancestor of nodes a and b"""

        def parents(rev):
            return [p for p in self.parentrevs(rev) if p != nullrev]

        c = ancestor.ancestor(self.rev(a), self.rev(b), parents)
        if c is None:
            return nullid

        return self.node(c)

    def group(self, nodelist, lookup, infocollect=None):
        """calculate a delta group

        Given a list of changeset revs, return a set of deltas and
        metadata corresponding to nodes. The first delta is
        parent(nodes[0]) -> nodes[0]; the receiver is guaranteed to
        have this parent as it has all history before these
        changesets. The parent used is parent[0].
        """
        revs = [self.rev(n) for n in nodelist]

        # if we don't have any revisions touched by these changesets, bail
        if not revs:
            yield changegroup.closechunk()
            return

        # add the parent of the first rev
        p = self.parents(self.node(revs[0]))[0]
        revs.insert(0, self.rev(p))

        # build deltas
        for d in xrange(0, len(revs) - 1):
            a, b = revs[d], revs[d + 1]
            nb = self.node(b)

            if infocollect is not None:
                infocollect(nb)

            p = self.parents(nb)
            meta = nb + p[0] + p[1] + lookup(nb)
            if a == -1:
                d = self.revision(nb)
                meta += mdiff.trivialdiffheader(len(d))
            else:
                d = self.revdiff(a, b)
            yield changegroup.chunkheader(len(meta) + len(d))
            yield meta
            if len(d) > 2**20:
                pos = 0
                while pos < len(d):
                    pos2 = pos + 2 ** 18
                    yield d[pos:pos2]
                    pos = pos2
            else:
                yield d

        yield changegroup.closechunk()

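    # Wire format sketch (added commentary, simplified): each iteration above
    # emits one changegroup chunk,
    #
    #     | length prefix | node(20) p1(20) p2(20) linknode(20) | delta |
    #
    # where the prefix covers the whole chunk. Deltas larger than 1MB (2**20)
    # are yielded in 256KB (2**18) slices so a consumer never has to buffer a
    # huge string at once; an empty chunk (closechunk) terminates the group.
    # The 80-byte metadata block is what addgroup() below unpacks.
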
    def addgroup(self, revs, linkmapper, transaction):
        """
        add a delta group

        Given a set of deltas, add them to the revision log. The
        first delta is against its parent, which should be in our
        log; the rest are against the previous delta.
        """

        # track the base of the current delta log
        r = len(self)
        t = r - 1
        node = None

        base = prev = nullrev
        start = end = textlen = 0
        if r:
            end = self.end(t)

        ifh = self.opener(self.indexfile, "a+")
        isize = r * self._io.size
        if self._inline:
            transaction.add(self.indexfile, end + isize, r)
            dfh = None
        else:
            transaction.add(self.indexfile, isize, r)
            transaction.add(self.datafile, end)
            dfh = self.opener(self.datafile, "a")

        try:
            # loop through our set of deltas
            chain = None
            for chunk in revs:
                node, p1, p2, cs = struct.unpack("20s20s20s20s", chunk[:80])
                link = linkmapper(cs)
                if node in self.nodemap:
                    # this can happen if two branches make the same change
                    chain = node
                    continue
                delta = buffer(chunk, 80)
                del chunk

                for p in (p1, p2):
                    if not p in self.nodemap:
                        raise LookupError(p, self.indexfile, _('unknown parent'))

                if not chain:
                    # retrieve the parent revision of the delta chain
                    chain = p1
                    if not chain in self.nodemap:
                        raise LookupError(chain, self.indexfile, _('unknown base'))

                # full versions are inserted when the needed deltas become
                # comparable to the uncompressed text or when the previous
                # version is not the one we have a delta against. We use
                # the size of the previous full rev as a proxy for the
                # current size.

                if chain == prev:
                    cdelta = compress(delta)
                    cdeltalen = len(cdelta[0]) + len(cdelta[1])
                    textlen = mdiff.patchedsize(textlen, delta)

                if chain != prev or (end - start + cdeltalen) > textlen * 2:
                    # flush our writes here so we can read it in revision
                    if dfh:
                        dfh.flush()
                    ifh.flush()
                    text = self.revision(chain)
                    if len(text) == 0:
                        # skip over trivial delta header
                        text = buffer(delta, 12)
                    else:
                        text = mdiff.patches(text, [delta])
                    del delta
                    chk = self._addrevision(text, transaction, link, p1, p2, None,
                                            ifh, dfh)
                    if not dfh and not self._inline:
                        # addrevision switched from inline to conventional
                        # reopen the index
                        dfh = self.opener(self.datafile, "a")
                        ifh = self.opener(self.indexfile, "a")
                    if chk != node:
                        raise RevlogError(_("consistency error adding group"))
                    textlen = len(text)
                else:
                    e = (offset_type(end, 0), cdeltalen, textlen, base,
                         link, self.rev(p1), self.rev(p2), node)
                    self.index.insert(-1, e)
                    self.nodemap[node] = r
                    entry = self._io.packentry(e, self.node, self.version, r)
                    if self._inline:
                        ifh.write(entry)
                        ifh.write(cdelta[0])
                        ifh.write(cdelta[1])
                        self.checkinlinesize(transaction, ifh)
                        if not self._inline:
                            dfh = self.opener(self.datafile, "a")
                            ifh = self.opener(self.indexfile, "a")
                    else:
                        dfh.write(cdelta[0])
                        dfh.write(cdelta[1])
                        ifh.write(entry)

                t, r, chain, prev = r, r + 1, node, node
                base = self.base(t)
                start = self.start(base)
                end = self.end(t)
        finally:
            if dfh:
                dfh.close()
            ifh.close()

        return node

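    # Added commentary on the fast path above: an incoming delta whose base
    # `chain` is the revision just added (`prev`) can be appended verbatim,
    # because the on-disk chain layout then matches the incoming stream.
    # With illustrative numbers: if the current chain spans
    # end - start == 3000 compressed bytes, textlen == 2000, and the new
    # compressed delta is 1500 bytes, then 3000 + 1500 > 2000 * 2, so the
    # slow path rebuilds the full text via _addrevision() instead.
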
    def strip(self, minlink):
        """truncate the revlog on the first revision with a linkrev >= minlink

        This function is called when we're stripping revision minlink and
        its descendants from the repository.

        We have to remove all revisions with linkrev >= minlink, because
        the equivalent changelog revisions will be renumbered after the
        strip.

        So we truncate the revlog on the first of these revisions, and
        trust that the caller has saved the revisions that shouldn't be
        removed and that it'll re-add them after this truncation.
        """
        if len(self) == 0:
            return

        if isinstance(self.index, lazyindex):
            self._loadindexmap()

        for rev in self:
            if self.index[rev][4] >= minlink:
                break
        else:
            return

        # first truncate the files on disk
        end = self.start(rev)
        if not self._inline:
            df = self.opener(self.datafile, "a")
            df.truncate(end)
            end = rev * self._io.size
        else:
            end += rev * self._io.size

        indexf = self.opener(self.indexfile, "a")
        indexf.truncate(end)

        # then reset internal state in memory to forget those revisions
        self._cache = None
        self._chunkcache = None
        for x in xrange(rev, len(self)):
            del self.nodemap[self.node(x)]

        del self.index[rev:-1]

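    # Truncation arithmetic (illustrative): to drop everything from rev r
    # onward in a split revlog, the .d file is cut at self.start(r) and the
    # .i file at r * self._io.size (64 bytes per surviving entry under
    # REVLOGNG). In an inline revlog both streams share the .i file, so the
    # cut point is self.start(r) + r * self._io.size, i.e. the surviving
    # data plus the surviving entries.
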
    def checksize(self):
        expected = 0
        if len(self):
            expected = max(0, self.end(len(self) - 1))

        try:
            f = self.opener(self.datafile)
            f.seek(0, 2)
            actual = f.tell()
            dd = actual - expected
        except IOError, inst:
            if inst.errno != errno.ENOENT:
                raise
            dd = 0

        try:
            f = self.opener(self.indexfile)
            f.seek(0, 2)
            actual = f.tell()
            s = self._io.size
            i = max(0, actual / s)
            di = actual - (i * s)
            if self._inline:
                databytes = 0
                for r in self:
                    databytes += max(0, self.length(r))
                dd = 0
                di = actual - len(self) * s - databytes
        except IOError, inst:
            if inst.errno != errno.ENOENT:
                raise
            di = 0

        return (dd, di)

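    # Interpretation (added commentary): checksize() returns (dd, di), the
    # difference between the actual and expected sizes of the data and index
    # files; both should be 0 for a healthy revlog. For example, di == 32 on
    # a split revlog means the index ends mid-entry (half of a 64-byte
    # REVLOGNG record), typically after an interrupted write; hg verify uses
    # these values to report such damage.
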
    def files(self):
        res = [ self.indexfile ]
        if not self._inline:
            res.append(self.datafile)
        return res