revlog: add punched revision flag...

Vishakh H
r11745:138c055e default
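The change itself is two lines: it introduces REVIDX_PUNCHED_FLAG and a REVIDX_KNOWN_FLAGS mask next to the existing revlog version constants (see the marked lines in the diff below). The flag name suggests a placeholder ("punched") revision whose data is not actually stored, presumably groundwork for shallow-clone style repositories, but this commit only defines the constants. Per-revision flags live in the low 16 bits of the first index field, so the new bit is visible through the existing flags() accessor. A minimal sketch, assuming a revlog instance rl and a revision number rev (both hypothetical):

    # hypothetical usage; rl is a revlog, rev a revision number
    if rl.flags(rev) & REVIDX_PUNCHED_FLAG:
        pass  # rev is marked as punched
    # bits outside REVIDX_KNOWN_FLAGS would be flags this code cannot handle
    unknown = rl.flags(rev) & ~REVIDX_KNOWN_FLAGS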
@@ -1,1400 +1,1405
1 # revlog.py - storage back-end for mercurial
1 # revlog.py - storage back-end for mercurial
2 #
2 #
3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 #
4 #
5 # This software may be used and distributed according to the terms of the
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
6 # GNU General Public License version 2 or any later version.
7
7
8 """Storage back-end for Mercurial.
8 """Storage back-end for Mercurial.
9
9
10 This provides efficient delta storage with O(1) retrieve and append
10 This provides efficient delta storage with O(1) retrieve and append
11 and O(changes) merge between branches.
11 and O(changes) merge between branches.
12 """
12 """
13
13
14 # import stuff from node for others to import from revlog
14 # import stuff from node for others to import from revlog
15 from node import bin, hex, nullid, nullrev, short #@UnusedImport
15 from node import bin, hex, nullid, nullrev, short #@UnusedImport
16 from i18n import _
16 from i18n import _
17 import changegroup, ancestor, mdiff, parsers, error, util
17 import changegroup, ancestor, mdiff, parsers, error, util
18 import struct, zlib, errno
18 import struct, zlib, errno
19
19
20 _pack = struct.pack
20 _pack = struct.pack
21 _unpack = struct.unpack
21 _unpack = struct.unpack
22 _compress = zlib.compress
22 _compress = zlib.compress
23 _decompress = zlib.decompress
23 _decompress = zlib.decompress
24 _sha = util.sha1
24 _sha = util.sha1
25
25
26 # revlog flags
26 # revlog flags
27 REVLOGV0 = 0
27 REVLOGV0 = 0
28 REVLOGNG = 1
28 REVLOGNG = 1
29 REVLOGNGINLINEDATA = (1 << 16)
29 REVLOGNGINLINEDATA = (1 << 16)
30 REVLOG_DEFAULT_FLAGS = REVLOGNGINLINEDATA
30 REVLOG_DEFAULT_FLAGS = REVLOGNGINLINEDATA
31 REVLOG_DEFAULT_FORMAT = REVLOGNG
31 REVLOG_DEFAULT_FORMAT = REVLOGNG
32 REVLOG_DEFAULT_VERSION = REVLOG_DEFAULT_FORMAT | REVLOG_DEFAULT_FLAGS
32 REVLOG_DEFAULT_VERSION = REVLOG_DEFAULT_FORMAT | REVLOG_DEFAULT_FLAGS
33 REVIDX_PUNCHED_FLAG = 2
34 REVIDX_KNOWN_FLAGS = REVIDX_PUNCHED_FLAG
33
35
34 # amount of data read unconditionally, should be >= 4
36 # amount of data read unconditionally, should be >= 4
35 # when not inline: threshold for using lazy index
37 # when not inline: threshold for using lazy index
36 _prereadsize = 1048576
38 _prereadsize = 1048576
37 # max size of revlog with inline data
39 # max size of revlog with inline data
38 _maxinline = 131072
40 _maxinline = 131072
39
41
40 RevlogError = error.RevlogError
42 RevlogError = error.RevlogError
41 LookupError = error.LookupError
43 LookupError = error.LookupError
42
44
43 def getoffset(q):
45 def getoffset(q):
44 return int(q >> 16)
46 return int(q >> 16)
45
47
46 def gettype(q):
48 def gettype(q):
47 return int(q & 0xFFFF)
49 return int(q & 0xFFFF)
48
50
49 def offset_type(offset, type):
51 def offset_type(offset, type):
50 return long(long(offset) << 16 | type)
52 return long(long(offset) << 16 | type)
51
53
52 nullhash = _sha(nullid)
54 nullhash = _sha(nullid)
53
55
54 def hash(text, p1, p2):
56 def hash(text, p1, p2):
55 """generate a hash from the given text and its parent hashes
57 """generate a hash from the given text and its parent hashes
56
58
57 This hash combines both the current file contents and its history
59 This hash combines both the current file contents and its history
58 in a manner that makes it easy to distinguish nodes with the same
60 in a manner that makes it easy to distinguish nodes with the same
59 content in the revision graph.
61 content in the revision graph.
60 """
62 """
61 # As of now, if one of the parent node is null, p2 is null
63 # As of now, if one of the parent node is null, p2 is null
62 if p2 == nullid:
64 if p2 == nullid:
63 # deep copy of a hash is faster than creating one
65 # deep copy of a hash is faster than creating one
64 s = nullhash.copy()
66 s = nullhash.copy()
65 s.update(p1)
67 s.update(p1)
66 else:
68 else:
67 # none of the parent nodes are nullid
69 # none of the parent nodes are nullid
68 l = [p1, p2]
70 l = [p1, p2]
69 l.sort()
71 l.sort()
70 s = _sha(l[0])
72 s = _sha(l[0])
71 s.update(l[1])
73 s.update(l[1])
72 s.update(text)
74 s.update(text)
73 return s.digest()
75 return s.digest()
74
76
75 def compress(text):
77 def compress(text):
76 """ generate a possibly-compressed representation of text """
78 """ generate a possibly-compressed representation of text """
77 if not text:
79 if not text:
78 return ("", text)
80 return ("", text)
79 l = len(text)
81 l = len(text)
80 bin = None
82 bin = None
81 if l < 44:
83 if l < 44:
82 pass
84 pass
83 elif l > 1000000:
85 elif l > 1000000:
84 # zlib makes an internal copy, thus doubling memory usage for
86 # zlib makes an internal copy, thus doubling memory usage for
85 # large files, so lets do this in pieces
87 # large files, so lets do this in pieces
86 z = zlib.compressobj()
88 z = zlib.compressobj()
87 p = []
89 p = []
88 pos = 0
90 pos = 0
89 while pos < l:
91 while pos < l:
90 pos2 = pos + 2**20
92 pos2 = pos + 2**20
91 p.append(z.compress(text[pos:pos2]))
93 p.append(z.compress(text[pos:pos2]))
92 pos = pos2
94 pos = pos2
93 p.append(z.flush())
95 p.append(z.flush())
94 if sum(map(len, p)) < l:
96 if sum(map(len, p)) < l:
95 bin = "".join(p)
97 bin = "".join(p)
96 else:
98 else:
97 bin = _compress(text)
99 bin = _compress(text)
98 if bin is None or len(bin) > l:
100 if bin is None or len(bin) > l:
99 if text[0] == '\0':
101 if text[0] == '\0':
100 return ("", text)
102 return ("", text)
101 return ('u', text)
103 return ('u', text)
102 return ("", bin)
104 return ("", bin)
103
105
104 def decompress(bin):
106 def decompress(bin):
105 """ decompress the given input """
107 """ decompress the given input """
106 if not bin:
108 if not bin:
107 return bin
109 return bin
108 t = bin[0]
110 t = bin[0]
109 if t == '\0':
111 if t == '\0':
110 return bin
112 return bin
111 if t == 'x':
113 if t == 'x':
112 return _decompress(bin)
114 return _decompress(bin)
113 if t == 'u':
115 if t == 'u':
114 return bin[1:]
116 return bin[1:]
115 raise RevlogError(_("unknown compression type %r") % t)
117 raise RevlogError(_("unknown compression type %r") % t)
116
118
117 class lazyparser(object):
119 class lazyparser(object):
118 """
120 """
119 this class avoids the need to parse the entirety of large indices
121 this class avoids the need to parse the entirety of large indices
120 """
122 """
121
123
122 # lazyparser is not safe to use on windows if win32 extensions not
124 # lazyparser is not safe to use on windows if win32 extensions not
123 # available. it keeps file handle open, which make it not possible
125 # available. it keeps file handle open, which make it not possible
124 # to break hardlinks on local cloned repos.
126 # to break hardlinks on local cloned repos.
125
127
126 def __init__(self, dataf):
128 def __init__(self, dataf):
127 try:
129 try:
128 size = util.fstat(dataf).st_size
130 size = util.fstat(dataf).st_size
129 except AttributeError:
131 except AttributeError:
130 size = 0
132 size = 0
131 self.dataf = dataf
133 self.dataf = dataf
132 self.s = struct.calcsize(indexformatng)
134 self.s = struct.calcsize(indexformatng)
133 self.datasize = size
135 self.datasize = size
134 self.l = size // self.s
136 self.l = size // self.s
135 self.index = [None] * self.l
137 self.index = [None] * self.l
136 self.map = {nullid: nullrev}
138 self.map = {nullid: nullrev}
137 self.allmap = 0
139 self.allmap = 0
138 self.all = 0
140 self.all = 0
139 self.mapfind_count = 0
141 self.mapfind_count = 0
140
142
141 def loadmap(self):
143 def loadmap(self):
142 """
144 """
143 during a commit, we need to make sure the rev being added is
145 during a commit, we need to make sure the rev being added is
144 not a duplicate. This requires loading the entire index,
146 not a duplicate. This requires loading the entire index,
145 which is fairly slow. loadmap can load up just the node map,
147 which is fairly slow. loadmap can load up just the node map,
146 which takes much less time.
148 which takes much less time.
147 """
149 """
148 if self.allmap:
150 if self.allmap:
149 return
151 return
150 end = self.datasize
152 end = self.datasize
151 self.allmap = 1
153 self.allmap = 1
152 cur = 0
154 cur = 0
153 count = 0
155 count = 0
154 blocksize = self.s * 256
156 blocksize = self.s * 256
155 self.dataf.seek(0)
157 self.dataf.seek(0)
156 while cur < end:
158 while cur < end:
157 data = self.dataf.read(blocksize)
159 data = self.dataf.read(blocksize)
158 off = 0
160 off = 0
159 for x in xrange(256):
161 for x in xrange(256):
160 n = data[off + ngshaoffset:off + ngshaoffset + 20]
162 n = data[off + ngshaoffset:off + ngshaoffset + 20]
161 self.map[n] = count
163 self.map[n] = count
162 count += 1
164 count += 1
163 if count >= self.l:
165 if count >= self.l:
164 break
166 break
165 off += self.s
167 off += self.s
166 cur += blocksize
168 cur += blocksize
167
169
168 def loadblock(self, blockstart, blocksize, data=None):
170 def loadblock(self, blockstart, blocksize, data=None):
169 if self.all:
171 if self.all:
170 return
172 return
171 if data is None:
173 if data is None:
172 self.dataf.seek(blockstart)
174 self.dataf.seek(blockstart)
173 if blockstart + blocksize > self.datasize:
175 if blockstart + blocksize > self.datasize:
174 # the revlog may have grown since we've started running,
176 # the revlog may have grown since we've started running,
175 # but we don't have space in self.index for more entries.
177 # but we don't have space in self.index for more entries.
176 # limit blocksize so that we don't get too much data.
178 # limit blocksize so that we don't get too much data.
177 blocksize = max(self.datasize - blockstart, 0)
179 blocksize = max(self.datasize - blockstart, 0)
178 data = self.dataf.read(blocksize)
180 data = self.dataf.read(blocksize)
179 lend = len(data) // self.s
181 lend = len(data) // self.s
180 i = blockstart // self.s
182 i = blockstart // self.s
181 off = 0
183 off = 0
182 # lazyindex supports __delitem__
184 # lazyindex supports __delitem__
183 if lend > len(self.index) - i:
185 if lend > len(self.index) - i:
184 lend = len(self.index) - i
186 lend = len(self.index) - i
185 for x in xrange(lend):
187 for x in xrange(lend):
186 if self.index[i + x] is None:
188 if self.index[i + x] is None:
187 b = data[off : off + self.s]
189 b = data[off : off + self.s]
188 self.index[i + x] = b
190 self.index[i + x] = b
189 n = b[ngshaoffset:ngshaoffset + 20]
191 n = b[ngshaoffset:ngshaoffset + 20]
190 self.map[n] = i + x
192 self.map[n] = i + x
191 off += self.s
193 off += self.s
192
194
193 def findnode(self, node):
195 def findnode(self, node):
194 """search backwards through the index file for a specific node"""
196 """search backwards through the index file for a specific node"""
195 if self.allmap:
197 if self.allmap:
196 return None
198 return None
197
199
198 # hg log will cause many many searches for the manifest
200 # hg log will cause many many searches for the manifest
199 # nodes. After we get called a few times, just load the whole
201 # nodes. After we get called a few times, just load the whole
200 # thing.
202 # thing.
201 if self.mapfind_count > 8:
203 if self.mapfind_count > 8:
202 self.loadmap()
204 self.loadmap()
203 if node in self.map:
205 if node in self.map:
204 return node
206 return node
205 return None
207 return None
206 self.mapfind_count += 1
208 self.mapfind_count += 1
207 last = self.l - 1
209 last = self.l - 1
208 while self.index[last] != None:
210 while self.index[last] != None:
209 if last == 0:
211 if last == 0:
210 self.all = 1
212 self.all = 1
211 self.allmap = 1
213 self.allmap = 1
212 return None
214 return None
213 last -= 1
215 last -= 1
214 end = (last + 1) * self.s
216 end = (last + 1) * self.s
215 blocksize = self.s * 256
217 blocksize = self.s * 256
216 while end >= 0:
218 while end >= 0:
217 start = max(end - blocksize, 0)
219 start = max(end - blocksize, 0)
218 self.dataf.seek(start)
220 self.dataf.seek(start)
219 data = self.dataf.read(end - start)
221 data = self.dataf.read(end - start)
220 findend = end - start
222 findend = end - start
221 while True:
223 while True:
222 # we're searching backwards, so we have to make sure
224 # we're searching backwards, so we have to make sure
223 # we don't find a changeset where this node is a parent
225 # we don't find a changeset where this node is a parent
224 off = data.find(node, 0, findend)
226 off = data.find(node, 0, findend)
225 findend = off
227 findend = off
226 if off >= 0:
228 if off >= 0:
227 i = off / self.s
229 i = off / self.s
228 off = i * self.s
230 off = i * self.s
229 n = data[off + ngshaoffset:off + ngshaoffset + 20]
231 n = data[off + ngshaoffset:off + ngshaoffset + 20]
230 if n == node:
232 if n == node:
231 self.map[n] = i + start / self.s
233 self.map[n] = i + start / self.s
232 return node
234 return node
233 else:
235 else:
234 break
236 break
235 end -= blocksize
237 end -= blocksize
236 return None
238 return None
237
239
238 def loadindex(self, i=None, end=None):
240 def loadindex(self, i=None, end=None):
239 if self.all:
241 if self.all:
240 return
242 return
241 all = False
243 all = False
242 if i is None:
244 if i is None:
243 blockstart = 0
245 blockstart = 0
244 blocksize = (65536 / self.s) * self.s
246 blocksize = (65536 / self.s) * self.s
245 end = self.datasize
247 end = self.datasize
246 all = True
248 all = True
247 else:
249 else:
248 if end:
250 if end:
249 blockstart = i * self.s
251 blockstart = i * self.s
250 end = end * self.s
252 end = end * self.s
251 blocksize = end - blockstart
253 blocksize = end - blockstart
252 else:
254 else:
253 blockstart = (i & ~1023) * self.s
255 blockstart = (i & ~1023) * self.s
254 blocksize = self.s * 1024
256 blocksize = self.s * 1024
255 end = blockstart + blocksize
257 end = blockstart + blocksize
256 while blockstart < end:
258 while blockstart < end:
257 self.loadblock(blockstart, blocksize)
259 self.loadblock(blockstart, blocksize)
258 blockstart += blocksize
260 blockstart += blocksize
259 if all:
261 if all:
260 self.all = True
262 self.all = True
261
263
262 class lazyindex(object):
264 class lazyindex(object):
263 """a lazy version of the index array"""
265 """a lazy version of the index array"""
264 def __init__(self, parser):
266 def __init__(self, parser):
265 self.p = parser
267 self.p = parser
266 def __len__(self):
268 def __len__(self):
267 return len(self.p.index)
269 return len(self.p.index)
268 def load(self, pos):
270 def load(self, pos):
269 if pos < 0:
271 if pos < 0:
270 pos += len(self.p.index)
272 pos += len(self.p.index)
271 self.p.loadindex(pos)
273 self.p.loadindex(pos)
272 return self.p.index[pos]
274 return self.p.index[pos]
273 def __getitem__(self, pos):
275 def __getitem__(self, pos):
274 return _unpack(indexformatng, self.p.index[pos] or self.load(pos))
276 return _unpack(indexformatng, self.p.index[pos] or self.load(pos))
275 def __setitem__(self, pos, item):
277 def __setitem__(self, pos, item):
276 self.p.index[pos] = _pack(indexformatng, *item)
278 self.p.index[pos] = _pack(indexformatng, *item)
277 def __delitem__(self, pos):
279 def __delitem__(self, pos):
278 del self.p.index[pos]
280 del self.p.index[pos]
279 def insert(self, pos, e):
281 def insert(self, pos, e):
280 self.p.index.insert(pos, _pack(indexformatng, *e))
282 self.p.index.insert(pos, _pack(indexformatng, *e))
281 def append(self, e):
283 def append(self, e):
282 self.p.index.append(_pack(indexformatng, *e))
284 self.p.index.append(_pack(indexformatng, *e))
283
285
284 class lazymap(object):
286 class lazymap(object):
285 """a lazy version of the node map"""
287 """a lazy version of the node map"""
286 def __init__(self, parser):
288 def __init__(self, parser):
287 self.p = parser
289 self.p = parser
288 def load(self, key):
290 def load(self, key):
289 n = self.p.findnode(key)
291 n = self.p.findnode(key)
290 if n is None:
292 if n is None:
291 raise KeyError(key)
293 raise KeyError(key)
292 def __contains__(self, key):
294 def __contains__(self, key):
293 if key in self.p.map:
295 if key in self.p.map:
294 return True
296 return True
295 self.p.loadmap()
297 self.p.loadmap()
296 return key in self.p.map
298 return key in self.p.map
297 def __iter__(self):
299 def __iter__(self):
298 yield nullid
300 yield nullid
299 for i, ret in enumerate(self.p.index):
301 for i, ret in enumerate(self.p.index):
300 if not ret:
302 if not ret:
301 self.p.loadindex(i)
303 self.p.loadindex(i)
302 ret = self.p.index[i]
304 ret = self.p.index[i]
303 if isinstance(ret, str):
305 if isinstance(ret, str):
304 ret = _unpack(indexformatng, ret)
306 ret = _unpack(indexformatng, ret)
305 yield ret[7]
307 yield ret[7]
306 def __getitem__(self, key):
308 def __getitem__(self, key):
307 try:
309 try:
308 return self.p.map[key]
310 return self.p.map[key]
309 except KeyError:
311 except KeyError:
310 try:
312 try:
311 self.load(key)
313 self.load(key)
312 return self.p.map[key]
314 return self.p.map[key]
313 except KeyError:
315 except KeyError:
314 raise KeyError("node " + hex(key))
316 raise KeyError("node " + hex(key))
315 def __setitem__(self, key, val):
317 def __setitem__(self, key, val):
316 self.p.map[key] = val
318 self.p.map[key] = val
317 def __delitem__(self, key):
319 def __delitem__(self, key):
318 del self.p.map[key]
320 del self.p.map[key]
319
321
320 indexformatv0 = ">4l20s20s20s"
322 indexformatv0 = ">4l20s20s20s"
321 v0shaoffset = 56
323 v0shaoffset = 56
322
324
323 class revlogoldio(object):
325 class revlogoldio(object):
324 def __init__(self):
326 def __init__(self):
325 self.size = struct.calcsize(indexformatv0)
327 self.size = struct.calcsize(indexformatv0)
326
328
327 def parseindex(self, fp, data, inline):
329 def parseindex(self, fp, data, inline):
328 s = self.size
330 s = self.size
329 index = []
331 index = []
330 nodemap = {nullid: nullrev}
332 nodemap = {nullid: nullrev}
331 n = off = 0
333 n = off = 0
332 if len(data) == _prereadsize:
334 if len(data) == _prereadsize:
333 data += fp.read() # read the rest
335 data += fp.read() # read the rest
334 l = len(data)
336 l = len(data)
335 while off + s <= l:
337 while off + s <= l:
336 cur = data[off:off + s]
338 cur = data[off:off + s]
337 off += s
339 off += s
338 e = _unpack(indexformatv0, cur)
340 e = _unpack(indexformatv0, cur)
339 # transform to revlogv1 format
341 # transform to revlogv1 format
340 e2 = (offset_type(e[0], 0), e[1], -1, e[2], e[3],
342 e2 = (offset_type(e[0], 0), e[1], -1, e[2], e[3],
341 nodemap.get(e[4], nullrev), nodemap.get(e[5], nullrev), e[6])
343 nodemap.get(e[4], nullrev), nodemap.get(e[5], nullrev), e[6])
342 index.append(e2)
344 index.append(e2)
343 nodemap[e[6]] = n
345 nodemap[e[6]] = n
344 n += 1
346 n += 1
345
347
346 return index, nodemap, None
348 return index, nodemap, None
347
349
348 def packentry(self, entry, node, version, rev):
350 def packentry(self, entry, node, version, rev):
349 if gettype(entry[0]):
351 if gettype(entry[0]):
350 raise RevlogError(_("index entry flags need RevlogNG"))
352 raise RevlogError(_("index entry flags need RevlogNG"))
351 e2 = (getoffset(entry[0]), entry[1], entry[3], entry[4],
353 e2 = (getoffset(entry[0]), entry[1], entry[3], entry[4],
352 node(entry[5]), node(entry[6]), entry[7])
354 node(entry[5]), node(entry[6]), entry[7])
353 return _pack(indexformatv0, *e2)
355 return _pack(indexformatv0, *e2)
354
356
355 # index ng:
357 # index ng:
356 # 6 bytes: offset
358 # 6 bytes: offset
357 # 2 bytes: flags
359 # 2 bytes: flags
358 # 4 bytes: compressed length
360 # 4 bytes: compressed length
359 # 4 bytes: uncompressed length
361 # 4 bytes: uncompressed length
360 # 4 bytes: base rev
362 # 4 bytes: base rev
361 # 4 bytes: link rev
363 # 4 bytes: link rev
362 # 4 bytes: parent 1 rev
364 # 4 bytes: parent 1 rev
363 # 4 bytes: parent 2 rev
365 # 4 bytes: parent 2 rev
364 # 32 bytes: nodeid
366 # 32 bytes: nodeid
365 indexformatng = ">Qiiiiii20s12x"
367 indexformatng = ">Qiiiiii20s12x"
366 ngshaoffset = 32
368 ngshaoffset = 32
367 versionformat = ">I"
369 versionformat = ">I"
368
370
369 class revlogio(object):
371 class revlogio(object):
370 def __init__(self):
372 def __init__(self):
371 self.size = struct.calcsize(indexformatng)
373 self.size = struct.calcsize(indexformatng)
372
374
373 def parseindex(self, fp, data, inline):
375 def parseindex(self, fp, data, inline):
374 if len(data) == _prereadsize:
376 if len(data) == _prereadsize:
375 if util.openhardlinks() and not inline:
377 if util.openhardlinks() and not inline:
376 # big index, let's parse it on demand
378 # big index, let's parse it on demand
377 parser = lazyparser(fp)
379 parser = lazyparser(fp)
378 index = lazyindex(parser)
380 index = lazyindex(parser)
379 nodemap = lazymap(parser)
381 nodemap = lazymap(parser)
380 e = list(index[0])
382 e = list(index[0])
381 type = gettype(e[0])
383 type = gettype(e[0])
382 e[0] = offset_type(0, type)
384 e[0] = offset_type(0, type)
383 index[0] = e
385 index[0] = e
384 return index, nodemap, None
386 return index, nodemap, None
385 else:
387 else:
386 data += fp.read()
388 data += fp.read()
387
389
388 # call the C implementation to parse the index data
390 # call the C implementation to parse the index data
389 index, nodemap, cache = parsers.parse_index(data, inline)
391 index, nodemap, cache = parsers.parse_index(data, inline)
390 return index, nodemap, cache
392 return index, nodemap, cache
391
393
392 def packentry(self, entry, node, version, rev):
394 def packentry(self, entry, node, version, rev):
393 p = _pack(indexformatng, *entry)
395 p = _pack(indexformatng, *entry)
394 if rev == 0:
396 if rev == 0:
395 p = _pack(versionformat, version) + p[4:]
397 p = _pack(versionformat, version) + p[4:]
396 return p
398 return p
397
399
398 class revlog(object):
400 class revlog(object):
399 """
401 """
400 the underlying revision storage object
402 the underlying revision storage object
401
403
402 A revlog consists of two parts, an index and the revision data.
404 A revlog consists of two parts, an index and the revision data.
403
405
404 The index is a file with a fixed record size containing
406 The index is a file with a fixed record size containing
405 information on each revision, including its nodeid (hash), the
407 information on each revision, including its nodeid (hash), the
406 nodeids of its parents, the position and offset of its data within
408 nodeids of its parents, the position and offset of its data within
407 the data file, and the revision it's based on. Finally, each entry
409 the data file, and the revision it's based on. Finally, each entry
408 contains a linkrev entry that can serve as a pointer to external
410 contains a linkrev entry that can serve as a pointer to external
409 data.
411 data.
410
412
411 The revision data itself is a linear collection of data chunks.
413 The revision data itself is a linear collection of data chunks.
412 Each chunk represents a revision and is usually represented as a
414 Each chunk represents a revision and is usually represented as a
413 delta against the previous chunk. To bound lookup time, runs of
415 delta against the previous chunk. To bound lookup time, runs of
414 deltas are limited to about 2 times the length of the original
416 deltas are limited to about 2 times the length of the original
415 version data. This makes retrieval of a version proportional to
417 version data. This makes retrieval of a version proportional to
416 its size, or O(1) relative to the number of revisions.
418 its size, or O(1) relative to the number of revisions.
417
419
418 Both pieces of the revlog are written to in an append-only
420 Both pieces of the revlog are written to in an append-only
419 fashion, which means we never need to rewrite a file to insert or
421 fashion, which means we never need to rewrite a file to insert or
420 remove data, and can use some simple techniques to avoid the need
422 remove data, and can use some simple techniques to avoid the need
421 for locking while reading.
423 for locking while reading.
422 """
424 """
423 def __init__(self, opener, indexfile):
425 def __init__(self, opener, indexfile):
424 """
426 """
425 create a revlog object
427 create a revlog object
426
428
427 opener is a function that abstracts the file opening operation
429 opener is a function that abstracts the file opening operation
428 and can be used to implement COW semantics or the like.
430 and can be used to implement COW semantics or the like.
429 """
431 """
430 self.indexfile = indexfile
432 self.indexfile = indexfile
431 self.datafile = indexfile[:-2] + ".d"
433 self.datafile = indexfile[:-2] + ".d"
432 self.opener = opener
434 self.opener = opener
433 self._cache = None
435 self._cache = None
434 self._chunkcache = (0, '')
436 self._chunkcache = (0, '')
435 self.nodemap = {nullid: nullrev}
437 self.nodemap = {nullid: nullrev}
436 self.index = []
438 self.index = []
437
439
438 v = REVLOG_DEFAULT_VERSION
440 v = REVLOG_DEFAULT_VERSION
439 if hasattr(opener, 'options') and 'defversion' in opener.options:
441 if hasattr(opener, 'options') and 'defversion' in opener.options:
440 v = opener.options['defversion']
442 v = opener.options['defversion']
441 if v & REVLOGNG:
443 if v & REVLOGNG:
442 v |= REVLOGNGINLINEDATA
444 v |= REVLOGNGINLINEDATA
443
445
444 i = ''
446 i = ''
445 try:
447 try:
446 f = self.opener(self.indexfile)
448 f = self.opener(self.indexfile)
447 if "nonlazy" in getattr(self.opener, 'options', {}):
449 if "nonlazy" in getattr(self.opener, 'options', {}):
448 i = f.read()
450 i = f.read()
449 else:
451 else:
450 i = f.read(_prereadsize)
452 i = f.read(_prereadsize)
451 if len(i) > 0:
453 if len(i) > 0:
452 v = struct.unpack(versionformat, i[:4])[0]
454 v = struct.unpack(versionformat, i[:4])[0]
453 except IOError, inst:
455 except IOError, inst:
454 if inst.errno != errno.ENOENT:
456 if inst.errno != errno.ENOENT:
455 raise
457 raise
456
458
457 self.version = v
459 self.version = v
458 self._inline = v & REVLOGNGINLINEDATA
460 self._inline = v & REVLOGNGINLINEDATA
459 flags = v & ~0xFFFF
461 flags = v & ~0xFFFF
460 fmt = v & 0xFFFF
462 fmt = v & 0xFFFF
461 if fmt == REVLOGV0 and flags:
463 if fmt == REVLOGV0 and flags:
462 raise RevlogError(_("index %s unknown flags %#04x for format v0")
464 raise RevlogError(_("index %s unknown flags %#04x for format v0")
463 % (self.indexfile, flags >> 16))
465 % (self.indexfile, flags >> 16))
464 elif fmt == REVLOGNG and flags & ~REVLOGNGINLINEDATA:
466 elif fmt == REVLOGNG and flags & ~REVLOGNGINLINEDATA:
465 raise RevlogError(_("index %s unknown flags %#04x for revlogng")
467 raise RevlogError(_("index %s unknown flags %#04x for revlogng")
466 % (self.indexfile, flags >> 16))
468 % (self.indexfile, flags >> 16))
467 elif fmt > REVLOGNG:
469 elif fmt > REVLOGNG:
468 raise RevlogError(_("index %s unknown format %d")
470 raise RevlogError(_("index %s unknown format %d")
469 % (self.indexfile, fmt))
471 % (self.indexfile, fmt))
470
472
471 self._io = revlogio()
473 self._io = revlogio()
472 if self.version == REVLOGV0:
474 if self.version == REVLOGV0:
473 self._io = revlogoldio()
475 self._io = revlogoldio()
474 if i:
476 if i:
475 try:
477 try:
476 d = self._io.parseindex(f, i, self._inline)
478 d = self._io.parseindex(f, i, self._inline)
477 except (ValueError, IndexError):
479 except (ValueError, IndexError):
478 raise RevlogError(_("index %s is corrupted") % (self.indexfile))
480 raise RevlogError(_("index %s is corrupted") % (self.indexfile))
479 self.index, self.nodemap, self._chunkcache = d
481 self.index, self.nodemap, self._chunkcache = d
480 if not self._chunkcache:
482 if not self._chunkcache:
481 self._chunkclear()
483 self._chunkclear()
482
484
483 # add the magic null revision at -1 (if it hasn't been done already)
485 # add the magic null revision at -1 (if it hasn't been done already)
484 if (self.index == [] or isinstance(self.index, lazyindex) or
486 if (self.index == [] or isinstance(self.index, lazyindex) or
485 self.index[-1][7] != nullid) :
487 self.index[-1][7] != nullid) :
486 self.index.append((0, 0, 0, -1, -1, -1, -1, nullid))
488 self.index.append((0, 0, 0, -1, -1, -1, -1, nullid))
487
489
488 def _loadindex(self, start, end):
490 def _loadindex(self, start, end):
489 """load a block of indexes all at once from the lazy parser"""
491 """load a block of indexes all at once from the lazy parser"""
490 if isinstance(self.index, lazyindex):
492 if isinstance(self.index, lazyindex):
491 self.index.p.loadindex(start, end)
493 self.index.p.loadindex(start, end)
492
494
493 def _loadindexmap(self):
495 def _loadindexmap(self):
494 """loads both the map and the index from the lazy parser"""
496 """loads both the map and the index from the lazy parser"""
495 if isinstance(self.index, lazyindex):
497 if isinstance(self.index, lazyindex):
496 p = self.index.p
498 p = self.index.p
497 p.loadindex()
499 p.loadindex()
498 self.nodemap = p.map
500 self.nodemap = p.map
499
501
500 def _loadmap(self):
502 def _loadmap(self):
501 """loads the map from the lazy parser"""
503 """loads the map from the lazy parser"""
502 if isinstance(self.nodemap, lazymap):
504 if isinstance(self.nodemap, lazymap):
503 self.nodemap.p.loadmap()
505 self.nodemap.p.loadmap()
504 self.nodemap = self.nodemap.p.map
506 self.nodemap = self.nodemap.p.map
505
507
506 def tip(self):
508 def tip(self):
507 return self.node(len(self.index) - 2)
509 return self.node(len(self.index) - 2)
508 def __len__(self):
510 def __len__(self):
509 return len(self.index) - 1
511 return len(self.index) - 1
510 def __iter__(self):
512 def __iter__(self):
511 for i in xrange(len(self)):
513 for i in xrange(len(self)):
512 yield i
514 yield i
513 def rev(self, node):
515 def rev(self, node):
514 try:
516 try:
515 return self.nodemap[node]
517 return self.nodemap[node]
516 except KeyError:
518 except KeyError:
517 raise LookupError(node, self.indexfile, _('no node'))
519 raise LookupError(node, self.indexfile, _('no node'))
518 def node(self, rev):
520 def node(self, rev):
519 return self.index[rev][7]
521 return self.index[rev][7]
520 def linkrev(self, rev):
522 def linkrev(self, rev):
521 return self.index[rev][4]
523 return self.index[rev][4]
522 def parents(self, node):
524 def parents(self, node):
523 i = self.index
525 i = self.index
524 d = i[self.rev(node)]
526 d = i[self.rev(node)]
525 return i[d[5]][7], i[d[6]][7] # map revisions to nodes inline
527 return i[d[5]][7], i[d[6]][7] # map revisions to nodes inline
526 def parentrevs(self, rev):
528 def parentrevs(self, rev):
527 return self.index[rev][5:7]
529 return self.index[rev][5:7]
528 def start(self, rev):
530 def start(self, rev):
529 return int(self.index[rev][0] >> 16)
531 return int(self.index[rev][0] >> 16)
530 def end(self, rev):
532 def end(self, rev):
531 return self.start(rev) + self.length(rev)
533 return self.start(rev) + self.length(rev)
532 def length(self, rev):
534 def length(self, rev):
533 return self.index[rev][1]
535 return self.index[rev][1]
534 def base(self, rev):
536 def base(self, rev):
535 return self.index[rev][3]
537 return self.index[rev][3]
536 def flags(self, rev):
538 def flags(self, rev):
537 return self.index[rev][0] & 0xFFFF
539 return self.index[rev][0] & 0xFFFF
538
540
539 def size(self, rev):
541 def size(self, rev):
540 """return the length of the uncompressed text for a given revision"""
542 """return the length of the uncompressed text for a given revision"""
541 l = self.index[rev][2]
543 l = self.index[rev][2]
542 if l >= 0:
544 if l >= 0:
543 return l
545 return l
544
546
545 t = self.revision(self.node(rev))
547 t = self.revision(self.node(rev))
546 return len(t)
548 return len(t)
547
549
548 def reachable(self, node, stop=None):
550 def reachable(self, node, stop=None):
549 """return the set of all nodes ancestral to a given node, including
551 """return the set of all nodes ancestral to a given node, including
550 the node itself, stopping when stop is matched"""
552 the node itself, stopping when stop is matched"""
551 reachable = set((node,))
553 reachable = set((node,))
552 visit = [node]
554 visit = [node]
553 if stop:
555 if stop:
554 stopn = self.rev(stop)
556 stopn = self.rev(stop)
555 else:
557 else:
556 stopn = 0
558 stopn = 0
557 while visit:
559 while visit:
558 n = visit.pop(0)
560 n = visit.pop(0)
559 if n == stop:
561 if n == stop:
560 continue
562 continue
561 if n == nullid:
563 if n == nullid:
562 continue
564 continue
563 for p in self.parents(n):
565 for p in self.parents(n):
564 if self.rev(p) < stopn:
566 if self.rev(p) < stopn:
565 continue
567 continue
566 if p not in reachable:
568 if p not in reachable:
567 reachable.add(p)
569 reachable.add(p)
568 visit.append(p)
570 visit.append(p)
569 return reachable
571 return reachable
570
572
571 def ancestors(self, *revs):
573 def ancestors(self, *revs):
572 """Generate the ancestors of 'revs' in reverse topological order.
574 """Generate the ancestors of 'revs' in reverse topological order.
573
575
574 Yield a sequence of revision numbers starting with the parents
576 Yield a sequence of revision numbers starting with the parents
575 of each revision in revs, i.e., each revision is *not* considered
577 of each revision in revs, i.e., each revision is *not* considered
576 an ancestor of itself. Results are in breadth-first order:
578 an ancestor of itself. Results are in breadth-first order:
577 parents of each rev in revs, then parents of those, etc. Result
579 parents of each rev in revs, then parents of those, etc. Result
578 does not include the null revision."""
580 does not include the null revision."""
579 visit = list(revs)
581 visit = list(revs)
580 seen = set([nullrev])
582 seen = set([nullrev])
581 while visit:
583 while visit:
582 for parent in self.parentrevs(visit.pop(0)):
584 for parent in self.parentrevs(visit.pop(0)):
583 if parent not in seen:
585 if parent not in seen:
584 visit.append(parent)
586 visit.append(parent)
585 seen.add(parent)
587 seen.add(parent)
586 yield parent
588 yield parent
587
589
588 def descendants(self, *revs):
590 def descendants(self, *revs):
589 """Generate the descendants of 'revs' in revision order.
591 """Generate the descendants of 'revs' in revision order.
590
592
591 Yield a sequence of revision numbers starting with a child of
593 Yield a sequence of revision numbers starting with a child of
592 some rev in revs, i.e., each revision is *not* considered a
594 some rev in revs, i.e., each revision is *not* considered a
593 descendant of itself. Results are ordered by revision number (a
595 descendant of itself. Results are ordered by revision number (a
594 topological sort)."""
596 topological sort)."""
595 seen = set(revs)
597 seen = set(revs)
596 for i in xrange(min(revs) + 1, len(self)):
598 for i in xrange(min(revs) + 1, len(self)):
597 for x in self.parentrevs(i):
599 for x in self.parentrevs(i):
598 if x != nullrev and x in seen:
600 if x != nullrev and x in seen:
599 seen.add(i)
601 seen.add(i)
600 yield i
602 yield i
601 break
603 break
602
604
603 def findmissing(self, common=None, heads=None):
605 def findmissing(self, common=None, heads=None):
604 """Return the ancestors of heads that are not ancestors of common.
606 """Return the ancestors of heads that are not ancestors of common.
605
607
606 More specifically, return a list of nodes N such that every N
608 More specifically, return a list of nodes N such that every N
607 satisfies the following constraints:
609 satisfies the following constraints:
608
610
609 1. N is an ancestor of some node in 'heads'
611 1. N is an ancestor of some node in 'heads'
610 2. N is not an ancestor of any node in 'common'
612 2. N is not an ancestor of any node in 'common'
611
613
612 The list is sorted by revision number, meaning it is
614 The list is sorted by revision number, meaning it is
613 topologically sorted.
615 topologically sorted.
614
616
615 'heads' and 'common' are both lists of node IDs. If heads is
617 'heads' and 'common' are both lists of node IDs. If heads is
616 not supplied, uses all of the revlog's heads. If common is not
618 not supplied, uses all of the revlog's heads. If common is not
617 supplied, uses nullid."""
619 supplied, uses nullid."""
618 if common is None:
620 if common is None:
619 common = [nullid]
621 common = [nullid]
620 if heads is None:
622 if heads is None:
621 heads = self.heads()
623 heads = self.heads()
622
624
623 common = [self.rev(n) for n in common]
625 common = [self.rev(n) for n in common]
624 heads = [self.rev(n) for n in heads]
626 heads = [self.rev(n) for n in heads]
625
627
626 # we want the ancestors, but inclusive
628 # we want the ancestors, but inclusive
627 has = set(self.ancestors(*common))
629 has = set(self.ancestors(*common))
628 has.add(nullrev)
630 has.add(nullrev)
629 has.update(common)
631 has.update(common)
630
632
631 # take all ancestors from heads that aren't in has
633 # take all ancestors from heads that aren't in has
632 missing = set()
634 missing = set()
633 visit = [r for r in heads if r not in has]
635 visit = [r for r in heads if r not in has]
634 while visit:
636 while visit:
635 r = visit.pop(0)
637 r = visit.pop(0)
636 if r in missing:
638 if r in missing:
637 continue
639 continue
638 else:
640 else:
639 missing.add(r)
641 missing.add(r)
640 for p in self.parentrevs(r):
642 for p in self.parentrevs(r):
641 if p not in has:
643 if p not in has:
642 visit.append(p)
644 visit.append(p)
643 missing = list(missing)
645 missing = list(missing)
644 missing.sort()
646 missing.sort()
645 return [self.node(r) for r in missing]
647 return [self.node(r) for r in missing]
646
648
647 def nodesbetween(self, roots=None, heads=None):
649 def nodesbetween(self, roots=None, heads=None):
648 """Return a topological path from 'roots' to 'heads'.
650 """Return a topological path from 'roots' to 'heads'.
649
651
650 Return a tuple (nodes, outroots, outheads) where 'nodes' is a
652 Return a tuple (nodes, outroots, outheads) where 'nodes' is a
651 topologically sorted list of all nodes N that satisfy both of
653 topologically sorted list of all nodes N that satisfy both of
652 these constraints:
654 these constraints:
653
655
654 1. N is a descendant of some node in 'roots'
656 1. N is a descendant of some node in 'roots'
655 2. N is an ancestor of some node in 'heads'
657 2. N is an ancestor of some node in 'heads'
656
658
657 Every node is considered to be both a descendant and an ancestor
659 Every node is considered to be both a descendant and an ancestor
658 of itself, so every reachable node in 'roots' and 'heads' will be
660 of itself, so every reachable node in 'roots' and 'heads' will be
659 included in 'nodes'.
661 included in 'nodes'.
660
662
661 'outroots' is the list of reachable nodes in 'roots', i.e., the
663 'outroots' is the list of reachable nodes in 'roots', i.e., the
662 subset of 'roots' that is returned in 'nodes'. Likewise,
664 subset of 'roots' that is returned in 'nodes'. Likewise,
663 'outheads' is the subset of 'heads' that is also in 'nodes'.
665 'outheads' is the subset of 'heads' that is also in 'nodes'.
664
666
665 'roots' and 'heads' are both lists of node IDs. If 'roots' is
667 'roots' and 'heads' are both lists of node IDs. If 'roots' is
666 unspecified, uses nullid as the only root. If 'heads' is
668 unspecified, uses nullid as the only root. If 'heads' is
667 unspecified, uses list of all of the revlog's heads."""
669 unspecified, uses list of all of the revlog's heads."""
668 nonodes = ([], [], [])
670 nonodes = ([], [], [])
669 if roots is not None:
671 if roots is not None:
670 roots = list(roots)
672 roots = list(roots)
671 if not roots:
673 if not roots:
672 return nonodes
674 return nonodes
673 lowestrev = min([self.rev(n) for n in roots])
675 lowestrev = min([self.rev(n) for n in roots])
674 else:
676 else:
675 roots = [nullid] # Everybody's a descendent of nullid
677 roots = [nullid] # Everybody's a descendent of nullid
676 lowestrev = nullrev
678 lowestrev = nullrev
677 if (lowestrev == nullrev) and (heads is None):
679 if (lowestrev == nullrev) and (heads is None):
678 # We want _all_ the nodes!
680 # We want _all_ the nodes!
679 return ([self.node(r) for r in self], [nullid], list(self.heads()))
681 return ([self.node(r) for r in self], [nullid], list(self.heads()))
680 if heads is None:
682 if heads is None:
681 # All nodes are ancestors, so the latest ancestor is the last
683 # All nodes are ancestors, so the latest ancestor is the last
682 # node.
684 # node.
683 highestrev = len(self) - 1
685 highestrev = len(self) - 1
684 # Set ancestors to None to signal that every node is an ancestor.
686 # Set ancestors to None to signal that every node is an ancestor.
685 ancestors = None
687 ancestors = None
686 # Set heads to an empty dictionary for later discovery of heads
688 # Set heads to an empty dictionary for later discovery of heads
687 heads = {}
689 heads = {}
688 else:
690 else:
689 heads = list(heads)
691 heads = list(heads)
690 if not heads:
692 if not heads:
691 return nonodes
693 return nonodes
692 ancestors = set()
694 ancestors = set()
693 # Turn heads into a dictionary so we can remove 'fake' heads.
695 # Turn heads into a dictionary so we can remove 'fake' heads.
694 # Also, later we will be using it to filter out the heads we can't
696 # Also, later we will be using it to filter out the heads we can't
695 # find from roots.
697 # find from roots.
696 heads = dict.fromkeys(heads, 0)
698 heads = dict.fromkeys(heads, 0)
697 # Start at the top and keep marking parents until we're done.
699 # Start at the top and keep marking parents until we're done.
698 nodestotag = set(heads)
700 nodestotag = set(heads)
699 # Remember where the top was so we can use it as a limit later.
701 # Remember where the top was so we can use it as a limit later.
700 highestrev = max([self.rev(n) for n in nodestotag])
702 highestrev = max([self.rev(n) for n in nodestotag])
701 while nodestotag:
703 while nodestotag:
702 # grab a node to tag
704 # grab a node to tag
703 n = nodestotag.pop()
705 n = nodestotag.pop()
704 # Never tag nullid
706 # Never tag nullid
705 if n == nullid:
707 if n == nullid:
706 continue
708 continue
707 # A node's revision number represents its place in a
709 # A node's revision number represents its place in a
708 # topologically sorted list of nodes.
710 # topologically sorted list of nodes.
709 r = self.rev(n)
711 r = self.rev(n)
710 if r >= lowestrev:
712 if r >= lowestrev:
711 if n not in ancestors:
713 if n not in ancestors:
712 # If we are possibly a descendent of one of the roots
714 # If we are possibly a descendent of one of the roots
713 # and we haven't already been marked as an ancestor
715 # and we haven't already been marked as an ancestor
714 ancestors.add(n) # Mark as ancestor
716 ancestors.add(n) # Mark as ancestor
715 # Add non-nullid parents to list of nodes to tag.
717 # Add non-nullid parents to list of nodes to tag.
716 nodestotag.update([p for p in self.parents(n) if
718 nodestotag.update([p for p in self.parents(n) if
717 p != nullid])
719 p != nullid])
718 elif n in heads: # We've seen it before, is it a fake head?
720 elif n in heads: # We've seen it before, is it a fake head?
719 # So it is, real heads should not be the ancestors of
721 # So it is, real heads should not be the ancestors of
720 # any other heads.
722 # any other heads.
721 heads.pop(n)
723 heads.pop(n)
722 if not ancestors:
724 if not ancestors:
723 return nonodes
725 return nonodes
724 # Now that we have our set of ancestors, we want to remove any
726 # Now that we have our set of ancestors, we want to remove any
725 # roots that are not ancestors.
727 # roots that are not ancestors.
726
728
727 # If one of the roots was nullid, everything is included anyway.
729 # If one of the roots was nullid, everything is included anyway.
728 if lowestrev > nullrev:
730 if lowestrev > nullrev:
729 # But, since we weren't, let's recompute the lowest rev to not
731 # But, since we weren't, let's recompute the lowest rev to not
730 # include roots that aren't ancestors.
732 # include roots that aren't ancestors.
731
733
732 # Filter out roots that aren't ancestors of heads
734 # Filter out roots that aren't ancestors of heads
733 roots = [n for n in roots if n in ancestors]
735 roots = [n for n in roots if n in ancestors]
734 # Recompute the lowest revision
736 # Recompute the lowest revision
735 if roots:
737 if roots:
736 lowestrev = min([self.rev(n) for n in roots])
738 lowestrev = min([self.rev(n) for n in roots])
737 else:
739 else:
738 # No more roots? Return empty list
740 # No more roots? Return empty list
739 return nonodes
741 return nonodes
740 else:
742 else:
741 # We are descending from nullid, and don't need to care about
743 # We are descending from nullid, and don't need to care about
742 # any other roots.
744 # any other roots.
743 lowestrev = nullrev
745 lowestrev = nullrev
744 roots = [nullid]
746 roots = [nullid]
745 # Transform our roots list into a set.
747 # Transform our roots list into a set.
746 descendents = set(roots)
748 descendents = set(roots)
747 # Also, keep the original roots so we can filter out roots that aren't
749 # Also, keep the original roots so we can filter out roots that aren't
748 # 'real' roots (i.e. are descended from other roots).
750 # 'real' roots (i.e. are descended from other roots).
749 roots = descendents.copy()
751 roots = descendents.copy()
750 # Our topologically sorted list of output nodes.
752 # Our topologically sorted list of output nodes.
751 orderedout = []
753 orderedout = []
752 # Don't start at nullid since we don't want nullid in our output list,
754 # Don't start at nullid since we don't want nullid in our output list,
753 # and if nullid shows up in descedents, empty parents will look like
755 # and if nullid shows up in descedents, empty parents will look like
754 # they're descendents.
756 # they're descendents.
755 for r in xrange(max(lowestrev, 0), highestrev + 1):
757 for r in xrange(max(lowestrev, 0), highestrev + 1):
756 n = self.node(r)
758 n = self.node(r)
757 isdescendent = False
759 isdescendent = False
758 if lowestrev == nullrev: # Everybody is a descendent of nullid
760 if lowestrev == nullrev: # Everybody is a descendent of nullid
759 isdescendent = True
761 isdescendent = True
760 elif n in descendents:
762 elif n in descendents:
761 # n is already a descendent
763 # n is already a descendent
762 isdescendent = True
764 isdescendent = True
763 # This check only needs to be done here because all the roots
765 # This check only needs to be done here because all the roots
764 # will start being marked is descendents before the loop.
766 # will start being marked is descendents before the loop.
765 if n in roots:
767 if n in roots:
766 # If n was a root, check if it's a 'real' root.
768 # If n was a root, check if it's a 'real' root.
767 p = tuple(self.parents(n))
769 p = tuple(self.parents(n))
768 # If any of its parents are descendents, it's not a root.
770 # If any of its parents are descendents, it's not a root.
769 if (p[0] in descendents) or (p[1] in descendents):
771 if (p[0] in descendents) or (p[1] in descendents):
770 roots.remove(n)
772 roots.remove(n)
771 else:
773 else:
772 p = tuple(self.parents(n))
774 p = tuple(self.parents(n))
773 # A node is a descendent if either of its parents are
775 # A node is a descendent if either of its parents are
774 # descendents. (We seeded the dependents list with the roots
776 # descendents. (We seeded the dependents list with the roots
775 # up there, remember?)
777 # up there, remember?)
776 if (p[0] in descendents) or (p[1] in descendents):
778 if (p[0] in descendents) or (p[1] in descendents):
777 descendents.add(n)
779 descendents.add(n)
778 isdescendent = True
780 isdescendent = True
779 if isdescendent and ((ancestors is None) or (n in ancestors)):
781 if isdescendent and ((ancestors is None) or (n in ancestors)):
780 # Only include nodes that are both descendents and ancestors.
782 # Only include nodes that are both descendents and ancestors.
781 orderedout.append(n)
783 orderedout.append(n)
782 if (ancestors is not None) and (n in heads):
784 if (ancestors is not None) and (n in heads):
783 # We're trying to figure out which heads are reachable
785 # We're trying to figure out which heads are reachable
784 # from roots.
786 # from roots.
785 # Mark this head as having been reached
787 # Mark this head as having been reached
786 heads[n] = 1
788 heads[n] = 1
787 elif ancestors is None:
789 elif ancestors is None:
788 # Otherwise, we're trying to discover the heads.
790 # Otherwise, we're trying to discover the heads.
789 # Assume this is a head because if it isn't, the next step
791 # Assume this is a head because if it isn't, the next step
790 # will eventually remove it.
792 # will eventually remove it.
791 heads[n] = 1
793 heads[n] = 1
792 # But, obviously its parents aren't.
794 # But, obviously its parents aren't.
793 for p in self.parents(n):
795 for p in self.parents(n):
794 heads.pop(p, None)
796 heads.pop(p, None)
795 heads = [n for n in heads.iterkeys() if heads[n] != 0]
797 heads = [n for n in heads.iterkeys() if heads[n] != 0]
796 roots = list(roots)
798 roots = list(roots)
797 assert orderedout
799 assert orderedout
798 assert roots
800 assert roots
799 assert heads
801 assert heads
800 return (orderedout, roots, heads)
802 return (orderedout, roots, heads)
801
803
802 def heads(self, start=None, stop=None):
804 def heads(self, start=None, stop=None):
803 """return the list of all nodes that have no children
805 """return the list of all nodes that have no children
804
806
805 if start is specified, only heads that are descendants of
807 if start is specified, only heads that are descendants of
806 start will be returned
808 start will be returned
807 if stop is specified, it will consider all the revs from stop
809 if stop is specified, it will consider all the revs from stop
808 as if they had no children
810 as if they had no children
809 """
811 """
810 if start is None and stop is None:
812 if start is None and stop is None:
811 count = len(self)
813 count = len(self)
812 if not count:
814 if not count:
813 return [nullid]
815 return [nullid]
814 ishead = [1] * (count + 1)
816 ishead = [1] * (count + 1)
815 index = self.index
817 index = self.index
816 for r in xrange(count):
818 for r in xrange(count):
817 e = index[r]
819 e = index[r]
818 ishead[e[5]] = ishead[e[6]] = 0
820 ishead[e[5]] = ishead[e[6]] = 0
819 return [self.node(r) for r in xrange(count) if ishead[r]]
821 return [self.node(r) for r in xrange(count) if ishead[r]]
820
822
821 if start is None:
823 if start is None:
822 start = nullid
824 start = nullid
823 if stop is None:
825 if stop is None:
824 stop = []
826 stop = []
825 stoprevs = set([self.rev(n) for n in stop])
827 stoprevs = set([self.rev(n) for n in stop])
826 startrev = self.rev(start)
828 startrev = self.rev(start)
827 reachable = set((startrev,))
829 reachable = set((startrev,))
828 heads = set((startrev,))
830 heads = set((startrev,))
829
831
830 parentrevs = self.parentrevs
832 parentrevs = self.parentrevs
831 for r in xrange(startrev + 1, len(self)):
833 for r in xrange(startrev + 1, len(self)):
832 for p in parentrevs(r):
834 for p in parentrevs(r):
833 if p in reachable:
835 if p in reachable:
834 if r not in stoprevs:
836 if r not in stoprevs:
835 reachable.add(r)
837 reachable.add(r)
836 heads.add(r)
838 heads.add(r)
837 if p in heads and p not in stoprevs:
839 if p in heads and p not in stoprevs:
838 heads.remove(p)
840 heads.remove(p)
839
841
840 return [self.node(r) for r in heads]
842 return [self.node(r) for r in heads]
841
843
842 def children(self, node):
844 def children(self, node):
843 """find the children of a given node"""
845 """find the children of a given node"""
844 c = []
846 c = []
845 p = self.rev(node)
847 p = self.rev(node)
846 for r in range(p + 1, len(self)):
848 for r in range(p + 1, len(self)):
847 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
849 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
848 if prevs:
850 if prevs:
849 for pr in prevs:
851 for pr in prevs:
850 if pr == p:
852 if pr == p:
851 c.append(self.node(r))
853 c.append(self.node(r))
852 elif p == nullrev:
854 elif p == nullrev:
853 c.append(self.node(r))
855 c.append(self.node(r))
854 return c
856 return c
855
857
856 def descendant(self, start, end):
858 def descendant(self, start, end):
857 for i in self.descendants(start):
859 for i in self.descendants(start):
858 if i == end:
860 if i == end:
859 return True
861 return True
860 elif i > end:
862 elif i > end:
861 break
863 break
862 return False
864 return False
863
865
864 def ancestor(self, a, b):
866 def ancestor(self, a, b):
865 """calculate the least common ancestor of nodes a and b"""
867 """calculate the least common ancestor of nodes a and b"""
866
868
867 # fast path, check if it is a descendant
869 # fast path, check if it is a descendant
868 a, b = self.rev(a), self.rev(b)
870 a, b = self.rev(a), self.rev(b)
869 start, end = sorted((a, b))
871 start, end = sorted((a, b))
870 if self.descendant(start, end):
872 if self.descendant(start, end):
871 return self.node(start)
873 return self.node(start)
872
874
873 def parents(rev):
875 def parents(rev):
874 return [p for p in self.parentrevs(rev) if p != nullrev]
876 return [p for p in self.parentrevs(rev) if p != nullrev]
875
877
876 c = ancestor.ancestor(a, b, parents)
878 c = ancestor.ancestor(a, b, parents)
877 if c is None:
879 if c is None:
878 return nullid
880 return nullid
879
881
880 return self.node(c)
882 return self.node(c)
881
883
882 def _match(self, id):
884 def _match(self, id):
883 if isinstance(id, (long, int)):
885 if isinstance(id, (long, int)):
884 # rev
886 # rev
885 return self.node(id)
887 return self.node(id)
886 if len(id) == 20:
888 if len(id) == 20:
887 # possibly a binary node
889 # possibly a binary node
888 # odds of a binary node being all hex in ASCII are 1 in 10**25
890 # odds of a binary node being all hex in ASCII are 1 in 10**25
889 try:
891 try:
890 node = id
892 node = id
891 self.rev(node) # quick search the index
893 self.rev(node) # quick search the index
892 return node
894 return node
893 except LookupError:
895 except LookupError:
894 pass # may be partial hex id
896 pass # may be partial hex id
895 try:
897 try:
896 # str(rev)
898 # str(rev)
897 rev = int(id)
899 rev = int(id)
898 if str(rev) != id:
900 if str(rev) != id:
899 raise ValueError
901 raise ValueError
900 if rev < 0:
902 if rev < 0:
901 rev = len(self) + rev
903 rev = len(self) + rev
902 if rev < 0 or rev >= len(self):
904 if rev < 0 or rev >= len(self):
903 raise ValueError
905 raise ValueError
904 return self.node(rev)
906 return self.node(rev)
905 except (ValueError, OverflowError):
907 except (ValueError, OverflowError):
906 pass
908 pass
907 if len(id) == 40:
909 if len(id) == 40:
908 try:
910 try:
909 # a full hex nodeid?
911 # a full hex nodeid?
910 node = bin(id)
912 node = bin(id)
911 self.rev(node)
913 self.rev(node)
912 return node
914 return node
913 except (TypeError, LookupError):
915 except (TypeError, LookupError):
914 pass
916 pass
915
917
916 def _partialmatch(self, id):
918 def _partialmatch(self, id):
917 if len(id) < 40:
919 if len(id) < 40:
918 try:
920 try:
919 # hex(node)[:...]
921 # hex(node)[:...]
920 l = len(id) // 2 # grab an even number of digits
922 l = len(id) // 2 # grab an even number of digits
921 bin_id = bin(id[:l * 2])
923 bin_id = bin(id[:l * 2])
922 nl = [n for n in self.nodemap if n[:l] == bin_id]
924 nl = [n for n in self.nodemap if n[:l] == bin_id]
923 nl = [n for n in nl if hex(n).startswith(id)]
925 nl = [n for n in nl if hex(n).startswith(id)]
924 if len(nl) > 0:
926 if len(nl) > 0:
925 if len(nl) == 1:
927 if len(nl) == 1:
926 return nl[0]
928 return nl[0]
927 raise LookupError(id, self.indexfile,
929 raise LookupError(id, self.indexfile,
928 _('ambiguous identifier'))
930 _('ambiguous identifier'))
929 return None
931 return None
930 except TypeError:
932 except TypeError:
931 pass
933 pass
932
934
    def lookup(self, id):
        """locate a node based on:
            - revision number or str(revision number)
            - nodeid or subset of hex nodeid
        """
        n = self._match(id)
        if n is not None:
            return n
        n = self._partialmatch(id)
        if n:
            return n

        raise LookupError(id, self.indexfile, _('no match found'))

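    # Illustrative usage of lookup() (not part of the original file; `r`
    # is an assumed revlog instance and the hex prefix is hypothetical):
    #
    #   >>> r.lookup(0)              # revision number -> binary nodeid
    #   >>> r.lookup('0')            # str(rev) resolves the same way
    #   >>> r.lookup('-1')           # negative revs count back from tip
    #   >>> r.lookup('1e4e1b8f71e0') # unambiguous hex prefix
    #
    # Anything unresolvable raises LookupError('no match found').
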
    def cmp(self, node, text):
        """compare text with a given file revision

        returns True if text is different than what is stored.
        """
        p1, p2 = self.parents(node)
        return hash(text, p1, p2) != node

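    # Note (added for clarity): cmp() never materializes the stored
    # revision. Because a nodeid is the hash of the text plus its parent
    # nodes, hashing the candidate text with this node's parents and
    # comparing against the node is equivalent to, and cheaper than,
    # text != self.revision(node).
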
    def _addchunk(self, offset, data):
        o, d = self._chunkcache
        # try to add to existing cache
        if o + len(d) == offset and len(d) + len(data) < _prereadsize:
            self._chunkcache = o, d + data
        else:
            self._chunkcache = offset, data

    def _loadchunk(self, offset, length):
        if self._inline:
            df = self.opener(self.indexfile)
        else:
            df = self.opener(self.datafile)

        readahead = max(65536, length)
        df.seek(offset)
        d = df.read(readahead)
        self._addchunk(offset, d)
        if readahead > length:
            return d[:length]
        return d

    def _getchunk(self, offset, length):
        o, d = self._chunkcache
        l = len(d)

        # is it in the cache?
        cachestart = offset - o
        cacheend = cachestart + length
        if cachestart >= 0 and cacheend <= l:
            if cachestart == 0 and cacheend == l:
                return d # avoid a copy
            return d[cachestart:cacheend]

        return self._loadchunk(offset, length)

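    # Worked example of the cache window above (hypothetical numbers):
    # with a cache (o, d) where o == 4096 and len(d) == 65536, a request
    # for offset == 8192, length == 100 gives cachestart == 4096 and
    # cacheend == 4196, both inside d, so d[4096:4196] is returned with
    # no file I/O. A request outside the window falls back to
    # _loadchunk(), which reads ahead and refreshes the cache.
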
    def _chunkraw(self, startrev, endrev):
        start = self.start(startrev)
        length = self.end(endrev) - start
        if self._inline:
            start += (startrev + 1) * self._io.size
        return self._getchunk(start, length)

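    # The inline adjustment above reflects the interleaved layout of an
    # inline revlog, where each index entry (self._io.size bytes) is
    # immediately followed by its revision data. Illustrative: for
    # startrev == 2, index entries 0..2 precede that revision's data, so
    # the data-only offset self.start(2) is shifted by 3 * self._io.size
    # to obtain the real file offset.
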
    def _chunk(self, rev):
        return decompress(self._chunkraw(rev, rev))

    def _chunkclear(self):
        self._chunkcache = (0, '')

    def revdiff(self, rev1, rev2):
        """return or calculate a delta between two revisions"""
        if rev1 + 1 == rev2 and self.base(rev1) == self.base(rev2):
            return self._chunk(rev2)

        return mdiff.textdiff(self.revision(self.node(rev1)),
                              self.revision(self.node(rev2)))

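    # Fast path rationale (illustrative): when rev2 == rev1 + 1 and both
    # share a delta base, the chunk stored for rev2 is by construction a
    # delta against rev1, so it can be returned directly instead of
    # reconstructing both full texts and re-diffing them.
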
    def revision(self, node):
        """return an uncompressed revision of a given node"""
        if node == nullid:
            return ""
        if self._cache and self._cache[0] == node:
            return self._cache[2]

        # look up what we need to read
        text = None
        rev = self.rev(node)
        base = self.base(rev)

        # check rev flags
        if self.flags(rev) & ~REVIDX_KNOWN_FLAGS:
            raise RevlogError(_('incompatible revision flag %x') %
                              (self.flags(rev) & ~REVIDX_KNOWN_FLAGS))

        # do we have useful data cached?
        if self._cache and self._cache[1] >= base and self._cache[1] < rev:
            base = self._cache[1]
            text = self._cache[2]

        self._loadindex(base, rev + 1)
        self._chunkraw(base, rev)
        if text is None:
            text = self._chunk(base)

        bins = [self._chunk(r) for r in xrange(base + 1, rev + 1)]
        text = mdiff.patches(text, bins)
        p1, p2 = self.parents(node)
        if (node != hash(text, p1, p2) and
            not (self.flags(rev) & REVIDX_PUNCHED_FLAG)):
            raise RevlogError(_("integrity check failed on %s:%d")
                              % (self.indexfile, rev))

        self._cache = (node, rev, text)
        return text

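    # Delta-chain sketch for revision() above (illustrative revs): with
    # rev == 7 and base(7) == 4, the method reads the full snapshot
    # stored at rev 4 and applies the deltas for revs 5..7 in order:
    #
    #   text = self._chunk(4)                  # full text
    #   bins = [self._chunk(r) for r in (5, 6, 7)]
    #   text = mdiff.patches(text, bins)
    #
    # The integrity check is skipped for REVIDX_PUNCHED_FLAG revisions,
    # whose stored data is a placeholder and so is not expected to hash
    # to the recorded nodeid.
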
    def checkinlinesize(self, tr, fp=None):
        if not self._inline or (self.start(-2) + self.length(-2)) < _maxinline:
            return

        trinfo = tr.find(self.indexfile)
        if trinfo is None:
            raise RevlogError(_("%s not found in the transaction")
                              % self.indexfile)

        trindex = trinfo[2]
        dataoff = self.start(trindex)

        tr.add(self.datafile, dataoff)

        if fp:
            fp.flush()
            fp.close()

        df = self.opener(self.datafile, 'w')
        try:
            for r in self:
                df.write(self._chunkraw(r, r))
        finally:
            df.close()

        fp = self.opener(self.indexfile, 'w', atomictemp=True)
        self.version &= ~(REVLOGNGINLINEDATA)
        self._inline = False
        for i in self:
            e = self._io.packentry(self.index[i], self.node, self.version, i)
            fp.write(e)

        # if we don't call rename, the temp file will never replace the
        # real index
        fp.rename()

        tr.replace(self.indexfile, trindex * self._io.size)
        self._chunkclear()

    def addrevision(self, text, transaction, link, p1, p2, d=None):
        """add a revision to the log

        text - the revision data to add
        transaction - the transaction object used for rollback
        link - the linkrev data to add
        p1, p2 - the parent nodeids of the revision
        d - an optional precomputed delta
        """
        dfh = None
        if not self._inline:
            dfh = self.opener(self.datafile, "a")
        ifh = self.opener(self.indexfile, "a+")
        try:
            return self._addrevision(text, transaction, link, p1, p2, d, ifh, dfh)
        finally:
            if dfh:
                dfh.close()
            ifh.close()

    def _addrevision(self, text, transaction, link, p1, p2, d, ifh, dfh):
        node = hash(text, p1, p2)
        if node in self.nodemap:
            return node

        curr = len(self)
        prev = curr - 1
        base = self.base(prev)
        offset = self.end(prev)

        if curr:
            if not d:
                ptext = self.revision(self.node(prev))
                d = mdiff.textdiff(ptext, text)
            data = compress(d)
            l = len(data[1]) + len(data[0])
            dist = l + offset - self.start(base)

        # full versions are inserted when the needed deltas
        # become comparable to the uncompressed text
        # or the base revision is punched
        if (not curr or dist > len(text) * 2 or
            (self.flags(base) & REVIDX_PUNCHED_FLAG)):
            data = compress(text)
            l = len(data[1]) + len(data[0])
            base = curr

        e = (offset_type(offset, 0), l, len(text),
             base, link, self.rev(p1), self.rev(p2), node)
        self.index.insert(-1, e)
        self.nodemap[node] = curr

        entry = self._io.packentry(e, self.node, self.version, curr)
        if not self._inline:
            transaction.add(self.datafile, offset)
            transaction.add(self.indexfile, curr * len(entry))
            if data[0]:
                dfh.write(data[0])
            dfh.write(data[1])
            dfh.flush()
            ifh.write(entry)
        else:
            offset += curr * self._io.size
            transaction.add(self.indexfile, offset, curr)
            ifh.write(entry)
            ifh.write(data[0])
            ifh.write(data[1])
            self.checkinlinesize(transaction, ifh)

        if type(text) == str: # only accept immutable objects
            self._cache = (node, curr, text)
        return node

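    # Worked example of the snapshot heuristic in _addrevision above
    # (hypothetical sizes): with len(text) == 1000, a compressed delta of
    # l == 150 bytes and offset - self.start(base) == 1900, dist == 2050
    # exceeds len(text) * 2 == 2000, so a full compressed snapshot is
    # written and this revision becomes the new base. A punched base
    # forces the same choice, since a delta chain rooted in placeholder
    # data could not be reconstructed.
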
    def group(self, nodelist, lookup, infocollect=None):
        """Calculate a delta group, yielding a sequence of changegroup chunks
        (strings).

        Given a list of changeset revs, return a set of deltas and
        metadata corresponding to nodes. the first delta is
        parent(nodes[0]) -> nodes[0] the receiver is guaranteed to
        have this parent as it has all history before these
        changesets. parent is parent[0]
        """

        revs = [self.rev(n) for n in nodelist]

        # if we don't have any revisions touched by these changesets, bail
        if not revs:
            yield changegroup.closechunk()
            return

        # add the parent of the first rev
        p = self.parentrevs(revs[0])[0]
        revs.insert(0, p)

        # build deltas
        for d in xrange(len(revs) - 1):
            a, b = revs[d], revs[d + 1]
            nb = self.node(b)

            if infocollect is not None:
                infocollect(nb)

            p = self.parents(nb)
            meta = nb + p[0] + p[1] + lookup(nb)
            if a == -1:
                d = self.revision(nb)
                meta += mdiff.trivialdiffheader(len(d))
            else:
                d = self.revdiff(a, b)
            yield changegroup.chunkheader(len(meta) + len(d))
            yield meta
            yield d

        yield changegroup.closechunk()

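    # Shape of each chunk yielded by group() above (as implied by the
    # code here, not a separate spec): a length header, then 80 bytes of
    # metadata, then the delta:
    #
    #   chunkheader | node (20) | p1 (20) | p2 (20) | cs (20) | delta
    #
    # addgroup() below unpacks exactly this header with
    # struct.unpack("20s20s20s20s", chunk[:80]).
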
    def addgroup(self, revs, linkmapper, transaction):
        """
        add a delta group

        given a set of deltas, add them to the revision log. the
        first delta is against its parent, which should be in our
        log, the rest are against the previous delta.
        """

        #track the base of the current delta log
        r = len(self)
        t = r - 1
        node = None

        base = prev = nullrev
        start = end = textlen = 0
        if r:
            end = self.end(t)

        ifh = self.opener(self.indexfile, "a+")
        isize = r * self._io.size
        if self._inline:
            transaction.add(self.indexfile, end + isize, r)
            dfh = None
        else:
            transaction.add(self.indexfile, isize, r)
            transaction.add(self.datafile, end)
            dfh = self.opener(self.datafile, "a")

        try:
            # loop through our set of deltas
            chain = None
            for chunk in revs:
                node, p1, p2, cs = struct.unpack("20s20s20s20s", chunk[:80])
                link = linkmapper(cs)
                if node in self.nodemap:
                    # this can happen if two branches make the same change
                    chain = node
                    continue
                delta = buffer(chunk, 80)
                del chunk

                for p in (p1, p2):
                    if not p in self.nodemap:
                        raise LookupError(p, self.indexfile, _('unknown parent'))

                if not chain:
                    # retrieve the parent revision of the delta chain
                    chain = p1
                    if not chain in self.nodemap:
                        raise LookupError(chain, self.indexfile, _('unknown base'))

                # full versions are inserted when the needed deltas become
                # comparable to the uncompressed text or when the previous
                # version is not the one we have a delta against. We use
                # the size of the previous full rev as a proxy for the
                # current size.

                if chain == prev:
                    cdelta = compress(delta)
                    cdeltalen = len(cdelta[0]) + len(cdelta[1])
                    textlen = mdiff.patchedsize(textlen, delta)

                if chain != prev or (end - start + cdeltalen) > textlen * 2:
                    # flush our writes here so we can read it in revision
                    if dfh:
                        dfh.flush()
                    ifh.flush()
                    text = self.revision(chain)
                    if len(text) == 0:
                        # skip over trivial delta header
                        text = buffer(delta, 12)
                    else:
                        text = mdiff.patches(text, [delta])
                    del delta
                    chk = self._addrevision(text, transaction, link, p1, p2, None,
                                            ifh, dfh)
                    if not dfh and not self._inline:
                        # addrevision switched from inline to conventional
                        # reopen the index
                        dfh = self.opener(self.datafile, "a")
                        ifh = self.opener(self.indexfile, "a")
                    if chk != node:
                        raise RevlogError(_("consistency error adding group"))
                    textlen = len(text)
                else:
                    e = (offset_type(end, 0), cdeltalen, textlen, base,
                         link, self.rev(p1), self.rev(p2), node)
                    self.index.insert(-1, e)
                    self.nodemap[node] = r
                    entry = self._io.packentry(e, self.node, self.version, r)
                    if self._inline:
                        ifh.write(entry)
                        ifh.write(cdelta[0])
                        ifh.write(cdelta[1])
                        self.checkinlinesize(transaction, ifh)
                        if not self._inline:
                            dfh = self.opener(self.datafile, "a")
                            ifh = self.opener(self.indexfile, "a")
                    else:
                        dfh.write(cdelta[0])
                        dfh.write(cdelta[1])
                        ifh.write(entry)

                t, r, chain, prev = r, r + 1, node, node
                base = self.base(t)
                start = self.start(base)
                end = self.end(t)
        finally:
            if dfh:
                dfh.close()
            ifh.close()

        return node

    def strip(self, minlink, transaction):
        """truncate the revlog on the first revision with a linkrev >= minlink

        This function is called when we're stripping revision minlink and
        its descendants from the repository.

        We have to remove all revisions with linkrev >= minlink, because
        the equivalent changelog revisions will be renumbered after the
        strip.

        So we truncate the revlog on the first of these revisions, and
        trust that the caller has saved the revisions that shouldn't be
        removed and that it'll readd them after this truncation.
        """
        if len(self) == 0:
            return

        if isinstance(self.index, lazyindex):
            self._loadindexmap()

        for rev in self:
            if self.index[rev][4] >= minlink:
                break
        else:
            return

        # first truncate the files on disk
        end = self.start(rev)
        if not self._inline:
            transaction.add(self.datafile, end)
            end = rev * self._io.size
        else:
            end += rev * self._io.size

        transaction.add(self.indexfile, end)

        # then reset internal state in memory to forget those revisions
        self._cache = None
        self._chunkclear()
        for x in xrange(rev, len(self)):
            del self.nodemap[self.node(x)]

        del self.index[rev:-1]

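    # Truncation arithmetic in strip() above (illustrative, rev == 5 as
    # the first revision with linkrev >= minlink): a non-inline revlog
    # truncates the data file at self.start(5) and the index at
    # 5 * self._io.size; an inline revlog holds both in the index file,
    # which is truncated at self.start(5) + 5 * self._io.size.
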
    def checksize(self):
        expected = 0
        if len(self):
            expected = max(0, self.end(len(self) - 1))

        try:
            f = self.opener(self.datafile)
            f.seek(0, 2)
            actual = f.tell()
            dd = actual - expected
        except IOError, inst:
            if inst.errno != errno.ENOENT:
                raise
            dd = 0

        try:
            f = self.opener(self.indexfile)
            f.seek(0, 2)
            actual = f.tell()
            s = self._io.size
            i = max(0, actual // s)
            di = actual - (i * s)
            if self._inline:
                databytes = 0
                for r in self:
                    databytes += max(0, self.length(r))
                dd = 0
                di = actual - len(self) * s - databytes
        except IOError, inst:
            if inst.errno != errno.ENOENT:
                raise
            di = 0

        return (dd, di)

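    # Reading checksize()'s result (illustrative): dd is the data file's
    # actual size minus the size implied by the last index entry, and di
    # is the equivalent excess for the index file; (0, 0) means both
    # files match the index, a positive value means trailing bytes, and
    # a negative one a truncated file.
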
    def files(self):
        res = [self.indexfile]
        if not self._inline:
            res.append(self.datafile)
        return res