##// END OF EJS Templates
revlog: add shallow header flag...
Vishakh H -
r11746:46ac30b1 default
parent child Browse files
Show More
@@ -1,1405 +1,1413 b''
1 # revlog.py - storage back-end for mercurial
1 # revlog.py - storage back-end for mercurial
2 #
2 #
3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 #
4 #
5 # This software may be used and distributed according to the terms of the
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
6 # GNU General Public License version 2 or any later version.
7
7
8 """Storage back-end for Mercurial.
8 """Storage back-end for Mercurial.
9
9
10 This provides efficient delta storage with O(1) retrieve and append
10 This provides efficient delta storage with O(1) retrieve and append
11 and O(changes) merge between branches.
11 and O(changes) merge between branches.
12 """
12 """
13
13
14 # import stuff from node for others to import from revlog
14 # import stuff from node for others to import from revlog
15 from node import bin, hex, nullid, nullrev, short #@UnusedImport
15 from node import bin, hex, nullid, nullrev, short #@UnusedImport
16 from i18n import _
16 from i18n import _
17 import changegroup, ancestor, mdiff, parsers, error, util
17 import changegroup, ancestor, mdiff, parsers, error, util
18 import struct, zlib, errno
18 import struct, zlib, errno
19
19
20 _pack = struct.pack
20 _pack = struct.pack
21 _unpack = struct.unpack
21 _unpack = struct.unpack
22 _compress = zlib.compress
22 _compress = zlib.compress
23 _decompress = zlib.decompress
23 _decompress = zlib.decompress
24 _sha = util.sha1
24 _sha = util.sha1
25
25
# revlog header flags (stored in the high 16 bits of the version word)
REVLOGV0 = 0
REVLOGNG = 1
REVLOGNGINLINEDATA = (1 << 16)  # revision data stored inline in the index file
REVLOGSHALLOW = (1 << 17)       # revlog belongs to a shallow clone (see shallowroot)
REVLOG_DEFAULT_FLAGS = REVLOGNGINLINEDATA
REVLOG_DEFAULT_FORMAT = REVLOGNG
REVLOG_DEFAULT_VERSION = REVLOG_DEFAULT_FORMAT | REVLOG_DEFAULT_FLAGS
# every header flag that is valid for a RevlogNG index
REVLOGNG_FLAGS = REVLOGNGINLINEDATA | REVLOGSHALLOW

# revlog index flags (per-revision, kept in the low 16 bits of the
# packed offset field of each index entry)
REVIDX_PUNCHED_FLAG = 2
REVIDX_KNOWN_FLAGS = REVIDX_PUNCHED_FLAG

# amount of data read unconditionally, should be >= 4
# when not inline: threshold for using lazy index
_prereadsize = 1048576
# max size of revlog with inline data
_maxinline = 131072

# re-export the exception types under their historical module-level names
RevlogError = error.RevlogError
LookupError = error.LookupError
44
48
def getoffset(q):
    """extract the data-file offset from a packed offset/flags field"""
    # the offset lives in the bits above the 16-bit flags field
    shifted = q >> 16
    return int(shifted)
47
51
def gettype(q):
    """extract the 16-bit flags from a packed offset/flags field"""
    flags = q & 0xFFFF
    return int(flags)
50
54
def offset_type(offset, type):
    """pack a data-file offset and 16-bit type flags into one field"""
    packed = long(offset) << 16 | type
    return long(packed)
53
57
nullhash = _sha(nullid)

def hash(text, p1, p2):
    """generate a hash from the given text and its parent hashes

    This hash combines both the current file contents and its history
    in a manner that makes it easy to distinguish nodes with the same
    content in the revision graph.
    """
    # As of now, if one of the parent node is null, p2 is null
    if p2 == nullid:
        # copying the precomputed hash of nullid is faster than
        # creating a fresh one and hashing nullid again
        s = nullhash.copy()
        s.update(p1)
    else:
        # neither parent is null: hash the parents in sorted order so
        # the result is independent of their ordering
        lo, hi = min(p1, p2), max(p1, p2)
        s = _sha(lo)
        s.update(hi)
    s.update(text)
    return s.digest()
76
80
def compress(text):
    """generate a possibly-compressed representation of text

    Returns a (header, data) pair: header is "" when data is stored
    verbatim or zlib-compressed (distinguished by data's first byte),
    and 'u' when incompressible text must be explicitly marked as
    uncompressed.
    """
    if not text:
        return ("", text)
    size = len(text)
    compressed = None
    if size >= 44:  # tiny inputs never win from compression
        if size > 1000000:
            # zlib makes an internal copy, thus doubling memory usage for
            # large files, so lets do this in pieces
            z = zlib.compressobj()
            chunks = []
            start = 0
            while start < size:
                stop = start + 2 ** 20
                chunks.append(z.compress(text[start:stop]))
                start = stop
            chunks.append(z.flush())
            if sum(map(len, chunks)) < size:
                compressed = "".join(chunks)
        else:
            compressed = _compress(text)
    if compressed is None or len(compressed) > size:
        # store uncompressed; a leading NUL already marks the text as
        # raw, otherwise prefix with 'u'
        if text[0] == '\0':
            return ("", text)
        return ('u', text)
    return ("", compressed)
105
109
def decompress(bin):
    """decompress the given input

    The first byte selects the storage form: NUL means raw data,
    'x' a zlib stream, 'u' explicitly-uncompressed text.
    """
    if not bin:
        return bin
    marker = bin[0]
    if marker == '\0':
        return bin
    elif marker == 'x':
        return _decompress(bin)
    elif marker == 'u':
        return bin[1:]
    raise RevlogError(_("unknown compression type %r") % marker)
118
122
class lazyparser(object):
    """
    this class avoids the need to parse the entirety of large indices

    It keeps the raw (packed) index entries in self.index and faults
    them in from the file in blocks, filling self.map (node -> rev)
    as entries are decoded.
    """

    # lazyparser is not safe to use on windows if win32 extensions not
    # available. it keeps file handle open, which make it not possible
    # to break hardlinks on local cloned repos.

    def __init__(self, dataf):
        # dataf: open file object for the index; if its size cannot be
        # fstat'ed (AttributeError), treat the index as empty
        try:
            size = util.fstat(dataf).st_size
        except AttributeError:
            size = 0
        self.dataf = dataf
        self.s = struct.calcsize(indexformatng)  # bytes per index entry
        self.datasize = size
        self.l = size // self.s                  # total number of entries
        self.index = [None] * self.l             # packed entries, loaded lazily
        self.map = {nullid: nullrev}             # node -> rev, filled as entries load
        self.allmap = 0          # nonzero once every node is in self.map
        self.all = 0             # nonzero once every entry is in self.index
        self.mapfind_count = 0   # number of findnode() calls so far

    def loadmap(self):
        """
        during a commit, we need to make sure the rev being added is
        not a duplicate. This requires loading the entire index,
        which is fairly slow. loadmap can load up just the node map,
        which takes much less time.
        """
        if self.allmap:
            return
        end = self.datasize
        self.allmap = 1
        cur = 0
        count = 0
        # read 256 entries at a time and pick out only the 20-byte nodeid
        # at ngshaoffset within each entry
        blocksize = self.s * 256
        self.dataf.seek(0)
        while cur < end:
            data = self.dataf.read(blocksize)
            off = 0
            for x in xrange(256):
                n = data[off + ngshaoffset:off + ngshaoffset + 20]
                self.map[n] = count
                count += 1
                if count >= self.l:
                    break
                off += self.s
            cur += blocksize

    def loadblock(self, blockstart, blocksize, data=None):
        # decode one block of raw index data into self.index/self.map;
        # reads from self.dataf unless the caller supplies data
        if self.all:
            return
        if data is None:
            self.dataf.seek(blockstart)
            if blockstart + blocksize > self.datasize:
                # the revlog may have grown since we've started running,
                # but we don't have space in self.index for more entries.
                # limit blocksize so that we don't get too much data.
                blocksize = max(self.datasize - blockstart, 0)
            data = self.dataf.read(blocksize)
        lend = len(data) // self.s
        i = blockstart // self.s
        off = 0
        # lazyindex supports __delitem__
        if lend > len(self.index) - i:
            lend = len(self.index) - i
        for x in xrange(lend):
            if self.index[i + x] is None:
                # keep the entry packed; store the raw bytes and index
                # the embedded nodeid
                b = data[off : off + self.s]
                self.index[i + x] = b
                n = b[ngshaoffset:ngshaoffset + 20]
                self.map[n] = i + x
            off += self.s

    def findnode(self, node):
        """search backwards through the index file for a specific node"""
        if self.allmap:
            return None

        # hg log will cause many many searches for the manifest
        # nodes. After we get called a few times, just load the whole
        # thing.
        if self.mapfind_count > 8:
            self.loadmap()
            if node in self.map:
                return node
            return None
        self.mapfind_count += 1
        last = self.l - 1
        # skip over the already-loaded tail of the index
        while self.index[last] != None:
            if last == 0:
                # every entry is loaded (and indexed in self.map), so the
                # node simply does not exist
                self.all = 1
                self.allmap = 1
                return None
            last -= 1
        end = (last + 1) * self.s
        blocksize = self.s * 256
        # scan the file in blocks from the end toward the start
        while end >= 0:
            start = max(end - blocksize, 0)
            self.dataf.seek(start)
            data = self.dataf.read(end - start)
            findend = end - start
            while True:
                # we're searching backwards, so we have to make sure
                # we don't find a changeset where this node is a parent
                off = data.find(node, 0, findend)
                findend = off
                if off >= 0:
                    i = off / self.s
                    off = i * self.s
                    # only a hit at the nodeid offset of an entry counts;
                    # otherwise we matched a parent field and keep looking
                    n = data[off + ngshaoffset:off + ngshaoffset + 20]
                    if n == node:
                        self.map[n] = i + start / self.s
                        return node
                else:
                    break
            end -= blocksize
        return None

    def loadindex(self, i=None, end=None):
        # load index entries: everything (i is None), the range [i, end)
        # (end given), or the 1024-entry block containing entry i
        if self.all:
            return
        all = False
        if i is None:
            blockstart = 0
            # largest whole number of entries fitting in 64KB
            blocksize = (65536 / self.s) * self.s
            end = self.datasize
            all = True
        else:
            if end:
                blockstart = i * self.s
                end = end * self.s
                blocksize = end - blockstart
            else:
                blockstart = (i & ~1023) * self.s
                blocksize = self.s * 1024
                end = blockstart + blocksize
        while blockstart < end:
            self.loadblock(blockstart, blocksize)
            blockstart += blocksize
        if all:
            self.all = True
263
267
class lazyindex(object):
    """a lazy version of the index array

    Entries live packed (as raw strings) inside the wrapped lazyparser
    and are unpacked on access; missing entries are faulted in on demand.
    """

    def __init__(self, parser):
        self.p = parser

    def __len__(self):
        return len(self.p.index)

    def load(self, pos):
        # normalize a negative position, then fault in the block that
        # holds the requested entry
        if pos < 0:
            pos += len(self.p.index)
        self.p.loadindex(pos)
        return self.p.index[pos]

    def __getitem__(self, pos):
        entry = self.p.index[pos] or self.load(pos)
        return _unpack(indexformatng, entry)

    def __setitem__(self, pos, item):
        self.p.index[pos] = _pack(indexformatng, *item)

    def __delitem__(self, pos):
        del self.p.index[pos]

    def insert(self, pos, e):
        packed = _pack(indexformatng, *e)
        self.p.index.insert(pos, packed)

    def append(self, e):
        self.p.index.append(_pack(indexformatng, *e))
285
289
class lazymap(object):
    """a lazy version of the node map

    Lookups consult the wrapped lazyparser's partial map first and only
    fall back to searching (or fully loading) the on-disk index.
    """

    def __init__(self, parser):
        self.p = parser

    def load(self, key):
        # search the index file for key; findnode fills p.map on success
        if self.p.findnode(key) is None:
            raise KeyError(key)

    def __contains__(self, key):
        if key not in self.p.map:
            # unknown so far: load the complete node map before deciding
            self.p.loadmap()
        return key in self.p.map

    def __iter__(self):
        yield nullid
        for i, entry in enumerate(self.p.index):
            if not entry:
                self.p.loadindex(i)
                entry = self.p.index[i]
            if isinstance(entry, str):
                entry = _unpack(indexformatng, entry)
            # field 7 of an unpacked entry is the nodeid
            yield entry[7]

    def __getitem__(self, key):
        try:
            return self.p.map[key]
        except KeyError:
            pass
        # not cached yet: try to find it on disk, then look again
        try:
            self.load(key)
            return self.p.map[key]
        except KeyError:
            raise KeyError("node " + hex(key))

    def __setitem__(self, key, val):
        self.p.map[key] = val

    def __delitem__(self, key):
        del self.p.map[key]
321
325
# v0 entry layout: offset, compressed length, base rev, link rev (4 longs)
# followed by the p1 node, p2 node and nodeid (three 20-byte hashes)
indexformatv0 = ">4l20s20s20s"
v0shaoffset = 56  # byte offset of the nodeid within a v0 entry

class revlogoldio(object):
    """parse and pack index entries in the original (v0) revlog format"""
    def __init__(self):
        self.size = struct.calcsize(indexformatv0)

    def parseindex(self, fp, data, inline):
        # build (index, nodemap, cache) from raw v0 index data; entries
        # are converted to the v1 tuple layout as they are read.  v0 has
        # no inline data, so the chunk-cache slot is always None.
        s = self.size
        index = []
        nodemap = {nullid: nullrev}
        n = off = 0
        if len(data) == _prereadsize:
            data += fp.read() # read the rest
        l = len(data)
        while off + s <= l:
            cur = data[off:off + s]
            off += s
            e = _unpack(indexformatv0, cur)
            # transform to revlogv1 format
            # (v1 has no stored uncompressed length, hence the -1, and
            # stores parents as revs, hence the nodemap lookups)
            e2 = (offset_type(e[0], 0), e[1], -1, e[2], e[3],
                  nodemap.get(e[4], nullrev), nodemap.get(e[5], nullrev), e[6])
            index.append(e2)
            nodemap[e[6]] = n
            n += 1

        return index, nodemap, None

    def packentry(self, entry, node, version, rev):
        # entry is a v1-form tuple; node maps a rev back to its nodeid.
        # v0 cannot represent per-revision index flags at all.
        if gettype(entry[0]):
            raise RevlogError(_("index entry flags need RevlogNG"))
        e2 = (getoffset(entry[0]), entry[1], entry[3], entry[4],
              node(entry[5]), node(entry[6]), entry[7])
        return _pack(indexformatv0, *e2)
356
360
# index ng:
#  6 bytes: offset
#  2 bytes: flags
#  4 bytes: compressed length
#  4 bytes: uncompressed length
#  4 bytes: base rev
#  4 bytes: link rev
#  4 bytes: parent 1 rev
#  4 bytes: parent 2 rev
# 32 bytes: nodeid
indexformatng = ">Qiiiiii20s12x"
ngshaoffset = 32     # byte offset of the nodeid within an entry
versionformat = ">I" # the version/flags header word, overlaid on entry 0

class revlogio(object):
    """parse and pack index entries in the RevlogNG (v1) format"""
    def __init__(self):
        self.size = struct.calcsize(indexformatng)

    def parseindex(self, fp, data, inline):
        if len(data) == _prereadsize:
            if util.openhardlinks() and not inline:
                # big index, let's parse it on demand
                parser = lazyparser(fp)
                index = lazyindex(parser)
                nodemap = lazymap(parser)
                # entry 0's offset field physically holds the version
                # header; rewrite it to a real zero offset, keeping any
                # flags in the low 16 bits
                e = list(index[0])
                type = gettype(e[0])
                e[0] = offset_type(0, type)
                index[0] = e
                return index, nodemap, None
            else:
                data += fp.read()

        # call the C implementation to parse the index data
        index, nodemap, cache = parsers.parse_index(data, inline)
        return index, nodemap, cache

    def packentry(self, entry, node, version, rev):
        p = _pack(indexformatng, *entry)
        if rev == 0:
            # the first entry doubles as the revlog header: its first
            # 4 bytes carry the version/flags word instead of the offset
            p = _pack(versionformat, version) + p[4:]
        return p
399
403
400 class revlog(object):
404 class revlog(object):
401 """
405 """
402 the underlying revision storage object
406 the underlying revision storage object
403
407
404 A revlog consists of two parts, an index and the revision data.
408 A revlog consists of two parts, an index and the revision data.
405
409
406 The index is a file with a fixed record size containing
410 The index is a file with a fixed record size containing
407 information on each revision, including its nodeid (hash), the
411 information on each revision, including its nodeid (hash), the
408 nodeids of its parents, the position and offset of its data within
412 nodeids of its parents, the position and offset of its data within
409 the data file, and the revision it's based on. Finally, each entry
413 the data file, and the revision it's based on. Finally, each entry
410 contains a linkrev entry that can serve as a pointer to external
414 contains a linkrev entry that can serve as a pointer to external
411 data.
415 data.
412
416
413 The revision data itself is a linear collection of data chunks.
417 The revision data itself is a linear collection of data chunks.
414 Each chunk represents a revision and is usually represented as a
418 Each chunk represents a revision and is usually represented as a
415 delta against the previous chunk. To bound lookup time, runs of
419 delta against the previous chunk. To bound lookup time, runs of
416 deltas are limited to about 2 times the length of the original
420 deltas are limited to about 2 times the length of the original
417 version data. This makes retrieval of a version proportional to
421 version data. This makes retrieval of a version proportional to
418 its size, or O(1) relative to the number of revisions.
422 its size, or O(1) relative to the number of revisions.
419
423
420 Both pieces of the revlog are written to in an append-only
424 Both pieces of the revlog are written to in an append-only
421 fashion, which means we never need to rewrite a file to insert or
425 fashion, which means we never need to rewrite a file to insert or
422 remove data, and can use some simple techniques to avoid the need
426 remove data, and can use some simple techniques to avoid the need
423 for locking while reading.
427 for locking while reading.
424 """
428 """
425 def __init__(self, opener, indexfile):
429 def __init__(self, opener, indexfile, shallowroot=None):
426 """
430 """
427 create a revlog object
431 create a revlog object
428
432
429 opener is a function that abstracts the file opening operation
433 opener is a function that abstracts the file opening operation
430 and can be used to implement COW semantics or the like.
434 and can be used to implement COW semantics or the like.
431 """
435 """
432 self.indexfile = indexfile
436 self.indexfile = indexfile
433 self.datafile = indexfile[:-2] + ".d"
437 self.datafile = indexfile[:-2] + ".d"
434 self.opener = opener
438 self.opener = opener
435 self._cache = None
439 self._cache = None
436 self._chunkcache = (0, '')
440 self._chunkcache = (0, '')
437 self.nodemap = {nullid: nullrev}
441 self.nodemap = {nullid: nullrev}
438 self.index = []
442 self.index = []
443 self._shallowroot = shallowroot
439
444
440 v = REVLOG_DEFAULT_VERSION
445 v = REVLOG_DEFAULT_VERSION
441 if hasattr(opener, 'options') and 'defversion' in opener.options:
446 if hasattr(opener, 'options') and 'defversion' in opener.options:
442 v = opener.options['defversion']
447 v = opener.options['defversion']
443 if v & REVLOGNG:
448 if v & REVLOGNG:
444 v |= REVLOGNGINLINEDATA
449 v |= REVLOGNGINLINEDATA
450 if shallowroot:
451 v |= REVLOGSHALLOW
445
452
446 i = ''
453 i = ''
447 try:
454 try:
448 f = self.opener(self.indexfile)
455 f = self.opener(self.indexfile)
449 if "nonlazy" in getattr(self.opener, 'options', {}):
456 if "nonlazy" in getattr(self.opener, 'options', {}):
450 i = f.read()
457 i = f.read()
451 else:
458 else:
452 i = f.read(_prereadsize)
459 i = f.read(_prereadsize)
453 if len(i) > 0:
460 if len(i) > 0:
454 v = struct.unpack(versionformat, i[:4])[0]
461 v = struct.unpack(versionformat, i[:4])[0]
455 except IOError, inst:
462 except IOError, inst:
456 if inst.errno != errno.ENOENT:
463 if inst.errno != errno.ENOENT:
457 raise
464 raise
458
465
459 self.version = v
466 self.version = v
460 self._inline = v & REVLOGNGINLINEDATA
467 self._inline = v & REVLOGNGINLINEDATA
468 self._shallow = v & REVLOGSHALLOW
461 flags = v & ~0xFFFF
469 flags = v & ~0xFFFF
462 fmt = v & 0xFFFF
470 fmt = v & 0xFFFF
463 if fmt == REVLOGV0 and flags:
471 if fmt == REVLOGV0 and flags:
464 raise RevlogError(_("index %s unknown flags %#04x for format v0")
472 raise RevlogError(_("index %s unknown flags %#04x for format v0")
465 % (self.indexfile, flags >> 16))
473 % (self.indexfile, flags >> 16))
466 elif fmt == REVLOGNG and flags & ~REVLOGNGINLINEDATA:
474 elif fmt == REVLOGNG and flags & ~REVLOGNG_FLAGS:
467 raise RevlogError(_("index %s unknown flags %#04x for revlogng")
475 raise RevlogError(_("index %s unknown flags %#04x for revlogng")
468 % (self.indexfile, flags >> 16))
476 % (self.indexfile, flags >> 16))
469 elif fmt > REVLOGNG:
477 elif fmt > REVLOGNG:
470 raise RevlogError(_("index %s unknown format %d")
478 raise RevlogError(_("index %s unknown format %d")
471 % (self.indexfile, fmt))
479 % (self.indexfile, fmt))
472
480
473 self._io = revlogio()
481 self._io = revlogio()
474 if self.version == REVLOGV0:
482 if self.version == REVLOGV0:
475 self._io = revlogoldio()
483 self._io = revlogoldio()
476 if i:
484 if i:
477 try:
485 try:
478 d = self._io.parseindex(f, i, self._inline)
486 d = self._io.parseindex(f, i, self._inline)
479 except (ValueError, IndexError):
487 except (ValueError, IndexError):
480 raise RevlogError(_("index %s is corrupted") % (self.indexfile))
488 raise RevlogError(_("index %s is corrupted") % (self.indexfile))
481 self.index, self.nodemap, self._chunkcache = d
489 self.index, self.nodemap, self._chunkcache = d
482 if not self._chunkcache:
490 if not self._chunkcache:
483 self._chunkclear()
491 self._chunkclear()
484
492
485 # add the magic null revision at -1 (if it hasn't been done already)
493 # add the magic null revision at -1 (if it hasn't been done already)
486 if (self.index == [] or isinstance(self.index, lazyindex) or
494 if (self.index == [] or isinstance(self.index, lazyindex) or
487 self.index[-1][7] != nullid) :
495 self.index[-1][7] != nullid) :
488 self.index.append((0, 0, 0, -1, -1, -1, -1, nullid))
496 self.index.append((0, 0, 0, -1, -1, -1, -1, nullid))
489
497
490 def _loadindex(self, start, end):
498 def _loadindex(self, start, end):
491 """load a block of indexes all at once from the lazy parser"""
499 """load a block of indexes all at once from the lazy parser"""
492 if isinstance(self.index, lazyindex):
500 if isinstance(self.index, lazyindex):
493 self.index.p.loadindex(start, end)
501 self.index.p.loadindex(start, end)
494
502
495 def _loadindexmap(self):
503 def _loadindexmap(self):
496 """loads both the map and the index from the lazy parser"""
504 """loads both the map and the index from the lazy parser"""
497 if isinstance(self.index, lazyindex):
505 if isinstance(self.index, lazyindex):
498 p = self.index.p
506 p = self.index.p
499 p.loadindex()
507 p.loadindex()
500 self.nodemap = p.map
508 self.nodemap = p.map
501
509
502 def _loadmap(self):
510 def _loadmap(self):
503 """loads the map from the lazy parser"""
511 """loads the map from the lazy parser"""
504 if isinstance(self.nodemap, lazymap):
512 if isinstance(self.nodemap, lazymap):
505 self.nodemap.p.loadmap()
513 self.nodemap.p.loadmap()
506 self.nodemap = self.nodemap.p.map
514 self.nodemap = self.nodemap.p.map
507
515
508 def tip(self):
516 def tip(self):
509 return self.node(len(self.index) - 2)
517 return self.node(len(self.index) - 2)
510 def __len__(self):
518 def __len__(self):
511 return len(self.index) - 1
519 return len(self.index) - 1
512 def __iter__(self):
520 def __iter__(self):
513 for i in xrange(len(self)):
521 for i in xrange(len(self)):
514 yield i
522 yield i
515 def rev(self, node):
523 def rev(self, node):
516 try:
524 try:
517 return self.nodemap[node]
525 return self.nodemap[node]
518 except KeyError:
526 except KeyError:
519 raise LookupError(node, self.indexfile, _('no node'))
527 raise LookupError(node, self.indexfile, _('no node'))
520 def node(self, rev):
528 def node(self, rev):
521 return self.index[rev][7]
529 return self.index[rev][7]
522 def linkrev(self, rev):
530 def linkrev(self, rev):
523 return self.index[rev][4]
531 return self.index[rev][4]
524 def parents(self, node):
532 def parents(self, node):
525 i = self.index
533 i = self.index
526 d = i[self.rev(node)]
534 d = i[self.rev(node)]
527 return i[d[5]][7], i[d[6]][7] # map revisions to nodes inline
535 return i[d[5]][7], i[d[6]][7] # map revisions to nodes inline
528 def parentrevs(self, rev):
536 def parentrevs(self, rev):
529 return self.index[rev][5:7]
537 return self.index[rev][5:7]
530 def start(self, rev):
538 def start(self, rev):
531 return int(self.index[rev][0] >> 16)
539 return int(self.index[rev][0] >> 16)
532 def end(self, rev):
540 def end(self, rev):
533 return self.start(rev) + self.length(rev)
541 return self.start(rev) + self.length(rev)
534 def length(self, rev):
542 def length(self, rev):
535 return self.index[rev][1]
543 return self.index[rev][1]
536 def base(self, rev):
544 def base(self, rev):
537 return self.index[rev][3]
545 return self.index[rev][3]
538 def flags(self, rev):
546 def flags(self, rev):
539 return self.index[rev][0] & 0xFFFF
547 return self.index[rev][0] & 0xFFFF
540
548
def size(self, rev):
    """return the length of the uncompressed text for a given revision"""
    cached = self.index[rev][2]
    if cached < 0:
        # length unknown (stored as -1): reconstruct the text to measure it
        return len(self.revision(self.node(rev)))
    return cached
549
557
def reachable(self, node, stop=None):
    """return the set of all nodes ancestral to a given node, including
    the node itself, stopping when stop is matched"""
    if stop:
        stoprev = self.rev(stop)
    else:
        stoprev = 0
    seen = set([node])
    pending = [node]
    while pending:
        current = pending.pop(0)
        # never walk past the stop node or through the null node
        if current == stop:
            continue
        if current == nullid:
            continue
        for parent in self.parents(current):
            # ignore ancestors older than the stop revision
            if self.rev(parent) < stoprev:
                continue
            if parent not in seen:
                seen.add(parent)
                pending.append(parent)
    return seen
572
580
def ancestors(self, *revs):
    """Generate the ancestors of 'revs' in reverse topological order.

    Yield a sequence of revision numbers starting with the parents
    of each revision in revs, i.e., each revision is *not* considered
    an ancestor of itself. Results are in breadth-first order:
    parents of each rev in revs, then parents of those, etc. Result
    does not include the null revision."""
    pending = list(revs)
    # pre-seed with nullrev so the null revision is never yielded
    emitted = set([nullrev])
    while pending:
        current = pending.pop(0)
        for parent in self.parentrevs(current):
            if parent in emitted:
                continue
            emitted.add(parent)
            pending.append(parent)
            yield parent
589
597
def descendants(self, *revs):
    """Generate the descendants of 'revs' in revision order.

    Yield a sequence of revision numbers starting with a child of
    some rev in revs, i.e., each revision is *not* considered a
    descendant of itself. Results are ordered by revision number (a
    topological sort)."""
    known = set(revs)
    # children always have higher revision numbers than their parents,
    # so a single forward sweep finds every descendant
    for rev in xrange(min(revs) + 1, len(self)):
        for parent in self.parentrevs(rev):
            if parent != nullrev and parent in known:
                known.add(rev)
                yield rev
                break
604
612
def findmissing(self, common=None, heads=None):
    """Return the ancestors of heads that are not ancestors of common.

    More specifically, return a list of nodes N such that every N
    satisfies the following constraints:

      1. N is an ancestor of some node in 'heads'
      2. N is not an ancestor of any node in 'common'

    The list is sorted by revision number, meaning it is
    topologically sorted.

    'heads' and 'common' are both lists of node IDs.  If heads is
    not supplied, uses all of the revlog's heads.  If common is not
    supplied, uses nullid."""
    if common is None:
        common = [nullid]
    if heads is None:
        heads = self.heads()

    common = [self.rev(n) for n in common]
    heads = [self.rev(n) for n in heads]

    # we want the ancestors, but inclusive
    has = set(self.ancestors(*common))
    has.add(nullrev)
    has.update(common)

    # walk back from the heads, collecting everything not in 'has'
    missing = set()
    pending = [r for r in heads if r not in has]
    while pending:
        r = pending.pop(0)
        if r not in missing:
            missing.add(r)
            for p in self.parentrevs(r):
                if p not in has:
                    pending.append(p)
    return [self.node(r) for r in sorted(missing)]
648
656
def nodesbetween(self, roots=None, heads=None):
    """Return a topological path from 'roots' to 'heads'.

    Return a tuple (nodes, outroots, outheads) where 'nodes' is a
    topologically sorted list of all nodes N that satisfy both of
    these constraints:

      1. N is a descendant of some node in 'roots'
      2. N is an ancestor of some node in 'heads'

    Every node is considered to be both a descendant and an ancestor
    of itself, so every reachable node in 'roots' and 'heads' will be
    included in 'nodes'.

    'outroots' is the list of reachable nodes in 'roots', i.e., the
    subset of 'roots' that is returned in 'nodes'.  Likewise,
    'outheads' is the subset of 'heads' that is also in 'nodes'.

    'roots' and 'heads' are both lists of node IDs.  If 'roots' is
    unspecified, uses nullid as the only root.  If 'heads' is
    unspecified, uses list of all of the revlog's heads."""
    nonodes = ([], [], [])
    if roots is not None:
        roots = list(roots)
        if not roots:
            return nonodes
        lowestrev = min([self.rev(n) for n in roots])
    else:
        roots = [nullid] # Everybody's a descendent of nullid
        lowestrev = nullrev
    if (lowestrev == nullrev) and (heads is None):
        # We want _all_ the nodes!
        return ([self.node(r) for r in self], [nullid], list(self.heads()))
    if heads is None:
        # All nodes are ancestors, so the latest ancestor is the last
        # node.
        highestrev = len(self) - 1
        # Set ancestors to None to signal that every node is an ancestor.
        ancestors = None
        # Set heads to an empty dictionary for later discovery of heads
        heads = {}
    else:
        heads = list(heads)
        if not heads:
            return nonodes
        ancestors = set()
        # Turn heads into a dictionary so we can remove 'fake' heads.
        # Also, later we will be using it to filter out the heads we can't
        # find from roots.
        heads = dict.fromkeys(heads, 0)
        # Start at the top and keep marking parents until we're done.
        nodestotag = set(heads)
        # Remember where the top was so we can use it as a limit later.
        highestrev = max([self.rev(n) for n in nodestotag])
        while nodestotag:
            # grab a node to tag
            n = nodestotag.pop()
            # Never tag nullid
            if n == nullid:
                continue
            # A node's revision number represents its place in a
            # topologically sorted list of nodes.
            r = self.rev(n)
            if r >= lowestrev:
                if n not in ancestors:
                    # If we are possibly a descendent of one of the roots
                    # and we haven't already been marked as an ancestor
                    ancestors.add(n) # Mark as ancestor
                    # Add non-nullid parents to list of nodes to tag.
                    nodestotag.update([p for p in self.parents(n) if
                                       p != nullid])
                elif n in heads: # We've seen it before, is it a fake head?
                    # So it is, real heads should not be the ancestors of
                    # any other heads.
                    heads.pop(n)
        if not ancestors:
            return nonodes
        # Now that we have our set of ancestors, we want to remove any
        # roots that are not ancestors.

        # If one of the roots was nullid, everything is included anyway.
        if lowestrev > nullrev:
            # But, since we weren't, let's recompute the lowest rev to not
            # include roots that aren't ancestors.

            # Filter out roots that aren't ancestors of heads
            roots = [n for n in roots if n in ancestors]
            # Recompute the lowest revision
            if roots:
                lowestrev = min([self.rev(n) for n in roots])
            else:
                # No more roots?  Return empty list
                return nonodes
        else:
            # We are descending from nullid, and don't need to care about
            # any other roots.
            lowestrev = nullrev
            roots = [nullid]
    # Transform our roots list into a set.
    descendents = set(roots)
    # Also, keep the original roots so we can filter out roots that aren't
    # 'real' roots (i.e. are descended from other roots).
    roots = descendents.copy()
    # Our topologically sorted list of output nodes.
    orderedout = []
    # Don't start at nullid since we don't want nullid in our output list,
    # and if nullid shows up in descedents, empty parents will look like
    # they're descendents.
    for r in xrange(max(lowestrev, 0), highestrev + 1):
        n = self.node(r)
        isdescendent = False
        if lowestrev == nullrev:  # Everybody is a descendent of nullid
            isdescendent = True
        elif n in descendents:
            # n is already a descendent
            isdescendent = True
            # This check only needs to be done here because all the roots
            # will start being marked is descendents before the loop.
            if n in roots:
                # If n was a root, check if it's a 'real' root.
                p = tuple(self.parents(n))
                # If any of its parents are descendents, it's not a root.
                if (p[0] in descendents) or (p[1] in descendents):
                    roots.remove(n)
        else:
            p = tuple(self.parents(n))
            # A node is a descendent if either of its parents are
            # descendents.  (We seeded the dependents list with the roots
            # up there, remember?)
            if (p[0] in descendents) or (p[1] in descendents):
                descendents.add(n)
                isdescendent = True
        if isdescendent and ((ancestors is None) or (n in ancestors)):
            # Only include nodes that are both descendents and ancestors.
            orderedout.append(n)
            if (ancestors is not None) and (n in heads):
                # We're trying to figure out which heads are reachable
                # from roots.
                # Mark this head as having been reached
                heads[n] = 1
            elif ancestors is None:
                # Otherwise, we're trying to discover the heads.
                # Assume this is a head because if it isn't, the next step
                # will eventually remove it.
                heads[n] = 1
                # But, obviously its parents aren't.
                for p in self.parents(n):
                    heads.pop(p, None)
    # keep only the heads actually reached during the sweep
    heads = [n for n in heads.iterkeys() if heads[n] != 0]
    roots = list(roots)
    # NOTE(review): by construction the three results are non-empty here;
    # the asserts document that invariant rather than validate input.
    assert orderedout
    assert roots
    assert heads
    return (orderedout, roots, heads)
803
811
def heads(self, start=None, stop=None):
    """return the list of all nodes that have no children

    if start is specified, only heads that are descendants of
    start will be returned
    if stop is specified, it will consider all the revs from stop
    as if they had no children
    """
    if start is None and stop is None:
        # fast path: one sweep over the index marking every revision
        # that appears as a parent; unmarked revisions are heads
        count = len(self)
        if not count:
            return [nullid]
        # the extra slot absorbs parent references of -1 (null parents),
        # which index the end of the list
        ishead = [1] * (count + 1)
        index = self.index
        for r in xrange(count):
            # e[5] and e[6] are the parent revisions of r
            e = index[r]
            ishead[e[5]] = ishead[e[6]] = 0
        return [self.node(r) for r in xrange(count) if ishead[r]]

    if start is None:
        start = nullid
    if stop is None:
        stop = []
    stoprevs = set([self.rev(n) for n in stop])
    startrev = self.rev(start)
    reachable = set((startrev,))
    heads = set((startrev,))

    # walk forward from start: a revision reachable from start becomes a
    # head and unseats its reachable parents, unless it is in stoprevs
    parentrevs = self.parentrevs
    for r in xrange(startrev + 1, len(self)):
        for p in parentrevs(r):
            if p in reachable:
                if r not in stoprevs:
                    reachable.add(r)
                    heads.add(r)
                if p in heads and p not in stoprevs:
                    heads.remove(p)

    return [self.node(r) for r in heads]
843
851
def children(self, node):
    """find the children of a given node

    Returns the child nodes in revision order.  A revision with no
    real (non-null) parents is treated as a child of the null node.
    """
    c = []
    p = self.rev(node)
    # children always have a higher revision number than their parent,
    # so only revisions after p need to be examined.  Use xrange (as the
    # other traversals in this file do) instead of range, which would
    # materialize a list over a potentially huge revision span.
    for r in xrange(p + 1, len(self)):
        prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
        if prevs:
            for pr in prevs:
                if pr == p:
                    c.append(self.node(r))
        elif p == nullrev:
            # parentless revision: counts as a child of the null node
            c.append(self.node(r))
    return c
857
865
def descendant(self, start, end):
    """return True if revision *end* is a descendant of revision *start*"""
    # descendants() yields revisions in increasing order, so the scan
    # can stop as soon as it passes *end*
    for rev in self.descendants(start):
        if rev == end:
            return True
        if rev > end:
            return False
    return False
865
873
def ancestor(self, a, b):
    """calculate the least common ancestor of nodes a and b"""

    a, b = self.rev(a), self.rev(b)
    start, end = min(a, b), max(a, b)
    # fast path: when one revision descends from the other, the older
    # revision is the answer
    if self.descendant(start, end):
        return self.node(start)

    def parents(rev):
        # real (non-null) parents only
        return [p for p in self.parentrevs(rev) if p != nullrev]

    # 'ancestor' here is the module-level ancestor module, not this method
    found = ancestor.ancestor(a, b, parents)
    if found is None:
        return nullid
    return self.node(found)
883
891
def _match(self, id):
    """try to interpret *id* as an exact revision reference

    Accepts, in order: an int/long revision number, a 20-byte binary
    node, a decimal string revision (negative values count from the
    end), or a 40-character hex node.  Returns the matching node, or
    None (implicitly) when nothing matches exactly.
    """
    if isinstance(id, (long, int)):
        # rev
        return self.node(id)
    if len(id) == 20:
        # possibly a binary node
        # odds of a binary node being all hex in ASCII are 1 in 10**25
        try:
            node = id
            self.rev(node) # quick search the index
            return node
        except LookupError:
            pass # may be partial hex id
    try:
        # str(rev)
        rev = int(id)
        # reject strings like "01" or "1.0" that int() would accept
        if str(rev) != id:
            raise ValueError
        if rev < 0:
            rev = len(self) + rev
        if rev < 0 or rev >= len(self):
            raise ValueError
        return self.node(rev)
    except (ValueError, OverflowError):
        pass
    if len(id) == 40:
        try:
            # a full hex nodeid?
            node = bin(id)
            self.rev(node)
            return node
        except (TypeError, LookupError):
            # TypeError: not valid hex; LookupError: unknown node
            pass
917
925
def _partialmatch(self, id):
    """resolve a hex nodeid prefix to a unique node

    Returns the node on a unique match, None when nothing matches
    (or *id* is not valid hex, or is already full length), and raises
    LookupError when the prefix is ambiguous.
    """
    if len(id) < 40:
        try:
            # compare binary prefixes built from an even number of digits
            half = len(id) // 2
            prefix = bin(id[:half * 2])
            candidates = [n for n in self.nodemap if n[:half] == prefix]
            # an odd trailing digit is checked against the hex form
            candidates = [n for n in candidates if hex(n).startswith(id)]
            if candidates:
                if len(candidates) == 1:
                    return candidates[0]
                raise LookupError(id, self.indexfile,
                                  _('ambiguous identifier'))
            return None
        except TypeError:
            # bin() rejects non-hex input: treat as no match
            pass
934
942
def lookup(self, id):
    """locate a node based on:
        - revision number or str(revision number)
        - nodeid or subset of hex nodeid
    """
    found = self._match(id)
    if found is not None:
        return found
    found = self._partialmatch(id)
    if found:
        return found

    raise LookupError(id, self.indexfile, _('no match found'))
948
956
def cmp(self, node, text):
    """compare text with a given file revision

    returns True if text is different than what is stored.
    """
    # hash() is the module-level revlog hashing helper (text + parents),
    # not the builtin
    p1, p2 = self.parents(node)
    return node != hash(text, p1, p2)
956
964
def _addchunk(self, offset, data):
    """remember *data* at *offset* in the chunk cache

    Extends the cached run when the new data is contiguous with it
    and the combined size stays below _prereadsize; otherwise the
    cache is replaced.
    """
    cachestart, cached = self._chunkcache
    contiguous = (cachestart + len(cached) == offset)
    if contiguous and len(cached) + len(data) < _prereadsize:
        self._chunkcache = cachestart, cached + data
    else:
        self._chunkcache = offset, data
964
972
def _loadchunk(self, offset, length):
    """read *length* bytes at *offset* from disk, caching a readahead

    Inline revlogs keep their data in the index file; otherwise the
    separate data file is read.
    """
    if self._inline:
        fp = self.opener(self.indexfile)
    else:
        fp = self.opener(self.datafile)

    # read at least 64k to amortize the cost of the seek/read
    want = max(65536, length)
    fp.seek(offset)
    data = fp.read(want)
    self._addchunk(offset, data)
    if want > length:
        return data[:length]
    return data
978
986
def _getchunk(self, offset, length):
    """return *length* bytes at *offset*, serving from cache when possible"""
    cachestart, cached = self._chunkcache
    size = len(cached)

    lo = offset - cachestart
    hi = lo + length
    # serve from cache when the requested range lies entirely inside it
    if lo >= 0 and hi <= size:
        if lo == 0 and hi == size:
            return cached # avoid a copy
        return cached[lo:hi]

    return self._loadchunk(offset, length)
992
1000
def _chunkraw(self, startrev, endrev):
    """return the raw on-disk bytes covering revisions startrev..endrev"""
    offset = self.start(startrev)
    size = self.end(endrev) - offset
    if self._inline:
        # inline data is interleaved with index entries; skip past the
        # index records preceding the first requested revision
        offset += (startrev + 1) * self._io.size
    return self._getchunk(offset, size)
999
1007
def _chunk(self, rev):
    """return the decompressed stored chunk for one revision"""
    raw = self._chunkraw(rev, rev)
    return decompress(raw)
1002
1010
def _chunkclear(self):
    """drop any cached chunk data"""
    self._chunkcache = (0, '')
1005
1013
def revdiff(self, rev1, rev2):
    """return or calculate a delta between two revisions"""
    # if rev2 directly follows rev1 in the same delta chain, its stored
    # chunk already is the delta against rev1
    if rev2 == rev1 + 1 and self.base(rev1) == self.base(rev2):
        return self._chunk(rev2)

    return mdiff.textdiff(self.revision(self.node(rev1)),
                          self.revision(self.node(rev2)))
1013
1021
def revision(self, node):
    """return an uncompressed revision of a given node

    Raises RevlogError when the revision carries unknown flags or
    fails the integrity (hash) check.
    """
    if node == nullid:
        return ""
    # self._cache is (node, rev, text) of the last revision built
    if self._cache and self._cache[0] == node:
        return self._cache[2]

    # look up what we need to read
    text = None
    rev = self.rev(node)
    base = self.base(rev)

    # check rev flags
    if self.flags(rev) & ~REVIDX_KNOWN_FLAGS:
        raise RevlogError(_('incompatible revision flag %x') %
                          (self.flags(rev) & ~REVIDX_KNOWN_FLAGS))

    # do we have useful data cached?  if the cached revision lies on the
    # same delta chain, restart patching from it instead of the base
    if self._cache and self._cache[1] >= base and self._cache[1] < rev:
        base = self._cache[1]
        text = self._cache[2]

    # prime the index and the chunk cache for the whole delta chain
    self._loadindex(base, rev + 1)
    self._chunkraw(base, rev)
    if text is None:
        text = self._chunk(base)

    # apply the chain of deltas on top of the base text
    bins = [self._chunk(r) for r in xrange(base + 1, rev + 1)]
    text = mdiff.patches(text, bins)
    p1, p2 = self.parents(node)
    # punched revisions are allowed to fail the hash check
    if (node != hash(text, p1, p2) and
        not (self.flags(rev) & REVIDX_PUNCHED_FLAG)):
        raise RevlogError(_("integrity check failed on %s:%d")
                          % (self.indexfile, rev))

    self._cache = (node, rev, text)
    return text
1051
1059
def checkinlinesize(self, tr, fp=None):
    """split an inline revlog into separate index and data files once
    its data grows past _maxinline

    tr is the current transaction; fp, if given, is the open index
    file pointer, which is flushed and closed before rewriting.
    """
    if not self._inline or (self.start(-2) + self.length(-2)) < _maxinline:
        return

    trinfo = tr.find(self.indexfile)
    if trinfo is None:
        raise RevlogError(_("%s not found in the transaction")
                          % self.indexfile)

    # register the new data file with the transaction so a rollback
    # truncates it correctly
    trindex = trinfo[2]
    dataoff = self.start(trindex)

    tr.add(self.datafile, dataoff)

    if fp:
        fp.flush()
        fp.close()

    # copy every revision's raw chunk into the new data file
    df = self.opener(self.datafile, 'w')
    try:
        for r in self:
            df.write(self._chunkraw(r, r))
    finally:
        df.close()

    # rewrite the index without the inline flag or interleaved data
    fp = self.opener(self.indexfile, 'w', atomictemp=True)
    self.version &= ~(REVLOGNGINLINEDATA)
    self._inline = False
    for i in self:
        e = self._io.packentry(self.index[i], self.node, self.version, i)
        fp.write(e)

    # if we don't call rename, the temp file will never replace the
    # real index
    fp.rename()

    tr.replace(self.indexfile, trindex * self._io.size)
    # cached chunk offsets are stale after the rewrite
    self._chunkclear()
1090
1098
def addrevision(self, text, transaction, link, p1, p2, d=None):
    """add a revision to the log

    text - the revision data to add
    transaction - the transaction object used for rollback
    link - the linkrev data to add
    p1, p2 - the parent nodeids of the revision
    d - an optional precomputed delta

    Returns the node id of the added revision.  This is a thin wrapper
    that opens the data file (only when not inline) and the index file
    in append mode, delegates to _addrevision, and guarantees both
    handles are closed even on failure.
    """
    dfh = None
    if not self._inline:
        dfh = self.opener(self.datafile, "a")
    ifh = self.opener(self.indexfile, "a+")
    try:
        return self._addrevision(text, transaction, link, p1, p2, d, ifh, dfh)
    finally:
        if dfh:
            dfh.close()
        ifh.close()
1110
1118
def _addrevision(self, text, transaction, link, p1, p2, d, ifh, dfh):
    """Internal worker for addrevision: store one revision.

    Decides between storing a compressed delta against the previous
    revision or a full compressed snapshot, appends the index entry,
    and writes the payload through the supplied handles (ifh for the
    index, dfh for the data file; dfh is None when inline).

    Returns the node id; if the node already exists the call is a
    no-op returning the existing node.
    """
    node = hash(text, p1, p2)
    if node in self.nodemap:
        return node

    curr = len(self)
    prev = curr - 1
    base = self.base(prev)          # base of the current delta chain
    offset = self.end(prev)         # append position in the data stream

    if curr:
        if not d:
            # no precomputed delta: diff against the previous revision
            ptext = self.revision(self.node(prev))
            d = mdiff.textdiff(ptext, text)
        data = compress(d)
        l = len(data[1]) + len(data[0])
        dist = l + offset - self.start(base)

    # full versions are inserted when the needed deltas
    # become comparable to the uncompressed text
    # or the base revision is punched
    if (not curr or dist > len(text) * 2 or
        (self.flags(base) & REVIDX_PUNCHED_FLAG)):
        data = compress(text)
        l = len(data[1]) + len(data[0])
        base = curr

    e = (offset_type(offset, 0), l, len(text),
         base, link, self.rev(p1), self.rev(p2), node)
    # insert(-1, e) keeps the sentinel entry at the end of the index
    self.index.insert(-1, e)
    self.nodemap[node] = curr

    entry = self._io.packentry(e, self.node, self.version, curr)
    if not self._inline:
        transaction.add(self.datafile, offset)
        transaction.add(self.indexfile, curr * len(entry))
        # compress() may return an empty header in data[0]
        if data[0]:
            dfh.write(data[0])
        dfh.write(data[1])
        dfh.flush()
        ifh.write(entry)
    else:
        # inline: data chunks follow their index entry in the same file
        offset += curr * self._io.size
        transaction.add(self.indexfile, offset, curr)
        ifh.write(entry)
        ifh.write(data[0])
        ifh.write(data[1])
        # may flip the revlog to non-inline once it grows past the limit
        self.checkinlinesize(transaction, ifh)

    if type(text) == str: # only accept immutable objects
        self._cache = (node, curr, text)
    return node
1163
1171
def group(self, nodelist, lookup, infocollect=None):
    """Calculate a delta group, yielding a sequence of changegroup chunks
    (strings).

    Given a list of changeset revs, return a set of deltas and
    metadata corresponding to nodes. the first delta is
    parent(nodes[0]) -> nodes[0] the receiver is guaranteed to
    have this parent as it has all history before these
    changesets. parent is parent[0]

    lookup - callable mapping a node to its linknode (appended to the
             per-chunk metadata)
    infocollect - optional callback invoked with each node before its
                  delta is produced
    """

    revs = [self.rev(n) for n in nodelist]

    # if we don't have any revisions touched by these changesets, bail
    if not revs:
        yield changegroup.closechunk()
        return

    # add the parent of the first rev
    p = self.parentrevs(revs[0])[0]
    revs.insert(0, p)

    # build deltas
    for d in xrange(len(revs) - 1):
        a, b = revs[d], revs[d + 1]
        nb = self.node(b)

        if infocollect is not None:
            infocollect(nb)

        p = self.parents(nb)
        # chunk metadata: node, both parent nodes, linknode
        meta = nb + p[0] + p[1] + lookup(nb)
        if a == -1:
            # no usable delta base: ship the full text with a trivial
            # diff header instead of a real delta
            d = self.revision(nb)
            meta += mdiff.trivialdiffheader(len(d))
        else:
            d = self.revdiff(a, b)
        yield changegroup.chunkheader(len(meta) + len(d))
        yield meta
        yield d

    yield changegroup.closechunk()
1206
1214
def addgroup(self, revs, linkmapper, transaction):
    """
    add a delta group

    given a set of deltas, add them to the revision log. the
    first delta is against its parent, which should be in our
    log, the rest are against the previous delta.

    revs - iterable of chunks; each chunk is 80 bytes of header
           (node, p1, p2, cs, 20 bytes each) followed by the delta
    linkmapper - maps the changeset node cs to a linkrev
    transaction - transaction used for rollback

    Returns the node of the last revision added (None if revs was
    empty).  Raises LookupError for unknown parents/bases and
    RevlogError if a reconstructed text hashes to the wrong node.
    """

    #track the base of the current delta log
    r = len(self)
    t = r - 1
    node = None

    base = prev = nullrev
    start = end = textlen = 0
    if r:
        end = self.end(t)

    ifh = self.opener(self.indexfile, "a+")
    isize = r * self._io.size
    if self._inline:
        transaction.add(self.indexfile, end + isize, r)
        dfh = None
    else:
        transaction.add(self.indexfile, isize, r)
        transaction.add(self.datafile, end)
        dfh = self.opener(self.datafile, "a")

    try:
        # loop through our set of deltas
        chain = None
        for chunk in revs:
            node, p1, p2, cs = struct.unpack("20s20s20s20s", chunk[:80])
            link = linkmapper(cs)
            if node in self.nodemap:
                # this can happen if two branches make the same change
                chain = node
                continue
            # zero-copy view of the delta payload after the header
            delta = buffer(chunk, 80)
            del chunk

            for p in (p1, p2):
                if not p in self.nodemap:
                    raise LookupError(p, self.indexfile, _('unknown parent'))

            if not chain:
                # retrieve the parent revision of the delta chain
                chain = p1
                if not chain in self.nodemap:
                    raise LookupError(chain, self.indexfile, _('unknown base'))

            # full versions are inserted when the needed deltas become
            # comparable to the uncompressed text or when the previous
            # version is not the one we have a delta against. We use
            # the size of the previous full rev as a proxy for the
            # current size.

            if chain == prev:
                # delta applies on top of the previous revision: we can
                # store it directly without materializing the text
                cdelta = compress(delta)
                cdeltalen = len(cdelta[0]) + len(cdelta[1])
                textlen = mdiff.patchedsize(textlen, delta)

            if chain != prev or (end - start + cdeltalen) > textlen * 2:
                # flush our writes here so we can read it in revision
                if dfh:
                    dfh.flush()
                ifh.flush()
                text = self.revision(chain)
                if len(text) == 0:
                    # skip over trivial delta header
                    text = buffer(delta, 12)
                else:
                    text = mdiff.patches(text, [delta])
                del delta
                # store the reconstructed full text via _addrevision,
                # which re-decides delta-vs-snapshot itself
                chk = self._addrevision(text, transaction, link, p1, p2, None,
                                        ifh, dfh)
                if not dfh and not self._inline:
                    # addrevision switched from inline to conventional
                    # reopen the index
                    dfh = self.opener(self.datafile, "a")
                    ifh = self.opener(self.indexfile, "a")
                if chk != node:
                    raise RevlogError(_("consistency error adding group"))
                textlen = len(text)
            else:
                # fast path: append the compressed delta as-is
                e = (offset_type(end, 0), cdeltalen, textlen, base,
                     link, self.rev(p1), self.rev(p2), node)
                self.index.insert(-1, e)
                self.nodemap[node] = r
                entry = self._io.packentry(e, self.node, self.version, r)
                if self._inline:
                    ifh.write(entry)
                    ifh.write(cdelta[0])
                    ifh.write(cdelta[1])
                    self.checkinlinesize(transaction, ifh)
                    if not self._inline:
                        # checkinlinesize converted the revlog; reopen
                        dfh = self.opener(self.datafile, "a")
                        ifh = self.opener(self.indexfile, "a")
                else:
                    dfh.write(cdelta[0])
                    dfh.write(cdelta[1])
                    ifh.write(entry)

            # advance bookkeeping for the next chunk
            t, r, chain, prev = r, r + 1, node, node
            base = self.base(t)
            start = self.start(base)
            end = self.end(t)
    finally:
        if dfh:
            dfh.close()
        ifh.close()

    return node
1321
1329
def strip(self, minlink, transaction):
    """truncate the revlog on the first revision with a linkrev >= minlink

    This function is called when we're stripping revision minlink and
    its descendants from the repository.

    We have to remove all revisions with linkrev >= minlink, because
    the equivalent changelog revisions will be renumbered after the
    strip.

    So we truncate the revlog on the first of these revisions, and
    trust that the caller has saved the revisions that shouldn't be
    removed and that it'll readd them after this truncation.
    """
    if len(self) == 0:
        return

    # a lazyindex may not be fully loaded; force it so the slice and
    # nodemap updates below see every entry
    if isinstance(self.index, lazyindex):
        self._loadindexmap()

    # find the first revision to strip (index field 4 is the linkrev)
    for rev in self:
        if self.index[rev][4] >= minlink:
            break
    else:
        return

    # first truncate the files on disk
    end = self.start(rev)
    if not self._inline:
        transaction.add(self.datafile, end)
        end = rev * self._io.size
    else:
        # inline: index entries and data are interleaved in one file
        end += rev * self._io.size

    transaction.add(self.indexfile, end)

    # then reset internal state in memory to forget those revisions
    self._cache = None
    self._chunkclear()
    for x in xrange(rev, len(self)):
        del self.nodemap[self.node(x)]

    # keep the trailing sentinel entry, hence rev:-1
    del self.index[rev:-1]
1365
1373
def checksize(self):
    """Compare on-disk file sizes with what the index implies.

    Returns a tuple (dd, di):
    dd - difference between the data file's actual size and the end
         offset of the last revision (0 when inline or file missing)
    di - leftover bytes in the index file beyond whole index entries
         (for inline revlogs: actual size minus entries plus data)

    Both are 0 for a consistent revlog; missing files (ENOENT) are
    treated as empty rather than raising.
    """
    expected = 0
    if len(self):
        expected = max(0, self.end(len(self) - 1))

    try:
        f = self.opener(self.datafile)
        f.seek(0, 2)
        actual = f.tell()
        dd = actual - expected
    except IOError, inst:
        if inst.errno != errno.ENOENT:
            raise
        dd = 0

    try:
        f = self.opener(self.indexfile)
        f.seek(0, 2)
        actual = f.tell()
        s = self._io.size
        i = max(0, actual // s)
        di = actual - (i * s)
        if self._inline:
            # inline: data chunks live in the index file, so subtract
            # them before checking for trailing garbage
            databytes = 0
            for r in self:
                databytes += max(0, self.length(r))
            dd = 0
            di = actual - len(self) * s - databytes
    except IOError, inst:
        if inst.errno != errno.ENOENT:
            raise
        di = 0

    return (dd, di)
1400
1408
def files(self):
    """Return the list of files backing this revlog.

    Always includes the index file; the data file is included only
    when the revlog is not inline.
    """
    if self._inline:
        return [self.indexfile]
    return [self.indexfile, self.datafile]
General Comments 0
You need to be logged in to leave comments. Login now