static-http: disable lazy parsing...
Matt Mackall
r11155:245a67fe default
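This changeset teaches revlog to honor a "nonlazy" flag in the opener's options dict: when the flag is present, __init__ reads the whole index file up front instead of pre-reading _prereadsize bytes and possibly handing the file to the lazyparser machinery. A minimal sketch of an opener that would trigger the eager path (the class name and the open() body are illustrative assumptions; the static-http opener that actually sets the flag is modified elsewhere in this changeset):

    class nonlazyopener(object):
        # only the 'options' attribute matters to revlog.__init__ below
        options = {'nonlazy': 1}

        def __call__(self, path, mode='r'):
            return open(path, mode + 'b')

Lazy parsing keeps the index file handle open and issues many small seek+read cycles, which is presumably a poor fit for static HTTP access and why it opts out here.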
@@ -1,1399 +1,1402 @@
# revlog.py - storage back-end for mercurial
#
# Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.

"""Storage back-end for Mercurial.

This provides efficient delta storage with O(1) retrieve and append
and O(changes) merge between branches.
"""

# import stuff from node for others to import from revlog
from node import bin, hex, nullid, nullrev, short #@UnusedImport
from i18n import _
import changegroup, ancestor, mdiff, parsers, error, util
import struct, zlib, errno

_pack = struct.pack
_unpack = struct.unpack
_compress = zlib.compress
_decompress = zlib.decompress
_sha = util.sha1

# revlog flags
REVLOGV0 = 0
REVLOGNG = 1
REVLOGNGINLINEDATA = (1 << 16)
REVLOG_DEFAULT_FLAGS = REVLOGNGINLINEDATA
REVLOG_DEFAULT_FORMAT = REVLOGNG
REVLOG_DEFAULT_VERSION = REVLOG_DEFAULT_FORMAT | REVLOG_DEFAULT_FLAGS

# amount of data read unconditionally, should be >= 4
# when not inline: threshold for using lazy index
_prereadsize = 1048576
# max size of revlog with inline data
_maxinline = 131072

RevlogError = error.RevlogError
LookupError = error.LookupError

def getoffset(q):
    return int(q >> 16)

def gettype(q):
    return int(q & 0xFFFF)

def offset_type(offset, type):
    return long(long(offset) << 16 | type)

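# Illustrative check (an editorial addition, not part of revlog.py): the
# first index field packs the data offset and the per-entry flags into one
# 64-bit value -- offset in the high 48 bits, flags in the low 16 -- which
# is exactly what getoffset()/gettype() above undo.
assert getoffset(offset_type(1048576, 1)) == 1048576
assert gettype(offset_type(1048576, 1)) == 1
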
nullhash = _sha(nullid)

def hash(text, p1, p2):
    """generate a hash from the given text and its parent hashes

    This hash combines both the current file contents and its history
    in a manner that makes it easy to distinguish nodes with the same
    content in the revision graph.
    """
    # As of now, if one of the parent nodes is null, p2 is null
    if p2 == nullid:
        # deep copy of a hash is faster than creating one
        s = nullhash.copy()
        s.update(p1)
    else:
        # none of the parent nodes are nullid
        l = [p1, p2]
        l.sort()
        s = _sha(l[0])
        s.update(l[1])
    s.update(text)
    return s.digest()

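# Illustrative check (editorial addition; assumes util.sha1 is plain SHA-1,
# which it wraps): for a root revision both parents are nullid, so the node
# id reduces to sha1(nullid + nullid + text). With two real parents the
# smaller one is hashed first, making hash() symmetric in p1 and p2.
import hashlib
assert hash("hello\n", nullid, nullid) == \
       hashlib.sha1(nullid + nullid + "hello\n").digest()
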
def compress(text):
    """ generate a possibly-compressed representation of text """
    if not text:
        return ("", text)
    l = len(text)
    bin = None
    if l < 44:
        pass
    elif l > 1000000:
        # zlib makes an internal copy, thus doubling memory usage for
        # large files, so let's do this in pieces
        z = zlib.compressobj()
        p = []
        pos = 0
        while pos < l:
            pos2 = pos + 2**20
            p.append(z.compress(text[pos:pos2]))
            pos = pos2
        p.append(z.flush())
        if sum(map(len, p)) < l:
            bin = "".join(p)
    else:
        bin = _compress(text)
    if bin is None or len(bin) > l:
        if text[0] == '\0':
            return ("", text)
        return ('u', text)
    return ("", bin)

def decompress(bin):
    """ decompress the given input """
    if not bin:
        return bin
    t = bin[0]
    if t == '\0':
        return bin
    if t == 'x':
        return _decompress(bin)
    if t == 'u':
        return bin[1:]
    raise RevlogError(_("unknown compression type %r") % t)

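# Round-trip sketch (editorial addition): compress() returns a (prefix,
# data) pair whose concatenation decompress() understands -- zlib output
# for the common case, 'u' + text when compression would not help, and the
# text itself when it is empty or starts with NUL.
assert compress("abc") == ('u', "abc")                  # too short to bother
assert decompress("".join(compress("abc"))) == "abc"
assert decompress("".join(compress("na" * 100))) == "na" * 100  # zlib path
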
class lazyparser(object):
    """
    this class avoids the need to parse the entirety of large indices
    """

    # lazyparser is not safe to use on windows if win32 extensions not
    # available. it keeps file handle open, which makes it impossible
    # to break hardlinks on local cloned repos.

    def __init__(self, dataf):
        try:
            size = util.fstat(dataf).st_size
        except AttributeError:
            size = 0
        self.dataf = dataf
        self.s = struct.calcsize(indexformatng)
        self.datasize = size
        self.l = size / self.s
        self.index = [None] * self.l
        self.map = {nullid: nullrev}
        self.allmap = 0
        self.all = 0
        self.mapfind_count = 0

    def loadmap(self):
        """
        during a commit, we need to make sure the rev being added is
        not a duplicate. This requires loading the entire index,
        which is fairly slow. loadmap can load up just the node map,
        which takes much less time.
        """
        if self.allmap:
            return
        end = self.datasize
        self.allmap = 1
        cur = 0
        count = 0
        blocksize = self.s * 256
        self.dataf.seek(0)
        while cur < end:
            data = self.dataf.read(blocksize)
            off = 0
            for x in xrange(256):
                n = data[off + ngshaoffset:off + ngshaoffset + 20]
                self.map[n] = count
                count += 1
                if count >= self.l:
                    break
                off += self.s
            cur += blocksize

    def loadblock(self, blockstart, blocksize, data=None):
        if self.all:
            return
        if data is None:
            self.dataf.seek(blockstart)
            if blockstart + blocksize > self.datasize:
                # the revlog may have grown since we've started running,
                # but we don't have space in self.index for more entries.
                # limit blocksize so that we don't get too much data.
                blocksize = max(self.datasize - blockstart, 0)
            data = self.dataf.read(blocksize)
        lend = len(data) / self.s
        i = blockstart / self.s
        off = 0
        # lazyindex supports __delitem__
        if lend > len(self.index) - i:
            lend = len(self.index) - i
        for x in xrange(lend):
            if self.index[i + x] is None:
                b = data[off : off + self.s]
                self.index[i + x] = b
                n = b[ngshaoffset:ngshaoffset + 20]
                self.map[n] = i + x
            off += self.s

    def findnode(self, node):
        """search backwards through the index file for a specific node"""
        if self.allmap:
            return None

        # hg log will cause many many searches for the manifest
        # nodes. After we get called a few times, just load the whole
        # thing.
        if self.mapfind_count > 8:
            self.loadmap()
            if node in self.map:
                return node
            return None
        self.mapfind_count += 1
        last = self.l - 1
        while self.index[last] != None:
            if last == 0:
                self.all = 1
                self.allmap = 1
                return None
            last -= 1
        end = (last + 1) * self.s
        blocksize = self.s * 256
        while end >= 0:
            start = max(end - blocksize, 0)
            self.dataf.seek(start)
            data = self.dataf.read(end - start)
            findend = end - start
            while True:
                # we're searching backwards, so we have to make sure
                # we don't find a changeset where this node is a parent
                off = data.find(node, 0, findend)
                findend = off
                if off >= 0:
                    i = off / self.s
                    off = i * self.s
                    n = data[off + ngshaoffset:off + ngshaoffset + 20]
                    if n == node:
                        self.map[n] = i + start / self.s
                        return node
                else:
                    break
            end -= blocksize
        return None

    def loadindex(self, i=None, end=None):
        if self.all:
            return
        all = False
        if i is None:
            blockstart = 0
            blocksize = (65536 / self.s) * self.s
            end = self.datasize
            all = True
        else:
            if end:
                blockstart = i * self.s
                end = end * self.s
                blocksize = end - blockstart
            else:
                blockstart = (i & ~1023) * self.s
                blocksize = self.s * 1024
                end = blockstart + blocksize
        while blockstart < end:
            self.loadblock(blockstart, blocksize)
            blockstart += blocksize
        if all:
            self.all = True

class lazyindex(object):
    """a lazy version of the index array"""
    def __init__(self, parser):
        self.p = parser
    def __len__(self):
        return len(self.p.index)
    def load(self, pos):
        if pos < 0:
            pos += len(self.p.index)
        self.p.loadindex(pos)
        return self.p.index[pos]
    def __getitem__(self, pos):
        return _unpack(indexformatng, self.p.index[pos] or self.load(pos))
    def __setitem__(self, pos, item):
        self.p.index[pos] = _pack(indexformatng, *item)
    def __delitem__(self, pos):
        del self.p.index[pos]
    def insert(self, pos, e):
        self.p.index.insert(pos, _pack(indexformatng, *e))
    def append(self, e):
        self.p.index.append(_pack(indexformatng, *e))

class lazymap(object):
    """a lazy version of the node map"""
    def __init__(self, parser):
        self.p = parser
    def load(self, key):
        n = self.p.findnode(key)
        if n is None:
            raise KeyError(key)
    def __contains__(self, key):
        if key in self.p.map:
            return True
        self.p.loadmap()
        return key in self.p.map
    def __iter__(self):
        yield nullid
        for i, ret in enumerate(self.p.index):
            if not ret:
                self.p.loadindex(i)
                ret = self.p.index[i]
            if isinstance(ret, str):
                ret = _unpack(indexformatng, ret)
            yield ret[7]
    def __getitem__(self, key):
        try:
            return self.p.map[key]
        except KeyError:
            try:
                self.load(key)
                return self.p.map[key]
            except KeyError:
                raise KeyError("node " + hex(key))
    def __setitem__(self, key, val):
        self.p.map[key] = val
    def __delitem__(self, key):
        del self.p.map[key]

indexformatv0 = ">4l20s20s20s"
v0shaoffset = 56

class revlogoldio(object):
    def __init__(self):
        self.size = struct.calcsize(indexformatv0)

    def parseindex(self, fp, data, inline):
        s = self.size
        index = []
        nodemap = {nullid: nullrev}
        n = off = 0
        if len(data) == _prereadsize:
            data += fp.read() # read the rest
        l = len(data)
        while off + s <= l:
            cur = data[off:off + s]
            off += s
            e = _unpack(indexformatv0, cur)
            # transform to revlogv1 format
            e2 = (offset_type(e[0], 0), e[1], -1, e[2], e[3],
                  nodemap.get(e[4], nullrev), nodemap.get(e[5], nullrev), e[6])
            index.append(e2)
            nodemap[e[6]] = n
            n += 1

        return index, nodemap, None

    def packentry(self, entry, node, version, rev):
        if gettype(entry[0]):
            raise RevlogError(_("index entry flags need RevlogNG"))
        e2 = (getoffset(entry[0]), entry[1], entry[3], entry[4],
              node(entry[5]), node(entry[6]), entry[7])
        return _pack(indexformatv0, *e2)

# index ng:
#  6 bytes: offset
#  2 bytes: flags
#  4 bytes: compressed length
#  4 bytes: uncompressed length
#  4 bytes: base rev
#  4 bytes: link rev
#  4 bytes: parent 1 rev
#  4 bytes: parent 2 rev
# 32 bytes: nodeid
indexformatng = ">Qiiiiii20s12x"
ngshaoffset = 32
versionformat = ">I"

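# Illustrative round trip for the RevlogNG entry layout documented above
# (editorial addition, not part of the original file): each entry is 64
# bytes, with the 20-byte node padded to 32 by the trailing 12x.
assert struct.calcsize(indexformatng) == 64
_entry = (offset_type(0, 0), 11, 17, 0, 0, -1, -1, "x" * 20)
_packed = _pack(indexformatng, *_entry)
assert _packed[ngshaoffset:ngshaoffset + 20] == "x" * 20
assert _unpack(indexformatng, _packed) == _entry
del _entry, _packed
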
class revlogio(object):
    def __init__(self):
        self.size = struct.calcsize(indexformatng)

    def parseindex(self, fp, data, inline):
        if len(data) == _prereadsize:
            if util.openhardlinks() and not inline:
                # big index, let's parse it on demand
                parser = lazyparser(fp)
                index = lazyindex(parser)
                nodemap = lazymap(parser)
                e = list(index[0])
                type = gettype(e[0])
                e[0] = offset_type(0, type)
                index[0] = e
                return index, nodemap, None
            else:
                data += fp.read()

        # call the C implementation to parse the index data
        index, nodemap, cache = parsers.parse_index(data, inline)
        return index, nodemap, cache

    def packentry(self, entry, node, version, rev):
        p = _pack(indexformatng, *entry)
        if rev == 0:
            p = _pack(versionformat, version) + p[4:]
        return p

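# Note on the rev == 0 special case in revlogio.packentry() above (an
# editorial aside): a revlog index has no separate header, so the version
# word is overlaid on the first four bytes of entry 0's offset field,
# which is always zero for rev 0. parseindex() compensates by rebuilding
# entry 0 with offset_type(0, type), as in the lazy branch above, so the
# version bits never leak into start()/length() computations.
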
class revlog(object):
    """
    the underlying revision storage object

    A revlog consists of two parts, an index and the revision data.

    The index is a file with a fixed record size containing
    information on each revision, including its nodeid (hash), the
    nodeids of its parents, the position and offset of its data within
    the data file, and the revision it's based on. Finally, each entry
    contains a linkrev entry that can serve as a pointer to external
    data.

    The revision data itself is a linear collection of data chunks.
    Each chunk represents a revision and is usually represented as a
    delta against the previous chunk. To bound lookup time, runs of
    deltas are limited to about 2 times the length of the original
    version data. This makes retrieval of a version proportional to
    its size, or O(1) relative to the number of revisions.

    Both pieces of the revlog are written to in an append-only
    fashion, which means we never need to rewrite a file to insert or
    remove data, and can use some simple techniques to avoid the need
    for locking while reading.
    """
    def __init__(self, opener, indexfile):
        """
        create a revlog object

        opener is a function that abstracts the file opening operation
        and can be used to implement COW semantics or the like.
        """
        self.indexfile = indexfile
        self.datafile = indexfile[:-2] + ".d"
        self.opener = opener
        self._cache = None
        self._chunkcache = (0, '')
        self.nodemap = {nullid: nullrev}
        self.index = []

        v = REVLOG_DEFAULT_VERSION
        if hasattr(opener, 'options') and 'defversion' in opener.options:
            v = opener.options['defversion']
            if v & REVLOGNG:
                v |= REVLOGNGINLINEDATA

        i = ''
        try:
            f = self.opener(self.indexfile)
-            i = f.read(_prereadsize)
+            if "nonlazy" in getattr(self.opener, 'options', {}):
+                i = f.read()
+            else:
+                i = f.read(_prereadsize)
448 if len(i) > 0:
451 if len(i) > 0:
449 v = struct.unpack(versionformat, i[:4])[0]
452 v = struct.unpack(versionformat, i[:4])[0]
450 except IOError, inst:
453 except IOError, inst:
451 if inst.errno != errno.ENOENT:
454 if inst.errno != errno.ENOENT:
452 raise
455 raise
453
456
454 self.version = v
457 self.version = v
455 self._inline = v & REVLOGNGINLINEDATA
458 self._inline = v & REVLOGNGINLINEDATA
456 flags = v & ~0xFFFF
459 flags = v & ~0xFFFF
457 fmt = v & 0xFFFF
460 fmt = v & 0xFFFF
458 if fmt == REVLOGV0 and flags:
461 if fmt == REVLOGV0 and flags:
459 raise RevlogError(_("index %s unknown flags %#04x for format v0")
462 raise RevlogError(_("index %s unknown flags %#04x for format v0")
460 % (self.indexfile, flags >> 16))
463 % (self.indexfile, flags >> 16))
461 elif fmt == REVLOGNG and flags & ~REVLOGNGINLINEDATA:
464 elif fmt == REVLOGNG and flags & ~REVLOGNGINLINEDATA:
462 raise RevlogError(_("index %s unknown flags %#04x for revlogng")
465 raise RevlogError(_("index %s unknown flags %#04x for revlogng")
463 % (self.indexfile, flags >> 16))
466 % (self.indexfile, flags >> 16))
464 elif fmt > REVLOGNG:
467 elif fmt > REVLOGNG:
465 raise RevlogError(_("index %s unknown format %d")
468 raise RevlogError(_("index %s unknown format %d")
466 % (self.indexfile, fmt))
469 % (self.indexfile, fmt))
467
470
468 self._io = revlogio()
471 self._io = revlogio()
469 if self.version == REVLOGV0:
472 if self.version == REVLOGV0:
470 self._io = revlogoldio()
473 self._io = revlogoldio()
471 if i:
474 if i:
472 try:
475 try:
473 d = self._io.parseindex(f, i, self._inline)
476 d = self._io.parseindex(f, i, self._inline)
474 except (ValueError, IndexError):
477 except (ValueError, IndexError):
475 raise RevlogError(_("index %s is corrupted") % (self.indexfile))
478 raise RevlogError(_("index %s is corrupted") % (self.indexfile))
476 self.index, self.nodemap, self._chunkcache = d
479 self.index, self.nodemap, self._chunkcache = d
477 if not self._chunkcache:
480 if not self._chunkcache:
478 self._chunkclear()
481 self._chunkclear()
479
482
480 # add the magic null revision at -1 (if it hasn't been done already)
483 # add the magic null revision at -1 (if it hasn't been done already)
481 if (self.index == [] or isinstance(self.index, lazyindex) or
484 if (self.index == [] or isinstance(self.index, lazyindex) or
482 self.index[-1][7] != nullid) :
485 self.index[-1][7] != nullid) :
483 self.index.append((0, 0, 0, -1, -1, -1, -1, nullid))
486 self.index.append((0, 0, 0, -1, -1, -1, -1, nullid))
484
487
485 def _loadindex(self, start, end):
488 def _loadindex(self, start, end):
486 """load a block of indexes all at once from the lazy parser"""
489 """load a block of indexes all at once from the lazy parser"""
487 if isinstance(self.index, lazyindex):
490 if isinstance(self.index, lazyindex):
488 self.index.p.loadindex(start, end)
491 self.index.p.loadindex(start, end)
489
492
490 def _loadindexmap(self):
493 def _loadindexmap(self):
491 """loads both the map and the index from the lazy parser"""
494 """loads both the map and the index from the lazy parser"""
492 if isinstance(self.index, lazyindex):
495 if isinstance(self.index, lazyindex):
493 p = self.index.p
496 p = self.index.p
494 p.loadindex()
497 p.loadindex()
495 self.nodemap = p.map
498 self.nodemap = p.map
496
499
497 def _loadmap(self):
500 def _loadmap(self):
498 """loads the map from the lazy parser"""
501 """loads the map from the lazy parser"""
499 if isinstance(self.nodemap, lazymap):
502 if isinstance(self.nodemap, lazymap):
500 self.nodemap.p.loadmap()
503 self.nodemap.p.loadmap()
501 self.nodemap = self.nodemap.p.map
504 self.nodemap = self.nodemap.p.map
502
505
503 def tip(self):
506 def tip(self):
504 return self.node(len(self.index) - 2)
507 return self.node(len(self.index) - 2)
505 def __len__(self):
508 def __len__(self):
506 return len(self.index) - 1
509 return len(self.index) - 1
507 def __iter__(self):
510 def __iter__(self):
508 for i in xrange(len(self)):
511 for i in xrange(len(self)):
509 yield i
512 yield i
510 def rev(self, node):
513 def rev(self, node):
511 try:
514 try:
512 return self.nodemap[node]
515 return self.nodemap[node]
513 except KeyError:
516 except KeyError:
514 raise LookupError(node, self.indexfile, _('no node'))
517 raise LookupError(node, self.indexfile, _('no node'))
515 def node(self, rev):
518 def node(self, rev):
516 return self.index[rev][7]
519 return self.index[rev][7]
517 def linkrev(self, rev):
520 def linkrev(self, rev):
518 return self.index[rev][4]
521 return self.index[rev][4]
519 def parents(self, node):
522 def parents(self, node):
520 i = self.index
523 i = self.index
521 d = i[self.rev(node)]
524 d = i[self.rev(node)]
522 return i[d[5]][7], i[d[6]][7] # map revisions to nodes inline
525 return i[d[5]][7], i[d[6]][7] # map revisions to nodes inline
523 def parentrevs(self, rev):
526 def parentrevs(self, rev):
524 return self.index[rev][5:7]
527 return self.index[rev][5:7]
525 def start(self, rev):
528 def start(self, rev):
526 return int(self.index[rev][0] >> 16)
529 return int(self.index[rev][0] >> 16)
527 def end(self, rev):
530 def end(self, rev):
528 return self.start(rev) + self.length(rev)
531 return self.start(rev) + self.length(rev)
529 def length(self, rev):
532 def length(self, rev):
530 return self.index[rev][1]
533 return self.index[rev][1]
531 def base(self, rev):
534 def base(self, rev):
532 return self.index[rev][3]
535 return self.index[rev][3]
533
536
534 def size(self, rev):
537 def size(self, rev):
535 """return the length of the uncompressed text for a given revision"""
538 """return the length of the uncompressed text for a given revision"""
536 l = self.index[rev][2]
539 l = self.index[rev][2]
537 if l >= 0:
540 if l >= 0:
538 return l
541 return l
539
542
540 t = self.revision(self.node(rev))
543 t = self.revision(self.node(rev))
541 return len(t)
544 return len(t)
542
545
543 def reachable(self, node, stop=None):
546 def reachable(self, node, stop=None):
544 """return the set of all nodes ancestral to a given node, including
547 """return the set of all nodes ancestral to a given node, including
545 the node itself, stopping when stop is matched"""
548 the node itself, stopping when stop is matched"""
546 reachable = set((node,))
549 reachable = set((node,))
547 visit = [node]
550 visit = [node]
548 if stop:
551 if stop:
549 stopn = self.rev(stop)
552 stopn = self.rev(stop)
550 else:
553 else:
551 stopn = 0
554 stopn = 0
552 while visit:
555 while visit:
553 n = visit.pop(0)
556 n = visit.pop(0)
554 if n == stop:
557 if n == stop:
555 continue
558 continue
556 if n == nullid:
559 if n == nullid:
557 continue
560 continue
558 for p in self.parents(n):
561 for p in self.parents(n):
559 if self.rev(p) < stopn:
562 if self.rev(p) < stopn:
560 continue
563 continue
561 if p not in reachable:
564 if p not in reachable:
562 reachable.add(p)
565 reachable.add(p)
563 visit.append(p)
566 visit.append(p)
564 return reachable
567 return reachable
565
568
566 def ancestors(self, *revs):
569 def ancestors(self, *revs):
567 """Generate the ancestors of 'revs' in reverse topological order.
570 """Generate the ancestors of 'revs' in reverse topological order.
568
571
569 Yield a sequence of revision numbers starting with the parents
572 Yield a sequence of revision numbers starting with the parents
570 of each revision in revs, i.e., each revision is *not* considered
573 of each revision in revs, i.e., each revision is *not* considered
571 an ancestor of itself. Results are in breadth-first order:
574 an ancestor of itself. Results are in breadth-first order:
572 parents of each rev in revs, then parents of those, etc. Result
575 parents of each rev in revs, then parents of those, etc. Result
573 does not include the null revision."""
576 does not include the null revision."""
574 visit = list(revs)
577 visit = list(revs)
575 seen = set([nullrev])
578 seen = set([nullrev])
576 while visit:
579 while visit:
577 for parent in self.parentrevs(visit.pop(0)):
580 for parent in self.parentrevs(visit.pop(0)):
578 if parent not in seen:
581 if parent not in seen:
579 visit.append(parent)
582 visit.append(parent)
580 seen.add(parent)
583 seen.add(parent)
581 yield parent
584 yield parent
582
585
583 def descendants(self, *revs):
586 def descendants(self, *revs):
584 """Generate the descendants of 'revs' in revision order.
587 """Generate the descendants of 'revs' in revision order.
585
588
586 Yield a sequence of revision numbers starting with a child of
589 Yield a sequence of revision numbers starting with a child of
587 some rev in revs, i.e., each revision is *not* considered a
590 some rev in revs, i.e., each revision is *not* considered a
588 descendant of itself. Results are ordered by revision number (a
591 descendant of itself. Results are ordered by revision number (a
589 topological sort)."""
592 topological sort)."""
590 seen = set(revs)
593 seen = set(revs)
591 for i in xrange(min(revs) + 1, len(self)):
594 for i in xrange(min(revs) + 1, len(self)):
592 for x in self.parentrevs(i):
595 for x in self.parentrevs(i):
593 if x != nullrev and x in seen:
596 if x != nullrev and x in seen:
594 seen.add(i)
597 seen.add(i)
595 yield i
598 yield i
596 break
599 break
597
600
598 def findmissing(self, common=None, heads=None):
601 def findmissing(self, common=None, heads=None):
599 """Return the ancestors of heads that are not ancestors of common.
602 """Return the ancestors of heads that are not ancestors of common.
600
603
601 More specifically, return a list of nodes N such that every N
604 More specifically, return a list of nodes N such that every N
602 satisfies the following constraints:
605 satisfies the following constraints:
603
606
604 1. N is an ancestor of some node in 'heads'
607 1. N is an ancestor of some node in 'heads'
605 2. N is not an ancestor of any node in 'common'
608 2. N is not an ancestor of any node in 'common'
606
609
607 The list is sorted by revision number, meaning it is
610 The list is sorted by revision number, meaning it is
608 topologically sorted.
611 topologically sorted.
609
612
610 'heads' and 'common' are both lists of node IDs. If heads is
613 'heads' and 'common' are both lists of node IDs. If heads is
611 not supplied, uses all of the revlog's heads. If common is not
614 not supplied, uses all of the revlog's heads. If common is not
612 supplied, uses nullid."""
615 supplied, uses nullid."""
613 if common is None:
616 if common is None:
614 common = [nullid]
617 common = [nullid]
615 if heads is None:
618 if heads is None:
616 heads = self.heads()
619 heads = self.heads()
617
620
618 common = [self.rev(n) for n in common]
621 common = [self.rev(n) for n in common]
619 heads = [self.rev(n) for n in heads]
622 heads = [self.rev(n) for n in heads]
620
623
621 # we want the ancestors, but inclusive
624 # we want the ancestors, but inclusive
622 has = set(self.ancestors(*common))
625 has = set(self.ancestors(*common))
623 has.add(nullrev)
626 has.add(nullrev)
624 has.update(common)
627 has.update(common)
625
628
626 # take all ancestors from heads that aren't in has
629 # take all ancestors from heads that aren't in has
627 missing = set()
630 missing = set()
628 visit = [r for r in heads if r not in has]
631 visit = [r for r in heads if r not in has]
629 while visit:
632 while visit:
630 r = visit.pop(0)
633 r = visit.pop(0)
631 if r in missing:
634 if r in missing:
632 continue
635 continue
633 else:
636 else:
634 missing.add(r)
637 missing.add(r)
635 for p in self.parentrevs(r):
638 for p in self.parentrevs(r):
636 if p not in has:
639 if p not in has:
637 visit.append(p)
640 visit.append(p)
638 missing = list(missing)
641 missing = list(missing)
639 missing.sort()
642 missing.sort()
640 return [self.node(r) for r in missing]
643 return [self.node(r) for r in missing]
641
644
642 def nodesbetween(self, roots=None, heads=None):
645 def nodesbetween(self, roots=None, heads=None):
643 """Return a topological path from 'roots' to 'heads'.
646 """Return a topological path from 'roots' to 'heads'.
644
647
645 Return a tuple (nodes, outroots, outheads) where 'nodes' is a
648 Return a tuple (nodes, outroots, outheads) where 'nodes' is a
646 topologically sorted list of all nodes N that satisfy both of
649 topologically sorted list of all nodes N that satisfy both of
647 these constraints:
650 these constraints:
648
651
649 1. N is a descendant of some node in 'roots'
652 1. N is a descendant of some node in 'roots'
650 2. N is an ancestor of some node in 'heads'
653 2. N is an ancestor of some node in 'heads'
651
654
652 Every node is considered to be both a descendant and an ancestor
655 Every node is considered to be both a descendant and an ancestor
653 of itself, so every reachable node in 'roots' and 'heads' will be
656 of itself, so every reachable node in 'roots' and 'heads' will be
654 included in 'nodes'.
657 included in 'nodes'.
655
658
656 'outroots' is the list of reachable nodes in 'roots', i.e., the
659 'outroots' is the list of reachable nodes in 'roots', i.e., the
657 subset of 'roots' that is returned in 'nodes'. Likewise,
660 subset of 'roots' that is returned in 'nodes'. Likewise,
658 'outheads' is the subset of 'heads' that is also in 'nodes'.
661 'outheads' is the subset of 'heads' that is also in 'nodes'.
659
662
660 'roots' and 'heads' are both lists of node IDs. If 'roots' is
663 'roots' and 'heads' are both lists of node IDs. If 'roots' is
661 unspecified, uses nullid as the only root. If 'heads' is
664 unspecified, uses nullid as the only root. If 'heads' is
662 unspecified, uses list of all of the revlog's heads."""
665 unspecified, uses list of all of the revlog's heads."""
663 nonodes = ([], [], [])
666 nonodes = ([], [], [])
664 if roots is not None:
667 if roots is not None:
665 roots = list(roots)
668 roots = list(roots)
666 if not roots:
669 if not roots:
667 return nonodes
670 return nonodes
668 lowestrev = min([self.rev(n) for n in roots])
671 lowestrev = min([self.rev(n) for n in roots])
669 else:
672 else:
670 roots = [nullid] # Everybody's a descendent of nullid
673 roots = [nullid] # Everybody's a descendent of nullid
671 lowestrev = nullrev
674 lowestrev = nullrev
672 if (lowestrev == nullrev) and (heads is None):
675 if (lowestrev == nullrev) and (heads is None):
673 # We want _all_ the nodes!
676 # We want _all_ the nodes!
674 return ([self.node(r) for r in self], [nullid], list(self.heads()))
677 return ([self.node(r) for r in self], [nullid], list(self.heads()))
675 if heads is None:
678 if heads is None:
676 # All nodes are ancestors, so the latest ancestor is the last
679 # All nodes are ancestors, so the latest ancestor is the last
677 # node.
680 # node.
678 highestrev = len(self) - 1
681 highestrev = len(self) - 1
679 # Set ancestors to None to signal that every node is an ancestor.
682 # Set ancestors to None to signal that every node is an ancestor.
680 ancestors = None
683 ancestors = None
681 # Set heads to an empty dictionary for later discovery of heads
684 # Set heads to an empty dictionary for later discovery of heads
682 heads = {}
685 heads = {}
683 else:
686 else:
684 heads = list(heads)
687 heads = list(heads)
685 if not heads:
688 if not heads:
686 return nonodes
689 return nonodes
687 ancestors = set()
690 ancestors = set()
688 # Turn heads into a dictionary so we can remove 'fake' heads.
691 # Turn heads into a dictionary so we can remove 'fake' heads.
689 # Also, later we will be using it to filter out the heads we can't
692 # Also, later we will be using it to filter out the heads we can't
690 # find from roots.
693 # find from roots.
691 heads = dict.fromkeys(heads, 0)
694 heads = dict.fromkeys(heads, 0)
692 # Start at the top and keep marking parents until we're done.
695 # Start at the top and keep marking parents until we're done.
693 nodestotag = set(heads)
696 nodestotag = set(heads)
694 # Remember where the top was so we can use it as a limit later.
697 # Remember where the top was so we can use it as a limit later.
695 highestrev = max([self.rev(n) for n in nodestotag])
698 highestrev = max([self.rev(n) for n in nodestotag])
696 while nodestotag:
699 while nodestotag:
697 # grab a node to tag
700 # grab a node to tag
698 n = nodestotag.pop()
701 n = nodestotag.pop()
699 # Never tag nullid
702 # Never tag nullid
700 if n == nullid:
703 if n == nullid:
701 continue
704 continue
702 # A node's revision number represents its place in a
705 # A node's revision number represents its place in a
703 # topologically sorted list of nodes.
706 # topologically sorted list of nodes.
704 r = self.rev(n)
707 r = self.rev(n)
705 if r >= lowestrev:
708 if r >= lowestrev:
706 if n not in ancestors:
709 if n not in ancestors:
707 # If we are possibly a descendent of one of the roots
710 # If we are possibly a descendent of one of the roots
708 # and we haven't already been marked as an ancestor
711 # and we haven't already been marked as an ancestor
709 ancestors.add(n) # Mark as ancestor
712 ancestors.add(n) # Mark as ancestor
710 # Add non-nullid parents to list of nodes to tag.
713 # Add non-nullid parents to list of nodes to tag.
711 nodestotag.update([p for p in self.parents(n) if
714 nodestotag.update([p for p in self.parents(n) if
712 p != nullid])
715 p != nullid])
713 elif n in heads: # We've seen it before, is it a fake head?
716 elif n in heads: # We've seen it before, is it a fake head?
714 # So it is, real heads should not be the ancestors of
717 # So it is, real heads should not be the ancestors of
715 # any other heads.
718 # any other heads.
716 heads.pop(n)
719 heads.pop(n)
717 if not ancestors:
720 if not ancestors:
718 return nonodes
721 return nonodes
719 # Now that we have our set of ancestors, we want to remove any
722 # Now that we have our set of ancestors, we want to remove any
720 # roots that are not ancestors.
723 # roots that are not ancestors.
721
724
722 # If one of the roots was nullid, everything is included anyway.
725 # If one of the roots was nullid, everything is included anyway.
723 if lowestrev > nullrev:
726 if lowestrev > nullrev:
724 # But, since we weren't, let's recompute the lowest rev to not
727 # But, since we weren't, let's recompute the lowest rev to not
725 # include roots that aren't ancestors.
728 # include roots that aren't ancestors.
726
729
727 # Filter out roots that aren't ancestors of heads
730 # Filter out roots that aren't ancestors of heads
728 roots = [n for n in roots if n in ancestors]
731 roots = [n for n in roots if n in ancestors]
729 # Recompute the lowest revision
732 # Recompute the lowest revision
730 if roots:
733 if roots:
731 lowestrev = min([self.rev(n) for n in roots])
734 lowestrev = min([self.rev(n) for n in roots])
732 else:
735 else:
733 # No more roots? Return empty list
736 # No more roots? Return empty list
734 return nonodes
737 return nonodes
735 else:
738 else:
736 # We are descending from nullid, and don't need to care about
739 # We are descending from nullid, and don't need to care about
737 # any other roots.
740 # any other roots.
738 lowestrev = nullrev
741 lowestrev = nullrev
739 roots = [nullid]
742 roots = [nullid]
740 # Transform our roots list into a set.
743 # Transform our roots list into a set.
741 descendents = set(roots)
744 descendents = set(roots)
742 # Also, keep the original roots so we can filter out roots that aren't
745 # Also, keep the original roots so we can filter out roots that aren't
743 # 'real' roots (i.e. are descended from other roots).
746 # 'real' roots (i.e. are descended from other roots).
744 roots = descendents.copy()
747 roots = descendents.copy()
745 # Our topologically sorted list of output nodes.
748 # Our topologically sorted list of output nodes.
746 orderedout = []
749 orderedout = []
747 # Don't start at nullid since we don't want nullid in our output list,
750 # Don't start at nullid since we don't want nullid in our output list,
748 # and if nullid shows up in descedents, empty parents will look like
751 # and if nullid shows up in descedents, empty parents will look like
749 # they're descendents.
752 # they're descendents.
750 for r in xrange(max(lowestrev, 0), highestrev + 1):
753 for r in xrange(max(lowestrev, 0), highestrev + 1):
751 n = self.node(r)
754 n = self.node(r)
752 isdescendent = False
755 isdescendent = False
753 if lowestrev == nullrev: # Everybody is a descendent of nullid
756 if lowestrev == nullrev: # Everybody is a descendent of nullid
754 isdescendent = True
757 isdescendent = True
755 elif n in descendents:
758 elif n in descendents:
756 # n is already a descendent
759 # n is already a descendent
757 isdescendent = True
760 isdescendent = True
758 # This check only needs to be done here because all the roots
761 # This check only needs to be done here because all the roots
759 # will start being marked is descendents before the loop.
762 # will start being marked is descendents before the loop.
760 if n in roots:
763 if n in roots:
761 # If n was a root, check if it's a 'real' root.
764 # If n was a root, check if it's a 'real' root.
762 p = tuple(self.parents(n))
765 p = tuple(self.parents(n))
763 # If any of its parents are descendents, it's not a root.
766 # If any of its parents are descendents, it's not a root.
764 if (p[0] in descendents) or (p[1] in descendents):
767 if (p[0] in descendents) or (p[1] in descendents):
765 roots.remove(n)
768 roots.remove(n)
766 else:
769 else:
767 p = tuple(self.parents(n))
770 p = tuple(self.parents(n))
768 # A node is a descendent if either of its parents are
771 # A node is a descendent if either of its parents are
769 # descendents. (We seeded the dependents list with the roots
772 # descendents. (We seeded the dependents list with the roots
770 # up there, remember?)
773 # up there, remember?)
771 if (p[0] in descendents) or (p[1] in descendents):
774 if (p[0] in descendents) or (p[1] in descendents):
772 descendents.add(n)
775 descendents.add(n)
773 isdescendent = True
776 isdescendent = True
774 if isdescendent and ((ancestors is None) or (n in ancestors)):
777 if isdescendent and ((ancestors is None) or (n in ancestors)):
775 # Only include nodes that are both descendents and ancestors.
778 # Only include nodes that are both descendents and ancestors.
776 orderedout.append(n)
779 orderedout.append(n)
777 if (ancestors is not None) and (n in heads):
780 if (ancestors is not None) and (n in heads):
778 # We're trying to figure out which heads are reachable
781 # We're trying to figure out which heads are reachable
779 # from roots.
782 # from roots.
780 # Mark this head as having been reached
783 # Mark this head as having been reached
781 heads[n] = 1
784 heads[n] = 1
782 elif ancestors is None:
785 elif ancestors is None:
783 # Otherwise, we're trying to discover the heads.
786 # Otherwise, we're trying to discover the heads.
784 # Assume this is a head because if it isn't, the next step
787 # Assume this is a head because if it isn't, the next step
785 # will eventually remove it.
788 # will eventually remove it.
786 heads[n] = 1
789 heads[n] = 1
787 # But, obviously its parents aren't.
790 # But, obviously its parents aren't.
788 for p in self.parents(n):
791 for p in self.parents(n):
789 heads.pop(p, None)
792 heads.pop(p, None)
790 heads = [n for n in heads.iterkeys() if heads[n] != 0]
793 heads = [n for n in heads.iterkeys() if heads[n] != 0]
791 roots = list(roots)
794 roots = list(roots)
792 assert orderedout
795 assert orderedout
793 assert roots
796 assert roots
794 assert heads
797 assert heads
795 return (orderedout, roots, heads)
798 return (orderedout, roots, heads)
796
799
797 def heads(self, start=None, stop=None):
800 def heads(self, start=None, stop=None):
798 """return the list of all nodes that have no children
801 """return the list of all nodes that have no children
799
802
800 if start is specified, only heads that are descendants of
803 if start is specified, only heads that are descendants of
801 start will be returned
804 start will be returned
802 if stop is specified, it will consider all the revs from stop
805 if stop is specified, it will consider all the revs from stop
803 as if they had no children
806 as if they had no children
804 """
807 """
805 if start is None and stop is None:
808 if start is None and stop is None:
806 count = len(self)
809 count = len(self)
807 if not count:
810 if not count:
808 return [nullid]
811 return [nullid]
809 ishead = [1] * (count + 1)
812 ishead = [1] * (count + 1)
810 index = self.index
813 index = self.index
811 for r in xrange(count):
814 for r in xrange(count):
812 e = index[r]
815 e = index[r]
813 ishead[e[5]] = ishead[e[6]] = 0
816 ishead[e[5]] = ishead[e[6]] = 0
814 return [self.node(r) for r in xrange(count) if ishead[r]]
817 return [self.node(r) for r in xrange(count) if ishead[r]]
815
818
816 if start is None:
819 if start is None:
817 start = nullid
820 start = nullid
818 if stop is None:
821 if stop is None:
819 stop = []
822 stop = []
820 stoprevs = set([self.rev(n) for n in stop])
823 stoprevs = set([self.rev(n) for n in stop])
821 startrev = self.rev(start)
824 startrev = self.rev(start)
822 reachable = set((startrev,))
825 reachable = set((startrev,))
823 heads = set((startrev,))
826 heads = set((startrev,))
824
827
825 parentrevs = self.parentrevs
828 parentrevs = self.parentrevs
826 for r in xrange(startrev + 1, len(self)):
829 for r in xrange(startrev + 1, len(self)):
827 for p in parentrevs(r):
830 for p in parentrevs(r):
828 if p in reachable:
831 if p in reachable:
829 if r not in stoprevs:
832 if r not in stoprevs:
830 reachable.add(r)
833 reachable.add(r)
831 heads.add(r)
834 heads.add(r)
832 if p in heads and p not in stoprevs:
835 if p in heads and p not in stoprevs:
833 heads.remove(p)
836 heads.remove(p)
834
837
835 return [self.node(r) for r in heads]
838 return [self.node(r) for r in heads]
836
839
837 def children(self, node):
840 def children(self, node):
838 """find the children of a given node"""
841 """find the children of a given node"""
839 c = []
842 c = []
840 p = self.rev(node)
843 p = self.rev(node)
841 for r in range(p + 1, len(self)):
844 for r in range(p + 1, len(self)):
842 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
845 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
843 if prevs:
846 if prevs:
844 for pr in prevs:
847 for pr in prevs:
845 if pr == p:
848 if pr == p:
846 c.append(self.node(r))
849 c.append(self.node(r))
847 elif p == nullrev:
850 elif p == nullrev:
848 c.append(self.node(r))
851 c.append(self.node(r))
849 return c
852 return c
850
853
851 def descendant(self, start, end):
854 def descendant(self, start, end):
852 for i in self.descendants(start):
855 for i in self.descendants(start):
853 if i == end:
856 if i == end:
854 return True
857 return True
855 elif i > end:
858 elif i > end:
856 break
859 break
857 return False
860 return False
858
861
859 def ancestor(self, a, b):
862 def ancestor(self, a, b):
860 """calculate the least common ancestor of nodes a and b"""
863 """calculate the least common ancestor of nodes a and b"""
861
864
862 # fast path, check if it is a descendant
865 # fast path, check if it is a descendant
863 a, b = self.rev(a), self.rev(b)
866 a, b = self.rev(a), self.rev(b)
864 start, end = sorted((a, b))
867 start, end = sorted((a, b))
865 if self.descendant(start, end):
868 if self.descendant(start, end):
866 return self.node(start)
869 return self.node(start)
867
870
868 def parents(rev):
871 def parents(rev):
869 return [p for p in self.parentrevs(rev) if p != nullrev]
872 return [p for p in self.parentrevs(rev) if p != nullrev]
870
873
871 c = ancestor.ancestor(a, b, parents)
874 c = ancestor.ancestor(a, b, parents)
872 if c is None:
875 if c is None:
873 return nullid
876 return nullid
874
877
875 return self.node(c)
878 return self.node(c)
876
879
877 def _match(self, id):
880 def _match(self, id):
878 if isinstance(id, (long, int)):
881 if isinstance(id, (long, int)):
879 # rev
882 # rev
880 return self.node(id)
883 return self.node(id)
881 if len(id) == 20:
884 if len(id) == 20:
882 # possibly a binary node
885 # possibly a binary node
883 # odds of a binary node being all hex in ASCII are 1 in 10**25
886 # odds of a binary node being all hex in ASCII are 1 in 10**25
884 try:
887 try:
885 node = id
888 node = id
886 self.rev(node) # quick search the index
889 self.rev(node) # quick search the index
887 return node
890 return node
888 except LookupError:
891 except LookupError:
889 pass # may be partial hex id
892 pass # may be partial hex id
890 try:
893 try:
891 # str(rev)
894 # str(rev)
892 rev = int(id)
895 rev = int(id)
893 if str(rev) != id:
896 if str(rev) != id:
894 raise ValueError
897 raise ValueError
895 if rev < 0:
898 if rev < 0:
896 rev = len(self) + rev
899 rev = len(self) + rev
897 if rev < 0 or rev >= len(self):
900 if rev < 0 or rev >= len(self):
898 raise ValueError
901 raise ValueError
899 return self.node(rev)
902 return self.node(rev)
900 except (ValueError, OverflowError):
903 except (ValueError, OverflowError):
901 pass
904 pass
902 if len(id) == 40:
905 if len(id) == 40:
903 try:
906 try:
904 # a full hex nodeid?
907 # a full hex nodeid?
905 node = bin(id)
908 node = bin(id)
906 self.rev(node)
909 self.rev(node)
907 return node
910 return node
908 except (TypeError, LookupError):
911 except (TypeError, LookupError):
909 pass
912 pass
910
913
911 def _partialmatch(self, id):
914 def _partialmatch(self, id):
912 if len(id) < 40:
915 if len(id) < 40:
913 try:
916 try:
914 # hex(node)[:...]
917 # hex(node)[:...]
915 l = len(id) // 2 # grab an even number of digits
918 l = len(id) // 2 # grab an even number of digits
916 bin_id = bin(id[:l * 2])
919 bin_id = bin(id[:l * 2])
917 nl = [n for n in self.nodemap if n[:l] == bin_id]
920 nl = [n for n in self.nodemap if n[:l] == bin_id]
918 nl = [n for n in nl if hex(n).startswith(id)]
921 nl = [n for n in nl if hex(n).startswith(id)]
919 if len(nl) > 0:
922 if len(nl) > 0:
920 if len(nl) == 1:
923 if len(nl) == 1:
921 return nl[0]
924 return nl[0]
922 raise LookupError(id, self.indexfile,
925 raise LookupError(id, self.indexfile,
923 _('ambiguous identifier'))
926 _('ambiguous identifier'))
924 return None
927 return None
925 except TypeError:
928 except TypeError:
926 pass
929 pass
927
930
928 def lookup(self, id):
931 def lookup(self, id):
929 """locate a node based on:
932 """locate a node based on:
930 - revision number or str(revision number)
933 - revision number or str(revision number)
931 - nodeid or subset of hex nodeid
934 - nodeid or subset of hex nodeid
932 """
935 """
933 n = self._match(id)
936 n = self._match(id)
934 if n is not None:
937 if n is not None:
935 return n
938 return n
936 n = self._partialmatch(id)
939 n = self._partialmatch(id)
937 if n:
940 if n:
938 return n
941 return n
939
942
940 raise LookupError(id, self.indexfile, _('no match found'))
943 raise LookupError(id, self.indexfile, _('no match found'))
941
944
    def cmp(self, node, text):
        """compare text with a given file revision"""
        p1, p2 = self.parents(node)
        return hash(text, p1, p2) != node

    def _addchunk(self, offset, data):
        o, d = self._chunkcache
        # try to add to existing cache
        if o + len(d) == offset and len(d) + len(data) < _prereadsize:
            self._chunkcache = o, d + data
        else:
            self._chunkcache = offset, data

    def _loadchunk(self, offset, length):
        if self._inline:
            df = self.opener(self.indexfile)
        else:
            df = self.opener(self.datafile)

        readahead = max(65536, length)
        df.seek(offset)
        d = df.read(readahead)
        self._addchunk(offset, d)
        if readahead > length:
            return d[:length]
        return d

    def _getchunk(self, offset, length):
        o, d = self._chunkcache
        l = len(d)

        # is it in the cache?
        cachestart = offset - o
        cacheend = cachestart + length
        if cachestart >= 0 and cacheend <= l:
            if cachestart == 0 and cacheend == l:
                return d # avoid a copy
            return d[cachestart:cacheend]

        return self._loadchunk(offset, length)

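The three methods above form one sliding read-ahead window: _getchunk serves a request from the cached window when it is fully covered, _loadchunk reads at least 64k per physical read, and _addchunk extends the window when reads are contiguous. The same scheme in isolation, over an ordinary file object; the class and parameter names are invented for the example:

class readaheadcache(object):
    def __init__(self, fileobj, window=65536):
        self.f = fileobj
        self.window = window # minimum bytes per physical read
        self.offset, self.data = 0, ''
    def read(self, offset, length):
        start = offset - self.offset
        end = start + length
        if start >= 0 and end <= len(self.data):
            # hit: slice out of the buffered window
            return self.data[start:end]
        # miss: read ahead and remember the new window
        self.f.seek(offset)
        d = self.f.read(max(self.window, length))
        self.offset, self.data = offset, d
        return d[:length]
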
    def _chunkraw(self, startrev, endrev):
        start = self.start(startrev)
        length = self.end(endrev) - start
        if self._inline:
            start += (startrev + 1) * self._io.size
        return self._getchunk(start, length)

    def _chunk(self, rev):
        return decompress(self._chunkraw(rev, rev))

    def _chunkclear(self):
        self._chunkcache = (0, '')

    def revdiff(self, rev1, rev2):
        """return or calculate a delta between two revisions"""
        if rev1 + 1 == rev2 and self.base(rev1) == self.base(rev2):
            return self._chunk(rev2)

        return mdiff.textdiff(self.revision(self.node(rev1)),
                              self.revision(self.node(rev2)))

    def revision(self, node):
        """return an uncompressed revision of a given node"""
        if node == nullid:
            return ""
        if self._cache and self._cache[0] == node:
            return self._cache[2]

        # look up what we need to read
        text = None
        rev = self.rev(node)
        base = self.base(rev)

        # check rev flags
        if self.index[rev][0] & 0xFFFF:
            raise RevlogError(_('incompatible revision flag %x') %
                              (self.index[rev][0] & 0xFFFF))

        # do we have useful data cached?
        if self._cache and self._cache[1] >= base and self._cache[1] < rev:
            base = self._cache[1]
            text = self._cache[2]

        self._loadindex(base, rev + 1)
        self._chunkraw(base, rev)
        if text is None:
            text = self._chunk(base)

        bins = [self._chunk(r) for r in xrange(base + 1, rev + 1)]
        text = mdiff.patches(text, bins)
        p1, p2 = self.parents(node)
        if node != hash(text, p1, p2):
            raise RevlogError(_("integrity check failed on %s:%d")
                              % (self.indexfile, rev))

        self._cache = (node, rev, text)
        return text

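revision() thus rebuilds a text by starting from the full snapshot at base, applying every intermediate delta, and checking the result against the node hash before caching it. A standalone restatement; apply_delta stands in for mdiff.patches, and the sorted-parents detail mirrors the hash() helper at the top of this file, whose body is outside this hunk, so treat it as an assumption:

import hashlib

def toy_node_hash(text, p1, p2):
    # sha1 over the two parent nodes in sorted order, then the text
    s = hashlib.sha1(min(p1, p2) + max(p1, p2))
    s.update(text)
    return s.digest()

def reconstruct(snapshot, deltas, node, p1, p2, apply_delta):
    text = snapshot
    for d in deltas:
        text = apply_delta(text, d)
    if toy_node_hash(text, p1, p2) != node:
        raise ValueError('integrity check failed')
    return text
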
    def checkinlinesize(self, tr, fp=None):
        if not self._inline or (self.start(-2) + self.length(-2)) < _maxinline:
            return

        trinfo = tr.find(self.indexfile)
        if trinfo is None:
            raise RevlogError(_("%s not found in the transaction")
                              % self.indexfile)

        trindex = trinfo[2]
        dataoff = self.start(trindex)

        tr.add(self.datafile, dataoff)

        if fp:
            fp.flush()
            fp.close()

        df = self.opener(self.datafile, 'w')
        try:
            for r in self:
                df.write(self._chunkraw(r, r))
        finally:
            df.close()

        fp = self.opener(self.indexfile, 'w', atomictemp=True)
        self.version &= ~(REVLOGNGINLINEDATA)
        self._inline = False
        for i in self:
            e = self._io.packentry(self.index[i], self.node, self.version, i)
            fp.write(e)

        # if we don't call rename, the temp file will never replace the
        # real index
        fp.rename()

        tr.replace(self.indexfile, trindex * self._io.size)
        self._chunkclear()

    def addrevision(self, text, transaction, link, p1, p2, d=None):
        """add a revision to the log

        text - the revision data to add
        transaction - the transaction object used for rollback
        link - the linkrev data to add
        p1, p2 - the parent nodeids of the revision
        d - an optional precomputed delta
        """
        dfh = None
        if not self._inline:
            dfh = self.opener(self.datafile, "a")
        ifh = self.opener(self.indexfile, "a+")
        try:
            return self._addrevision(text, transaction, link, p1, p2, d, ifh, dfh)
        finally:
            if dfh:
                dfh.close()
            ifh.close()

    def _addrevision(self, text, transaction, link, p1, p2, d, ifh, dfh):
        node = hash(text, p1, p2)
        if node in self.nodemap:
            return node

        curr = len(self)
        prev = curr - 1
        base = self.base(prev)
        offset = self.end(prev)

        if curr:
            if not d:
                ptext = self.revision(self.node(prev))
                d = mdiff.textdiff(ptext, text)
            data = compress(d)
            l = len(data[1]) + len(data[0])
            dist = l + offset - self.start(base)

        # full versions are inserted when the needed deltas
        # become comparable to the uncompressed text
        if not curr or dist > len(text) * 2:
            data = compress(text)
            l = len(data[1]) + len(data[0])
            base = curr

        e = (offset_type(offset, 0), l, len(text),
             base, link, self.rev(p1), self.rev(p2), node)
        self.index.insert(-1, e)
        self.nodemap[node] = curr

        entry = self._io.packentry(e, self.node, self.version, curr)
        if not self._inline:
            transaction.add(self.datafile, offset)
            transaction.add(self.indexfile, curr * len(entry))
            if data[0]:
                dfh.write(data[0])
            dfh.write(data[1])
            dfh.flush()
            ifh.write(entry)
        else:
            offset += curr * self._io.size
            transaction.add(self.indexfile, offset, curr)
            ifh.write(entry)
            ifh.write(data[0])
            ifh.write(data[1])
            self.checkinlinesize(transaction, ifh)

        if type(text) == str: # only accept immutable objects
            self._cache = (node, curr, text)
        return node

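The policy at the heart of _addrevision is the snapshot heuristic: store a delta against the previous revision by default, but write a fresh full version once the chain from the current base would cost more than twice the uncompressed text, which bounds how much work revision() has to do. The decision in isolation, with illustrative names:

def choose_storage(compress, text, delta, chainbytes, isfirst):
    # chainbytes: compressed bytes from the current base through this
    # revision's candidate delta ('dist' in _addrevision above)
    if isfirst or chainbytes > len(text) * 2:
        return 'full', compress(text)
    return 'delta', compress(delta)
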
    def group(self, nodelist, lookup, infocollect=None):
        """Calculate a delta group, yielding a sequence of changegroup chunks
        (strings).

        Given a list of changeset revs, return a set of deltas and
        metadata corresponding to nodes. the first delta is
        parent(nodes[0]) -> nodes[0] the receiver is guaranteed to
        have this parent as it has all history before these
        changesets. parent is parent[0]
        """

        revs = [self.rev(n) for n in nodelist]

        # if we don't have any revisions touched by these changesets, bail
        if not revs:
            yield changegroup.closechunk()
            return

        # add the parent of the first rev
        p = self.parentrevs(revs[0])[0]
        revs.insert(0, p)

        # build deltas
        for d in xrange(len(revs) - 1):
            a, b = revs[d], revs[d + 1]
            nb = self.node(b)

            if infocollect is not None:
                infocollect(nb)

            p = self.parents(nb)
            meta = nb + p[0] + p[1] + lookup(nb)
            if a == -1:
                d = self.revision(nb)
                meta += mdiff.trivialdiffheader(len(d))
            else:
                d = self.revdiff(a, b)
            yield changegroup.chunkheader(len(meta) + len(d))
            yield meta
            if len(d) > 2**20:
                pos = 0
                while pos < len(d):
                    pos2 = pos + 2 ** 18
                    yield d[pos:pos2]
                    pos = pos2
            else:
                yield d

        yield changegroup.closechunk()

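group() emits what is, by assumption here, the classic changegroup framing: each chunk is a 4-byte big-endian length that counts the length field itself, followed by 80 bytes of metadata (node, both parents, and the linked changeset node) and then the delta, and a zero length closes the group. A sketch of that framing:

import struct

def frame_chunk(meta, delta):
    # 4-byte length prefix that includes itself, then the payload
    payload = meta + delta
    return struct.pack('>l', len(payload) + 4) + payload

def close_group():
    # a zero-length chunk terminates the group
    return struct.pack('>l', 0)
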
    def addgroup(self, revs, linkmapper, transaction):
        """
        add a delta group

        given a set of deltas, add them to the revision log. the
        first delta is against its parent, which should be in our
        log, the rest are against the previous delta.
        """

        #track the base of the current delta log
        r = len(self)
        t = r - 1
        node = None

        base = prev = nullrev
        start = end = textlen = 0
        if r:
            end = self.end(t)

        ifh = self.opener(self.indexfile, "a+")
        isize = r * self._io.size
        if self._inline:
            transaction.add(self.indexfile, end + isize, r)
            dfh = None
        else:
            transaction.add(self.indexfile, isize, r)
            transaction.add(self.datafile, end)
            dfh = self.opener(self.datafile, "a")

        try:
            # loop through our set of deltas
            chain = None
            for chunk in revs:
                node, p1, p2, cs = struct.unpack("20s20s20s20s", chunk[:80])
                link = linkmapper(cs)
                if node in self.nodemap:
                    # this can happen if two branches make the same change
                    chain = node
                    continue
                delta = buffer(chunk, 80)
                del chunk

                for p in (p1, p2):
                    if not p in self.nodemap:
                        raise LookupError(p, self.indexfile, _('unknown parent'))

                if not chain:
                    # retrieve the parent revision of the delta chain
                    chain = p1
                    if not chain in self.nodemap:
                        raise LookupError(chain, self.indexfile, _('unknown base'))

                # full versions are inserted when the needed deltas become
                # comparable to the uncompressed text or when the previous
                # version is not the one we have a delta against. We use
                # the size of the previous full rev as a proxy for the
                # current size.

                if chain == prev:
                    cdelta = compress(delta)
                    cdeltalen = len(cdelta[0]) + len(cdelta[1])
                    textlen = mdiff.patchedsize(textlen, delta)

                if chain != prev or (end - start + cdeltalen) > textlen * 2:
                    # flush our writes here so we can read it in revision
                    if dfh:
                        dfh.flush()
                    ifh.flush()
                    text = self.revision(chain)
                    if len(text) == 0:
                        # skip over trivial delta header
                        text = buffer(delta, 12)
                    else:
                        text = mdiff.patches(text, [delta])
                    del delta
                    chk = self._addrevision(text, transaction, link, p1, p2, None,
                                            ifh, dfh)
                    if not dfh and not self._inline:
                        # addrevision switched from inline to conventional
                        # reopen the index
                        dfh = self.opener(self.datafile, "a")
                        ifh = self.opener(self.indexfile, "a")
                    if chk != node:
                        raise RevlogError(_("consistency error adding group"))
                    textlen = len(text)
                else:
                    e = (offset_type(end, 0), cdeltalen, textlen, base,
                         link, self.rev(p1), self.rev(p2), node)
                    self.index.insert(-1, e)
                    self.nodemap[node] = r
                    entry = self._io.packentry(e, self.node, self.version, r)
                    if self._inline:
                        ifh.write(entry)
                        ifh.write(cdelta[0])
                        ifh.write(cdelta[1])
                        self.checkinlinesize(transaction, ifh)
                        if not self._inline:
                            dfh = self.opener(self.datafile, "a")
                            ifh = self.opener(self.indexfile, "a")
                    else:
                        dfh.write(cdelta[0])
                        dfh.write(cdelta[1])
                        ifh.write(entry)

                t, r, chain, prev = r, r + 1, node, node
                base = self.base(t)
                start = self.start(base)
                end = self.end(t)
        finally:
            if dfh:
                dfh.close()
            ifh.close()

        return node

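On the receiving side the layout is fixed, exactly as the unpack above reads it: the first 80 bytes of a chunk are four 20-byte nodes, and everything after byte 80 is the delta:

import struct

def parse_chunk(chunk):
    # node, parent 1, parent 2, linked changeset; the delta follows
    node, p1, p2, cs = struct.unpack('20s20s20s20s', chunk[:80])
    return node, p1, p2, cs, chunk[80:]
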
    def strip(self, minlink, transaction):
        """truncate the revlog on the first revision with a linkrev >= minlink

        This function is called when we're stripping revision minlink and
        its descendants from the repository.

        We have to remove all revisions with linkrev >= minlink, because
        the equivalent changelog revisions will be renumbered after the
        strip.

        So we truncate the revlog on the first of these revisions, and
        trust that the caller has saved the revisions that shouldn't be
        removed and that it'll readd them after this truncation.
        """
        if len(self) == 0:
            return

        if isinstance(self.index, lazyindex):
            self._loadindexmap()

        for rev in self:
            if self.index[rev][4] >= minlink:
                break
        else:
            return

        # first truncate the files on disk
        end = self.start(rev)
        if not self._inline:
            transaction.add(self.datafile, end)
            end = rev * self._io.size
        else:
            end += rev * self._io.size

        transaction.add(self.indexfile, end)

        # then reset internal state in memory to forget those revisions
        self._cache = None
        self._chunkclear()
        for x in xrange(rev, len(self)):
            del self.nodemap[self.node(x)]

        del self.index[rev:-1]

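The truncation offsets depend on the layout: a split revlog cuts the data file at the first stripped revision's data offset and the index after rev fixed-size entries, while an inline revlog interleaves entries with data, so both contributions add up in the single index file. A sketch with illustrative names:

def truncation_points(rev, datastart, entrysize, inline):
    # datastart: byte offset of rev's data, i.e. self.start(rev) above
    if not inline:
        return {'datafile': datastart, 'indexfile': rev * entrysize}
    return {'indexfile': datastart + rev * entrysize}
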
    def checksize(self):
        expected = 0
        if len(self):
            expected = max(0, self.end(len(self) - 1))

        try:
            f = self.opener(self.datafile)
            f.seek(0, 2)
            actual = f.tell()
            dd = actual - expected
        except IOError, inst:
            if inst.errno != errno.ENOENT:
                raise
            dd = 0

        try:
            f = self.opener(self.indexfile)
            f.seek(0, 2)
            actual = f.tell()
            s = self._io.size
            i = max(0, actual // s)
            di = actual - (i * s)
            if self._inline:
                databytes = 0
                for r in self:
                    databytes += max(0, self.length(r))
                dd = 0
                di = actual - len(self) * s - databytes
        except IOError, inst:
            if inst.errno != errno.ENOENT:
                raise
            di = 0

        return (dd, di)

    def files(self):
        res = [self.indexfile]
        if not self._inline:
            res.append(self.datafile)
        return res

@@ -1,144 +1,145
# statichttprepo.py - simple http repository class for mercurial
#
# This provides read-only repo access to repositories exported via static http
#
# Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.

from i18n import _
import changelog, byterange, url, error
import localrepo, manifest, util, store
import urllib, urllib2, errno

class httprangereader(object):
    def __init__(self, url, opener):
        # we assume opener has HTTPRangeHandler
        self.url = url
        self.pos = 0
        self.opener = opener
        self.name = url
    def seek(self, pos):
        self.pos = pos
    def read(self, bytes=None):
        req = urllib2.Request(self.url)
        end = ''
        if bytes:
            end = self.pos + bytes - 1
        req.add_header('Range', 'bytes=%d-%s' % (self.pos, end))

        try:
            f = self.opener.open(req)
            data = f.read()
            if hasattr(f, 'getcode'):
                # python 2.6+
                code = f.getcode()
            elif hasattr(f, 'code'):
                # undocumented attribute, seems to be set in 2.4 and 2.5
                code = f.code
            else:
                # Don't know how to check, hope for the best.
                code = 206
        except urllib2.HTTPError, inst:
            num = inst.code == 404 and errno.ENOENT or None
            raise IOError(num, inst)
        except urllib2.URLError, inst:
            raise IOError(None, inst.reason[1])

        if code == 200:
            # HTTPRangeHandler does nothing if remote does not support
            # Range headers and returns the full entity. Let's slice it.
            if bytes:
                data = data[self.pos:self.pos + bytes]
            else:
                data = data[self.pos:]
        elif bytes:
            data = data[:bytes]
        self.pos += len(data)
        return data
    def __iter__(self):
        return iter(self.read().splitlines(1))
    def close(self):
        pass

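read() maps a (position, byte count) pair onto an HTTP Range header, then copes with servers that ignore the header and answer 200 with the full entity. The header construction in isolation; the helper name is invented, and like read() it leaves the range open-ended when no count is given:

def range_header(pos, count=None):
    # 'bytes=5-' asks for everything from offset 5 onward;
    # 'bytes=5-14' asks for exactly ten bytes
    end = '' if count is None else pos + count - 1
    return 'Range', 'bytes=%d-%s' % (pos, end)
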
def build_opener(ui, authinfo):
    # urllib cannot handle URLs with embedded user or passwd
    urlopener = url.opener(ui, authinfo)
    urlopener.add_handler(byterange.HTTPRangeHandler())

    def opener(base):
        """return a function that opens files over http"""
        p = base
        def o(path, mode="r", atomictemp=None):
            if 'a' in mode or 'w' in mode:
                raise IOError('Permission denied')
            f = "/".join((p, urllib.quote(path)))
            return httprangereader(f, urlopener)
        return o

    opener.options = {'nonlazy': 1}
    return opener

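The one line added in this hunk is opener.options = {'nonlazy': 1}: a hint to the storage layer that revlog indexes fetched over static http should be parsed eagerly, since lazy parsing would otherwise turn into many small ranged HTTP round-trips. The consuming side lives in revlog.py, outside this hunk; a hypothetical sketch of such a consumer, with every name here an assumption rather than Mercurial API:

def use_lazy_index(opener, indexsize, threshold=1048576):
    # honor an explicit 'nonlazy' request from the opener, if present
    options = getattr(opener, 'options', None) or {}
    if options.get('nonlazy'):
        return False
    # otherwise fall back to the usual size threshold
    return indexsize > threshold
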
class statichttprepository(localrepo.localrepository):
    def __init__(self, ui, path):
        self._url = path
        self.ui = ui

        self.root = path
        self.path, authinfo = url.getauthinfo(path.rstrip('/') + "/.hg")

        opener = build_opener(ui, authinfo)
        self.opener = opener(self.path)

        # find requirements
        try:
            requirements = self.opener("requires").read().splitlines()
        except IOError, inst:
            if inst.errno != errno.ENOENT:
                raise
            # check if it is a non-empty old-style repository
            try:
                self.opener("00changelog.i").read(1)
            except IOError, inst:
                if inst.errno != errno.ENOENT:
                    raise
                # we do not care about empty old-style repositories here
                msg = _("'%s' does not appear to be an hg repository") % path
                raise error.RepoError(msg)
            requirements = []

        # check them
        for r in requirements:
            if r not in self.supported:
                raise error.RepoError(_("requirement '%s' not supported") % r)

        # setup store
        def pjoin(a, b):
            return a + '/' + b
        self.store = store.store(requirements, self.path, opener, pjoin)
        self.spath = self.store.path
        self.sopener = self.store.opener
        self.sjoin = self.store.join

        self.manifest = manifest.manifest(self.sopener)
        self.changelog = changelog.changelog(self.sopener)
        self._tags = None
        self.nodetagscache = None
        self._branchcache = None
        self._branchcachetip = None
        self.encodepats = None
        self.decodepats = None

    def url(self):
        return self._url

    def local(self):
        return False

    def lock(self, wait=True):
        raise util.Abort(_('cannot lock static-http repository'))

def instance(ui, path, create):
    if create:
        raise util.Abort(_('cannot create new static-http repository'))
    return statichttprepository(ui, path[7:])
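
instance() only needs to translate the URL scheme: "static-" is seven characters, so path[7:] turns a static-http URL into the plain http URL that urllib2 can fetch:

assert 'static-http://example.com/repo'[7:] == 'http://example.com/repo'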