revlog: increase I/O bound to 4x the amount of data consumed...
Siddharth Agarwal
r23288:2b9bc796 default
@@ -1,1503 +1,1503 b''
# revlog.py - storage back-end for mercurial
#
# Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.

"""Storage back-end for Mercurial.

This provides efficient delta storage with O(1) retrieve and append
and O(changes) merge between branches.
"""

# import stuff from node for others to import from revlog
from node import bin, hex, nullid, nullrev
from i18n import _
import ancestor, mdiff, parsers, error, util, templatefilters
import struct, zlib, errno

_pack = struct.pack
_unpack = struct.unpack
_compress = zlib.compress
_decompress = zlib.decompress
_sha = util.sha1

# revlog header flags
REVLOGV0 = 0
REVLOGNG = 1
REVLOGNGINLINEDATA = (1 << 16)
REVLOGGENERALDELTA = (1 << 17)
REVLOG_DEFAULT_FLAGS = REVLOGNGINLINEDATA
REVLOG_DEFAULT_FORMAT = REVLOGNG
REVLOG_DEFAULT_VERSION = REVLOG_DEFAULT_FORMAT | REVLOG_DEFAULT_FLAGS
REVLOGNG_FLAGS = REVLOGNGINLINEDATA | REVLOGGENERALDELTA

# revlog index flags
REVIDX_KNOWN_FLAGS = 0

# max size of revlog with inline data
_maxinline = 131072
_chunksize = 1048576

RevlogError = error.RevlogError
LookupError = error.LookupError
CensoredNodeError = error.CensoredNodeError

def getoffset(q):
    return int(q >> 16)

def gettype(q):
    return int(q & 0xFFFF)

def offset_type(offset, type):
    return long(long(offset) << 16 | type)
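
# A quick sketch of the packing above (illustrative values): the low 16
# bits of an index entry's first field hold per-revision flags, and the
# bits above them hold the offset into the data file.
#
#   q = offset_type(4096, 2)    # (4096 << 16) | 2
#   getoffset(q)                # -> 4096
#   gettype(q)                  # -> 2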

_nullhash = _sha(nullid)

def hash(text, p1, p2):
    """generate a hash from the given text and its parent hashes

    This hash combines both the current file contents and its history
    in a manner that makes it easy to distinguish nodes with the same
    content in the revision graph.
    """
    # As of now, if one of the parent nodes is null, p2 is null
    if p2 == nullid:
        # deep copy of a hash is faster than creating one
        s = _nullhash.copy()
        s.update(p1)
    else:
        # none of the parent nodes are nullid
        l = [p1, p2]
        l.sort()
        s = _sha(l[0])
        s.update(l[1])
    s.update(text)
    return s.digest()
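
# One property of hash() worth noting (illustrative, for two non-null
# parents): the parents are sorted before digesting, so the nodeid is
# symmetric in them, i.e. hash(text, p1, p2) == hash(text, p2, p1).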

def decompress(bin):
    """ decompress the given input """
    if not bin:
        return bin
    t = bin[0]
    if t == '\0':
        return bin
    if t == 'x':
        try:
            return _decompress(bin)
        except zlib.error, e:
            raise RevlogError(_("revlog decompress error: %s") % str(e))
    if t == 'u':
        return bin[1:]
    raise RevlogError(_("unknown compression type %r") % t)
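
# Behavior sketch for decompress(): the leading byte tags the codec,
# and zlib streams produced with default settings begin with 'x'.
#
#   decompress(zlib.compress('data'))  # -> 'data'
#   decompress('udata')                # -> 'data' ('u' prefix stripped)
#   decompress('\0data')               # -> '\0data' (stored literally)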

# index v0:
#  4 bytes: offset
#  4 bytes: compressed length
#  4 bytes: base rev
#  4 bytes: link rev
# 20 bytes: parent 1 nodeid
# 20 bytes: parent 2 nodeid
# 20 bytes: nodeid
indexformatv0 = ">4l20s20s20s"
v0shaoffset = 56
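
# Size sketch for the v0 record above: struct.calcsize(">4l20s20s20s")
# is 4 * 4 + 3 * 20 = 76 bytes per entry, and the nodeid begins at byte
# 56 (16 bytes of integers plus two 20-byte parents), matching
# v0shaoffset.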

class revlogoldio(object):
    def __init__(self):
        self.size = struct.calcsize(indexformatv0)

    def parseindex(self, data, inline):
        s = self.size
        index = []
        nodemap = {nullid: nullrev}
        n = off = 0
        l = len(data)
        while off + s <= l:
            cur = data[off:off + s]
            off += s
            e = _unpack(indexformatv0, cur)
            # transform to revlogv1 format
            e2 = (offset_type(e[0], 0), e[1], -1, e[2], e[3],
                  nodemap.get(e[4], nullrev), nodemap.get(e[5], nullrev), e[6])
            index.append(e2)
            nodemap[e[6]] = n
            n += 1

        # add the magic null revision at -1
        index.append((0, 0, 0, -1, -1, -1, -1, nullid))

        return index, nodemap, None

    def packentry(self, entry, node, version, rev):
        if gettype(entry[0]):
            raise RevlogError(_("index entry flags need RevlogNG"))
        e2 = (getoffset(entry[0]), entry[1], entry[3], entry[4],
              node(entry[5]), node(entry[6]), entry[7])
        return _pack(indexformatv0, *e2)

# index ng:
#  6 bytes: offset
#  2 bytes: flags
#  4 bytes: compressed length
#  4 bytes: uncompressed length
#  4 bytes: base rev
#  4 bytes: link rev
#  4 bytes: parent 1 rev
#  4 bytes: parent 2 rev
# 32 bytes: nodeid
indexformatng = ">Qiiiiii20s12x"
ngshaoffset = 32
versionformat = ">I"
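
# Size sketch for the RevlogNG record: struct.calcsize(">Qiiiiii20s12x")
# is 8 + 6 * 4 + 20 + 12 = 64 bytes per entry, and the nodeid begins at
# byte 32 (the 8-byte offset/flags word plus six 4-byte ints), matching
# ngshaoffset; the "32 bytes: nodeid" above is the 20-byte hash plus 12
# bytes of zero padding.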

class revlogio(object):
    def __init__(self):
        self.size = struct.calcsize(indexformatng)

    def parseindex(self, data, inline):
        # call the C implementation to parse the index data
        index, cache = parsers.parse_index2(data, inline)
        return index, getattr(index, 'nodemap', None), cache

    def packentry(self, entry, node, version, rev):
        p = _pack(indexformatng, *entry)
        if rev == 0:
            p = _pack(versionformat, version) + p[4:]
        return p

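# Note on revlogio.packentry above: the first entry's offset is always
# zero, so its leading four bytes are free to carry the revlog
# version/flags word. A sketch with hypothetical entry and node values:
#
#   revlogio().packentry(entry, node, version, 0)[:4] == _pack(">I", version)
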
class revlog(object):
    """
    the underlying revision storage object

    A revlog consists of two parts, an index and the revision data.

    The index is a file with a fixed record size containing
    information on each revision, including its nodeid (hash), the
    nodeids of its parents, the position and offset of its data within
    the data file, and the revision it's based on. Finally, each entry
    contains a linkrev entry that can serve as a pointer to external
    data.

    The revision data itself is a linear collection of data chunks.
    Each chunk represents a revision and is usually represented as a
    delta against the previous chunk. To bound lookup time, runs of
    deltas are limited to about 2 times the length of the original
    version data. This makes retrieval of a version proportional to
    its size, or O(1) relative to the number of revisions.

    Both pieces of the revlog are written to in an append-only
    fashion, which means we never need to rewrite a file to insert or
    remove data, and can use some simple techniques to avoid the need
    for locking while reading.
    """
    def __init__(self, opener, indexfile):
        """
        create a revlog object

        opener is a function that abstracts the file opening operation
        and can be used to implement COW semantics or the like.
        """
        self.indexfile = indexfile
        self.datafile = indexfile[:-2] + ".d"
        self.opener = opener
        self._cache = None
        self._basecache = None
        self._chunkcache = (0, '')
        self._chunkcachesize = 65536
        self._maxchainlen = None
        self.index = []
        self._pcache = {}
        self._nodecache = {nullid: nullrev}
        self._nodepos = None

        v = REVLOG_DEFAULT_VERSION
        opts = getattr(opener, 'options', None)
        if opts is not None:
            if 'revlogv1' in opts:
                if 'generaldelta' in opts:
                    v |= REVLOGGENERALDELTA
            else:
                v = 0
            if 'chunkcachesize' in opts:
                self._chunkcachesize = opts['chunkcachesize']
            if 'maxchainlen' in opts:
                self._maxchainlen = opts['maxchainlen']

        if self._chunkcachesize <= 0:
            raise RevlogError(_('revlog chunk cache size %r is not greater '
                                'than 0') % self._chunkcachesize)
        elif self._chunkcachesize & (self._chunkcachesize - 1):
            raise RevlogError(_('revlog chunk cache size %r is not a power '
                                'of 2') % self._chunkcachesize)
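
        # Sketch of the power-of-two test above (illustrative numbers):
        # a power of two has exactly one bit set, so x & (x - 1) is zero
        # precisely then; 65536 & 65535 == 0 passes, while
        # 65537 & 65536 != 0 raises.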

        i = ''
        self._initempty = True
        try:
            f = self.opener(self.indexfile)
            i = f.read()
            f.close()
            if len(i) > 0:
                v = struct.unpack(versionformat, i[:4])[0]
                self._initempty = False
        except IOError, inst:
            if inst.errno != errno.ENOENT:
                raise

        self.version = v
        self._inline = v & REVLOGNGINLINEDATA
        self._generaldelta = v & REVLOGGENERALDELTA
        flags = v & ~0xFFFF
        fmt = v & 0xFFFF
        if fmt == REVLOGV0 and flags:
            raise RevlogError(_("index %s unknown flags %#04x for format v0")
                              % (self.indexfile, flags >> 16))
        elif fmt == REVLOGNG and flags & ~REVLOGNG_FLAGS:
            raise RevlogError(_("index %s unknown flags %#04x for revlogng")
                              % (self.indexfile, flags >> 16))
        elif fmt > REVLOGNG:
            raise RevlogError(_("index %s unknown format %d")
                              % (self.indexfile, fmt))

        self._io = revlogio()
        if self.version == REVLOGV0:
            self._io = revlogoldio()
        try:
            d = self._io.parseindex(i, self._inline)
        except (ValueError, IndexError):
            raise RevlogError(_("index %s is corrupted") % (self.indexfile))
        self.index, nodemap, self._chunkcache = d
        if nodemap is not None:
            self.nodemap = self._nodecache = nodemap
        if not self._chunkcache:
            self._chunkclear()

    def tip(self):
        return self.node(len(self.index) - 2)
    def __len__(self):
        return len(self.index) - 1
    def __iter__(self):
        return iter(xrange(len(self)))
    def revs(self, start=0, stop=None):
        """iterate over all revs in this revlog (from start to stop)"""
        step = 1
        if stop is not None:
            if start > stop:
                step = -1
            stop += step
        else:
            stop = len(self)
        return xrange(start, stop, step)
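
    # Illustrative behavior of revs() above: stop is inclusive, and the
    # range runs backwards when start > stop.
    #
    #   revs(2, 5) -> 2, 3, 4, 5
    #   revs(5, 2) -> 5, 4, 3, 2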

    @util.propertycache
    def nodemap(self):
        self.rev(self.node(0))
        return self._nodecache

    def hasnode(self, node):
        try:
            self.rev(node)
            return True
        except KeyError:
            return False

    def clearcaches(self):
        try:
            self._nodecache.clearcaches()
        except AttributeError:
            self._nodecache = {nullid: nullrev}
            self._nodepos = None

    def rev(self, node):
        try:
            return self._nodecache[node]
        except TypeError:
            raise
        except RevlogError:
            # parsers.c radix tree lookup failed
            raise LookupError(node, self.indexfile, _('no node'))
        except KeyError:
            # pure python cache lookup failed
            n = self._nodecache
            i = self.index
            p = self._nodepos
            if p is None:
                p = len(i) - 2
            for r in xrange(p, -1, -1):
                v = i[r][7]
                n[v] = r
                if v == node:
                    self._nodepos = r - 1
                    return r
            raise LookupError(node, self.indexfile, _('no node'))

    def node(self, rev):
        return self.index[rev][7]
    def linkrev(self, rev):
        return self.index[rev][4]
    def parents(self, node):
        i = self.index
        d = i[self.rev(node)]
        return i[d[5]][7], i[d[6]][7] # map revisions to nodes inline
    def parentrevs(self, rev):
        return self.index[rev][5:7]
    def start(self, rev):
        return int(self.index[rev][0] >> 16)
    def end(self, rev):
        return self.start(rev) + self.length(rev)
    def length(self, rev):
        return self.index[rev][1]
    def chainbase(self, rev):
        index = self.index
        base = index[rev][3]
        while base != rev:
            rev = base
            base = index[rev][3]
        return base
    def chainlen(self, rev):
        return self._chaininfo(rev)[0]
    def _chaininfo(self, rev):
        index = self.index
        generaldelta = self._generaldelta
        iterrev = rev
        e = index[iterrev]
        clen = 0
        compresseddeltalen = 0
        while iterrev != e[3]:
            clen += 1
            compresseddeltalen += e[1]
            if generaldelta:
                iterrev = e[3]
            else:
                iterrev -= 1
            e = index[iterrev]
        # add text length of base since decompressing that also takes work
        compresseddeltalen += e[1]
        return clen, compresseddeltalen
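
    # A worked sketch of _chaininfo (hypothetical chain, no generaldelta):
    # for revs base, base + 1, base + 2 stored as a snapshot plus two
    # deltas, chainlen(base + 2) is 2, and compresseddeltalen is the two
    # delta sizes plus the compressed length of the base text.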
    def flags(self, rev):
        return self.index[rev][0] & 0xFFFF
    def rawsize(self, rev):
        """return the length of the uncompressed text for a given revision"""
        l = self.index[rev][2]
        if l >= 0:
            return l

        t = self.revision(self.node(rev))
        return len(t)
    size = rawsize

    def ancestors(self, revs, stoprev=0, inclusive=False):
        """Generate the ancestors of 'revs' in reverse topological order.
        Does not generate revs lower than stoprev.

        See the documentation for ancestor.lazyancestors for more details."""

        return ancestor.lazyancestors(self, revs, stoprev=stoprev,
                                      inclusive=inclusive)

    def descendants(self, revs):
        """Generate the descendants of 'revs' in revision order.

        Yield a sequence of revision numbers starting with a child of
        some rev in revs, i.e., each revision is *not* considered a
        descendant of itself. Results are ordered by revision number (a
        topological sort)."""
        first = min(revs)
        if first == nullrev:
            for i in self:
                yield i
            return

        seen = set(revs)
        for i in self.revs(start=first + 1):
            for x in self.parentrevs(i):
                if x != nullrev and x in seen:
                    seen.add(i)
                    yield i
                    break

    def findcommonmissing(self, common=None, heads=None):
        """Return a tuple of the ancestors of common and the ancestors of heads
        that are not ancestors of common. In revset terminology, we return the
        tuple:

        ::common, (::heads) - (::common)

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of node IDs. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullid."""
        if common is None:
            common = [nullid]
        if heads is None:
            heads = self.heads()

        common = [self.rev(n) for n in common]
        heads = [self.rev(n) for n in heads]

        # we want the ancestors, but inclusive
        class lazyset(object):
            def __init__(self, lazyvalues):
                self.addedvalues = set()
                self.lazyvalues = lazyvalues

            def __contains__(self, value):
                return value in self.addedvalues or value in self.lazyvalues

            def __iter__(self):
                added = self.addedvalues
                for r in added:
                    yield r
                for r in self.lazyvalues:
                    if not r in added:
                        yield r

            def add(self, value):
                self.addedvalues.add(value)

            def update(self, values):
                self.addedvalues.update(values)

        has = lazyset(self.ancestors(common))
        has.add(nullrev)
        has.update(common)

        # take all ancestors from heads that aren't in has
        missing = set()
        visit = util.deque(r for r in heads if r not in has)
        while visit:
            r = visit.popleft()
            if r in missing:
                continue
            else:
                missing.add(r)
                for p in self.parentrevs(r):
                    if p not in has:
                        visit.append(p)
        missing = list(missing)
        missing.sort()
        return has, [self.node(r) for r in missing]

    def findmissingrevs(self, common=None, heads=None):
        """Return the revision numbers of the ancestors of heads that
        are not ancestors of common.

        More specifically, return a list of revision numbers corresponding to
        nodes N such that every N satisfies the following constraints:

          1. N is an ancestor of some node in 'heads'
          2. N is not an ancestor of any node in 'common'

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of revision numbers. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullid."""
        if common is None:
            common = [nullrev]
        if heads is None:
            heads = self.headrevs()

        return ancestor.missingancestors(heads, common, self.parentrevs)

    def findmissing(self, common=None, heads=None):
        """Return the ancestors of heads that are not ancestors of common.

        More specifically, return a list of nodes N such that every N
        satisfies the following constraints:

          1. N is an ancestor of some node in 'heads'
          2. N is not an ancestor of any node in 'common'

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of node IDs. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullid."""
        if common is None:
            common = [nullid]
        if heads is None:
            heads = self.heads()

        common = [self.rev(n) for n in common]
        heads = [self.rev(n) for n in heads]

        return [self.node(r) for r in
                ancestor.missingancestors(heads, common, self.parentrevs)]

    def nodesbetween(self, roots=None, heads=None):
        """Return a topological path from 'roots' to 'heads'.

        Return a tuple (nodes, outroots, outheads) where 'nodes' is a
        topologically sorted list of all nodes N that satisfy both of
        these constraints:

          1. N is a descendant of some node in 'roots'
          2. N is an ancestor of some node in 'heads'

        Every node is considered to be both a descendant and an ancestor
        of itself, so every reachable node in 'roots' and 'heads' will be
        included in 'nodes'.

        'outroots' is the list of reachable nodes in 'roots', i.e., the
        subset of 'roots' that is returned in 'nodes'. Likewise,
        'outheads' is the subset of 'heads' that is also in 'nodes'.

        'roots' and 'heads' are both lists of node IDs. If 'roots' is
        unspecified, uses nullid as the only root. If 'heads' is
        unspecified, uses list of all of the revlog's heads."""
        nonodes = ([], [], [])
        if roots is not None:
            roots = list(roots)
            if not roots:
                return nonodes
            lowestrev = min([self.rev(n) for n in roots])
        else:
            roots = [nullid] # Everybody's a descendant of nullid
            lowestrev = nullrev
        if (lowestrev == nullrev) and (heads is None):
            # We want _all_ the nodes!
            return ([self.node(r) for r in self], [nullid], list(self.heads()))
        if heads is None:
            # All nodes are ancestors, so the latest ancestor is the last
            # node.
            highestrev = len(self) - 1
            # Set ancestors to None to signal that every node is an ancestor.
            ancestors = None
            # Set heads to an empty dictionary for later discovery of heads
            heads = {}
        else:
            heads = list(heads)
            if not heads:
                return nonodes
            ancestors = set()
            # Turn heads into a dictionary so we can remove 'fake' heads.
            # Also, later we will be using it to filter out the heads we can't
            # find from roots.
            heads = dict.fromkeys(heads, False)
            # Start at the top and keep marking parents until we're done.
            nodestotag = set(heads)
            # Remember where the top was so we can use it as a limit later.
            highestrev = max([self.rev(n) for n in nodestotag])
            while nodestotag:
                # grab a node to tag
                n = nodestotag.pop()
                # Never tag nullid
                if n == nullid:
                    continue
                # A node's revision number represents its place in a
                # topologically sorted list of nodes.
                r = self.rev(n)
                if r >= lowestrev:
                    if n not in ancestors:
                        # If we are possibly a descendant of one of the roots
                        # and we haven't already been marked as an ancestor
                        ancestors.add(n) # Mark as ancestor
                        # Add non-nullid parents to list of nodes to tag.
                        nodestotag.update([p for p in self.parents(n) if
                                           p != nullid])
                    elif n in heads: # We've seen it before, is it a fake head?
                        # So it is, real heads should not be the ancestors of
                        # any other heads.
                        heads.pop(n)
            if not ancestors:
                return nonodes
            # Now that we have our set of ancestors, we want to remove any
            # roots that are not ancestors.

            # If one of the roots was nullid, everything is included anyway.
            if lowestrev > nullrev:
                # But, since we weren't, let's recompute the lowest rev to not
                # include roots that aren't ancestors.

                # Filter out roots that aren't ancestors of heads
                roots = [n for n in roots if n in ancestors]
                # Recompute the lowest revision
                if roots:
                    lowestrev = min([self.rev(n) for n in roots])
                else:
                    # No more roots? Return empty list
                    return nonodes
            else:
                # We are descending from nullid, and don't need to care about
                # any other roots.
                lowestrev = nullrev
                roots = [nullid]
        # Transform our roots list into a set.
        descendants = set(roots)
        # Also, keep the original roots so we can filter out roots that aren't
        # 'real' roots (i.e. are descended from other roots).
        roots = descendants.copy()
        # Our topologically sorted list of output nodes.
        orderedout = []
        # Don't start at nullid since we don't want nullid in our output list,
        # and if nullid shows up in descendants, empty parents will look like
        # they're descendants.
        for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
            n = self.node(r)
            isdescendant = False
            if lowestrev == nullrev: # Everybody is a descendant of nullid
                isdescendant = True
            elif n in descendants:
                # n is already a descendant
                isdescendant = True
                # This check only needs to be done here because all the roots
                # will start being marked as descendants before the loop.
                if n in roots:
                    # If n was a root, check if it's a 'real' root.
                    p = tuple(self.parents(n))
                    # If any of its parents are descendants, it's not a root.
                    if (p[0] in descendants) or (p[1] in descendants):
                        roots.remove(n)
            else:
                p = tuple(self.parents(n))
                # A node is a descendant if either of its parents are
                # descendants. (We seeded the descendants set with the roots
                # up there, remember?)
                if (p[0] in descendants) or (p[1] in descendants):
                    descendants.add(n)
                    isdescendant = True
            if isdescendant and ((ancestors is None) or (n in ancestors)):
                # Only include nodes that are both descendants and ancestors.
                orderedout.append(n)
                if (ancestors is not None) and (n in heads):
                    # We're trying to figure out which heads are reachable
                    # from roots.
                    # Mark this head as having been reached
                    heads[n] = True
                elif ancestors is None:
                    # Otherwise, we're trying to discover the heads.
                    # Assume this is a head because if it isn't, the next step
                    # will eventually remove it.
                    heads[n] = True
                    # But, obviously its parents aren't.
                    for p in self.parents(n):
                        heads.pop(p, None)
        heads = [n for n, flag in heads.iteritems() if flag]
        roots = list(roots)
        assert orderedout
        assert roots
        assert heads
        return (orderedout, roots, heads)

    def headrevs(self):
        try:
            return self.index.headrevs()
        except AttributeError:
            return self._headrevs()

    def _headrevs(self):
        count = len(self)
        if not count:
            return [nullrev]
        # we won't iter over filtered revs so nobody is a head at start
        ishead = [0] * (count + 1)
        index = self.index
        for r in self:
            ishead[r] = 1 # I may be a head
            e = index[r]
            ishead[e[5]] = ishead[e[6]] = 0 # my parents are not
        return [r for r, val in enumerate(ishead) if val]

    def heads(self, start=None, stop=None):
        """return the list of all nodes that have no children

        if start is specified, only heads that are descendants of
        start will be returned
        if stop is specified, it will consider all the revs from stop
        as if they had no children
        """
        if start is None and stop is None:
            if not len(self):
                return [nullid]
            return [self.node(r) for r in self.headrevs()]

        if start is None:
            start = nullid
        if stop is None:
            stop = []
        stoprevs = set([self.rev(n) for n in stop])
        startrev = self.rev(start)
        reachable = set((startrev,))
        heads = set((startrev,))

        parentrevs = self.parentrevs
        for r in self.revs(start=startrev + 1):
            for p in parentrevs(r):
                if p in reachable:
                    if r not in stoprevs:
                        reachable.add(r)
                        heads.add(r)
                    if p in heads and p not in stoprevs:
                        heads.remove(p)

        return [self.node(r) for r in heads]

    def children(self, node):
        """find the children of a given node"""
        c = []
        p = self.rev(node)
        for r in self.revs(start=p + 1):
            prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
            if prevs:
                for pr in prevs:
                    if pr == p:
                        c.append(self.node(r))
            elif p == nullrev:
                c.append(self.node(r))
        return c

    def descendant(self, start, end):
        if start == nullrev:
            return True
        for i in self.descendants([start]):
            if i == end:
                return True
            elif i > end:
                break
        return False

    def commonancestorsheads(self, a, b):
        """calculate all the heads of the common ancestors of nodes a and b"""
        a, b = self.rev(a), self.rev(b)
        try:
            ancs = self.index.commonancestorsheads(a, b)
        except (AttributeError, OverflowError): # C implementation failed
            ancs = ancestor.commonancestorsheads(self.parentrevs, a, b)
        return map(self.node, ancs)

    def isancestor(self, a, b):
        """return True if node a is an ancestor of node b

        The implementation of this is trivial but the use of
        commonancestorsheads is not."""
        return a in self.commonancestorsheads(a, b)

    def ancestor(self, a, b):
        """calculate the "best" common ancestor of nodes a and b"""

        a, b = self.rev(a), self.rev(b)
        try:
            ancs = self.index.ancestors(a, b)
        except (AttributeError, OverflowError):
            ancs = ancestor.ancestors(self.parentrevs, a, b)
        if ancs:
            # choose a consistent winner when there's a tie
            return min(map(self.node, ancs))
        return nullid

    def _match(self, id):
        if isinstance(id, int):
            # rev
            return self.node(id)
        if len(id) == 20:
            # possibly a binary node
            # odds of a binary node being all hex in ASCII are 1 in 10**25
            try:
                node = id
                self.rev(node) # quick search the index
                return node
            except LookupError:
                pass # may be partial hex id
        try:
            # str(rev)
            rev = int(id)
            if str(rev) != id:
                raise ValueError
            if rev < 0:
                rev = len(self) + rev
            if rev < 0 or rev >= len(self):
                raise ValueError
            return self.node(rev)
        except (ValueError, OverflowError):
            pass
        if len(id) == 40:
            try:
                # a full hex nodeid?
                node = bin(id)
                self.rev(node)
                return node
            except (TypeError, LookupError):
                pass

    def _partialmatch(self, id):
        try:
            n = self.index.partialmatch(id)
            if n and self.hasnode(n):
                return n
            return None
        except RevlogError:
            # parsers.c radix tree lookup gave multiple matches
            # fall through to slow path that filters hidden revisions
            pass
        except (AttributeError, ValueError):
            # we are pure python, or key was too short to search radix tree
            pass

        if id in self._pcache:
            return self._pcache[id]

        if len(id) < 40:
            try:
                # hex(node)[:...]
                l = len(id) // 2 # grab an even number of digits
                prefix = bin(id[:l * 2])
                nl = [e[7] for e in self.index if e[7].startswith(prefix)]
                nl = [n for n in nl if hex(n).startswith(id) and
                      self.hasnode(n)]
                if len(nl) > 0:
                    if len(nl) == 1:
                        self._pcache[id] = nl[0]
                        return nl[0]
                    raise LookupError(id, self.indexfile,
                                      _('ambiguous identifier'))
                return None
            except TypeError:
                pass

    def lookup(self, id):
        """locate a node based on:
            - revision number or str(revision number)
            - nodeid or subset of hex nodeid
        """
        n = self._match(id)
        if n is not None:
            return n
        n = self._partialmatch(id)
        if n:
            return n

        raise LookupError(id, self.indexfile, _('no match found'))

    def cmp(self, node, text):
        """compare text with a given file revision

        returns True if text is different than what is stored.
        """
        p1, p2 = self.parents(node)
        return hash(text, p1, p2) != node
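
    # Note on cmp() above: it never reads the stored text; equality is
    # decided by rehashing, so it returns False exactly when text plus
    # the stored parents reproduces the recorded nodeid.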

    def _addchunk(self, offset, data):
        o, d = self._chunkcache
        # try to add to existing cache
        if o + len(d) == offset and len(d) + len(data) < _chunksize:
            self._chunkcache = o, d + data
        else:
            self._chunkcache = offset, data

    def _loadchunk(self, offset, length):
        if self._inline:
            df = self.opener(self.indexfile)
        else:
            df = self.opener(self.datafile)

        # Cache data both forward and backward around the requested
        # data, in a fixed size window. This helps speed up operations
        # involving reading the revlog backwards.
        cachesize = self._chunkcachesize
        realoffset = offset & ~(cachesize - 1)
        reallength = (((offset + length + cachesize) & ~(cachesize - 1))
                      - realoffset)
        df.seek(realoffset)
        d = df.read(reallength)
        df.close()
        self._addchunk(realoffset, d)
        if offset != realoffset or reallength != length:
            return util.buffer(d, offset - realoffset, length)
        return d
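
    # Window sketch for _loadchunk (assuming the default 65536-byte
    # _chunkcachesize): a request for offset=65000, length=1000 crosses
    # a window boundary, so it is widened to realoffset=0 and
    # reallength=131072, the aligned windows covering the request; the
    # caller then gets a buffer view of just the 1000 requested bytes.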

    def _getchunk(self, offset, length):
        o, d = self._chunkcache
        l = len(d)

        # is it in the cache?
        cachestart = offset - o
        cacheend = cachestart + length
        if cachestart >= 0 and cacheend <= l:
            if cachestart == 0 and cacheend == l:
                return d # avoid a copy
            return util.buffer(d, cachestart, cacheend - cachestart)

        return self._loadchunk(offset, length)

    def _chunkraw(self, startrev, endrev):
        start = self.start(startrev)
        end = self.end(endrev)
        if self._inline:
            start += (startrev + 1) * self._io.size
            end += (endrev + 1) * self._io.size
        length = end - start
        return self._getchunk(start, length)
934
934
935 def _chunk(self, rev):
935 def _chunk(self, rev):
936 return decompress(self._chunkraw(rev, rev))
936 return decompress(self._chunkraw(rev, rev))
937
937
938 def _chunks(self, revs):
938 def _chunks(self, revs):
939 '''faster version of [self._chunk(rev) for rev in revs]
939 '''faster version of [self._chunk(rev) for rev in revs]
940
940
941 Assumes that revs is in ascending order.'''
941 Assumes that revs is in ascending order.'''
942 if not revs:
942 if not revs:
943 return []
943 return []
944 start = self.start
944 start = self.start
945 length = self.length
945 length = self.length
946 inline = self._inline
946 inline = self._inline
947 iosize = self._io.size
947 iosize = self._io.size
948 buffer = util.buffer
948 buffer = util.buffer
949
949
950 l = []
950 l = []
951 ladd = l.append
951 ladd = l.append
952
952
953 # preload the cache
953 # preload the cache
954 try:
954 try:
955 while True:
955 while True:
956 # ensure that the cache doesn't change out from under us
956 # ensure that the cache doesn't change out from under us
957 _cache = self._chunkcache
957 _cache = self._chunkcache
958 self._chunkraw(revs[0], revs[-1])
958 self._chunkraw(revs[0], revs[-1])
959 if _cache == self._chunkcache:
959 if _cache == self._chunkcache:
960 break
960 break
961 offset, data = _cache
961 offset, data = _cache
962 except OverflowError:
962 except OverflowError:
963 # issue4215 - we can't cache a run of chunks greater than
963 # issue4215 - we can't cache a run of chunks greater than
964 # 2G on Windows
964 # 2G on Windows
965 return [self._chunk(rev) for rev in revs]
965 return [self._chunk(rev) for rev in revs]
966
966
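        # the chunk cache now holds one contiguous read covering all of
        # revs; slice each revision's compressed chunk out of it rather
        # than issuing one read per revision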
        for rev in revs:
            chunkstart = start(rev)
            if inline:
                chunkstart += (rev + 1) * iosize
            chunklength = length(rev)
            ladd(decompress(buffer(data, chunkstart - offset, chunklength)))

        return l

    def _chunkclear(self):
        self._chunkcache = (0, '')

    def deltaparent(self, rev):
        """return deltaparent of the given revision"""
        base = self.index[rev][3]
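        # a base equal to the revision itself marks a full snapshot; any
        # other value is the delta parent (generaldelta) or, for classic
        # revlogs, the start of a chain of deltas against previous revisions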
        if base == rev:
            return nullrev
        elif self._generaldelta:
            return base
        else:
            return rev - 1

    def revdiff(self, rev1, rev2):
        """return or calculate a delta between two revisions"""
        if rev1 != nullrev and self.deltaparent(rev2) == rev1:
            return str(self._chunk(rev2))

        return mdiff.textdiff(self.revision(rev1),
                              self.revision(rev2))

    def revision(self, nodeorrev):
        """return an uncompressed revision of a given node or revision
        number.
        """
        if isinstance(nodeorrev, int):
            rev = nodeorrev
            node = self.node(rev)
        else:
            node = nodeorrev
            rev = None

        _cache = self._cache # grab local copy of cache to avoid thread race
        cachedrev = None
        if node == nullid:
            return ""
        if _cache:
            if _cache[0] == node:
                return _cache[2]
            cachedrev = _cache[1]

        # look up what we need to read
        text = None
        if rev is None:
            rev = self.rev(node)

        # check rev flags
        if self.flags(rev) & ~REVIDX_KNOWN_FLAGS:
            raise RevlogError(_('incompatible revision flag %x') %
                              (self.flags(rev) & ~REVIDX_KNOWN_FLAGS))

        # build delta chain
        chain = []
        index = self.index # for performance
        generaldelta = self._generaldelta
        iterrev = rev
        e = index[iterrev]
        while iterrev != e[3] and iterrev != cachedrev:
            chain.append(iterrev)
            if generaldelta:
                iterrev = e[3]
            else:
                iterrev -= 1
            e = index[iterrev]

        if iterrev == cachedrev:
            # cache hit
            text = _cache[2]
        else:
            chain.append(iterrev)
        chain.reverse()

        # drop cache to save memory
        self._cache = None

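        # chain is now ordered base first; when there was no cache hit its
        # first chunk is a full text and the remaining chunks are deltas to
        # fold in on top of it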
        bins = self._chunks(chain)
        if text is None:
            text = str(bins[0])
            bins = bins[1:]

        text = mdiff.patches(text, bins)

        text = self._checkhash(text, node, rev)

        self._cache = (node, rev, text)
        return text

    def hash(self, text, p1, p2):
        """Compute a node hash.

        Available as a function so that subclasses can replace the hash
        as needed.
        """
        return hash(text, p1, p2)

    def _checkhash(self, text, node, rev):
        p1, p2 = self.parents(node)
        self.checkhash(text, p1, p2, node, rev)
        return text

    def checkhash(self, text, p1, p2, node, rev=None):
        if node != self.hash(text, p1, p2):
            revornode = rev
            if revornode is None:
                revornode = templatefilters.short(hex(node))
            raise RevlogError(_("integrity check failed on %s:%s")
                              % (self.indexfile, revornode))

    def checkinlinesize(self, tr, fp=None):
        if not self._inline or (self.start(-2) + self.length(-2)) < _maxinline:
            return

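        # the inline revlog has outgrown _maxinline; rewrite it as separate
        # index and data files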
        trinfo = tr.find(self.indexfile)
        if trinfo is None:
            raise RevlogError(_("%s not found in the transaction")
                              % self.indexfile)

        trindex = trinfo[2]
        dataoff = self.start(trindex)

        tr.add(self.datafile, dataoff)

        if fp:
            fp.flush()
            fp.close()

        df = self.opener(self.datafile, 'w')
        try:
            for r in self:
                df.write(self._chunkraw(r, r))
        finally:
            df.close()

        fp = self.opener(self.indexfile, 'w', atomictemp=True)
        self.version &= ~(REVLOGNGINLINEDATA)
        self._inline = False
        for i in self:
            e = self._io.packentry(self.index[i], self.node, self.version, i)
            fp.write(e)

        # if we don't call close, the temp file will never replace the
        # real index
        fp.close()

        tr.replace(self.indexfile, trindex * self._io.size)
        self._chunkclear()

    def addrevision(self, text, transaction, link, p1, p2, cachedelta=None,
                    node=None):
        """add a revision to the log

        text - the revision data to add
        transaction - the transaction object used for rollback
        link - the linkrev data to add
        p1, p2 - the parent nodeids of the revision
        cachedelta - an optional precomputed delta
        node - nodeid of revision; typically node is not specified, and it is
            computed by default as hash(text, p1, p2); however, subclasses
            might use a different hashing method (and override checkhash()
            in that case)
        """
        if link == nullrev:
            raise RevlogError(_("attempted to add linkrev -1 to %s")
                              % self.indexfile)
        node = node or self.hash(text, p1, p2)
        if node in self.nodemap:
            return node

        dfh = None
        if not self._inline:
            dfh = self.opener(self.datafile, "a")
        ifh = self.opener(self.indexfile, "a+")
        try:
            return self._addrevision(node, text, transaction, link, p1, p2,
                                     cachedelta, ifh, dfh)
        finally:
            if dfh:
                dfh.close()
            ifh.close()

    def compress(self, text):
        """ generate a possibly-compressed representation of text """
        if not text:
            return ("", text)
        l = len(text)
        bin = None
        if l < 44:
            pass
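            # presumably too short for zlib to win: its header and checksum
            # overhead would likely outweigh any savings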
        elif l > 1000000:
            # zlib makes an internal copy, thus doubling memory usage for
            # large files, so let's do this in pieces
            z = zlib.compressobj()
            p = []
            pos = 0
            while pos < l:
                pos2 = pos + 2**20
                p.append(z.compress(text[pos:pos2]))
                pos = pos2
            p.append(z.flush())
            if sum(map(len, p)) < l:
                bin = "".join(p)
        else:
            bin = _compress(text)
        if bin is None or len(bin) > l:
            if text[0] == '\0':
                return ("", text)
            return ('u', text)
        return ("", bin)

    def _addrevision(self, node, text, transaction, link, p1, p2,
                     cachedelta, ifh, dfh):
        """internal function to add revisions to the log

        see addrevision for argument descriptions.
        invariants:
        - text is optional (can be None); if not set, cachedelta must be set.
          if both are set, they must correspond to each other.
        """
        btext = [text]
        def buildtext():
            if btext[0] is not None:
                return btext[0]
            # flush any pending writes here so we can read them back in
            # revision() below
            if dfh:
                dfh.flush()
            ifh.flush()
            basetext = self.revision(self.node(cachedelta[0]))
            btext[0] = mdiff.patch(basetext, cachedelta[1])
            try:
                self.checkhash(btext[0], p1, p2, node)
            except CensoredNodeError:
                pass # always import a censor tombstone.
            return btext[0]

        def builddelta(rev):
            # can we use the cached delta?
            if cachedelta and cachedelta[0] == rev:
                delta = cachedelta[1]
            else:
                t = buildtext()
                ptext = self.revision(self.node(rev))
                delta = mdiff.textdiff(ptext, t)
            data = self.compress(delta)
            l = len(data[1]) + len(data[0])
            if basecache[0] == rev:
                chainbase = basecache[1]
            else:
                chainbase = self.chainbase(rev)
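            # dist estimates how many bytes of the data file a reader must
            # touch to reconstruct this revision: from the start of the
            # chain base to the end of the new delta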
            dist = l + offset - self.start(chainbase)
            if self._generaldelta:
                base = rev
            else:
                base = chainbase
            chainlen, compresseddeltalen = self._chaininfo(rev)
            chainlen += 1
            compresseddeltalen += l
            return dist, l, data, base, chainbase, chainlen, compresseddeltalen

        curr = len(self)
        prev = curr - 1
        base = chainbase = curr
        chainlen = None
        offset = self.end(prev)
        flags = 0
        d = None
        if self._basecache is None:
            self._basecache = (prev, self.chainbase(prev))
        basecache = self._basecache
        p1r, p2r = self.rev(p1), self.rev(p2)

        # should we try to build a delta?
        if prev != nullrev:
            if self._generaldelta:
                if p1r >= basecache[1]:
                    d = builddelta(p1r)
                elif p2r >= basecache[1]:
                    d = builddelta(p2r)
                else:
                    d = builddelta(prev)
            else:
                d = builddelta(prev)
            dist, l, data, base, chainbase, chainlen, compresseddeltalen = d

        # full versions are inserted when the needed deltas
        # become comparable to the uncompressed text
        if text is None:
            textlen = mdiff.patchedsize(self.rawsize(cachedelta[0]),
                                        cachedelta[1])
        else:
            textlen = len(text)

        # - 'dist' is the distance from the base revision -- bounding it limits
        #   the amount of I/O we need to do.
        # - 'compresseddeltalen' is the sum of the total size of deltas we need
        #   to apply -- bounding it limits the amount of CPU we consume.
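        # in other words, store a full snapshot instead of a delta whenever
        # reconstructing the revision would mean reading more than 4x its
        # uncompressed size from disk, applying more than 2x that size in
        # deltas, or walking a chain longer than the configured maximum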
        if (d is None or dist > textlen * 4 or l > textlen or
            compresseddeltalen > textlen * 2 or
            (self._maxchainlen and chainlen > self._maxchainlen)):
            text = buildtext()
            data = self.compress(text)
            l = len(data[1]) + len(data[0])
            base = chainbase = curr

        e = (offset_type(offset, flags), l, textlen,
             base, link, p1r, p2r, node)
        self.index.insert(-1, e)
        self.nodemap[node] = curr

        entry = self._io.packentry(e, self.node, self.version, curr)
        self._writeentry(transaction, ifh, dfh, entry, data, link, offset)

        if type(text) == str: # only accept immutable objects
            self._cache = (node, curr, text)
        self._basecache = (curr, chainbase)
        return node

    def _writeentry(self, transaction, ifh, dfh, entry, data, link, offset):
        curr = len(self) - 1
        if not self._inline:
            transaction.add(self.datafile, offset)
            transaction.add(self.indexfile, curr * len(entry))
            if data[0]:
                dfh.write(data[0])
            dfh.write(data[1])
            dfh.flush()
            ifh.write(entry)
        else:
            offset += curr * self._io.size
            transaction.add(self.indexfile, offset, curr)
            ifh.write(entry)
            ifh.write(data[0])
            ifh.write(data[1])
            self.checkinlinesize(transaction, ifh)

    def addgroup(self, bundle, linkmapper, transaction):
        """
        add a delta group

        given a set of deltas, add them to the revision log. the
        first delta is against its parent, which should be in our
        log, the rest are against the previous delta.
        """

        # track the base of the current delta log
        content = []
        node = None

        r = len(self)
        end = 0
        if r:
            end = self.end(r - 1)
        ifh = self.opener(self.indexfile, "a+")
        isize = r * self._io.size
        if self._inline:
            transaction.add(self.indexfile, end + isize, r)
            dfh = None
        else:
            transaction.add(self.indexfile, isize, r)
            transaction.add(self.datafile, end)
            dfh = self.opener(self.datafile, "a")

        try:
            # loop through our set of deltas
            chain = None
            while True:
                chunkdata = bundle.deltachunk(chain)
                if not chunkdata:
                    break
                node = chunkdata['node']
                p1 = chunkdata['p1']
                p2 = chunkdata['p2']
                cs = chunkdata['cs']
                deltabase = chunkdata['deltabase']
                delta = chunkdata['delta']

                content.append(node)

                link = linkmapper(cs)
                if node in self.nodemap:
                    # this can happen if two branches make the same change
                    chain = node
                    continue

                for p in (p1, p2):
                    if p not in self.nodemap:
                        raise LookupError(p, self.indexfile,
                                          _('unknown parent'))

                if deltabase not in self.nodemap:
                    raise LookupError(deltabase, self.indexfile,
                                      _('unknown delta base'))

                baserev = self.rev(deltabase)
                chain = self._addrevision(node, None, transaction, link,
                                          p1, p2, (baserev, delta), ifh, dfh)
                if not dfh and not self._inline:
                    # addrevision switched from inline to conventional
                    # reopen the index
                    ifh.close()
                    dfh = self.opener(self.datafile, "a")
                    ifh = self.opener(self.indexfile, "a")
        finally:
            if dfh:
                dfh.close()
            ifh.close()

        return content

    def getstrippoint(self, minlink):
        """find the minimum rev that must be stripped to strip the linkrev

        Returns a tuple containing the minimum rev and a set of all revs that
        have linkrevs that will be broken by this strip.
        """
        brokenrevs = set()
        strippoint = len(self)

        heads = {}
        futurelargelinkrevs = set()
        for head in self.headrevs():
            headlinkrev = self.linkrev(head)
            heads[head] = headlinkrev
            if headlinkrev >= minlink:
                futurelargelinkrevs.add(headlinkrev)

        # This algorithm involves walking down the rev graph, starting at the
        # heads. Since the revs are topologically sorted according to linkrev,
        # once all head linkrevs are below the minlink, we know there are
        # no more revs that could have a linkrev greater than minlink.
        # So we can stop walking.
        while futurelargelinkrevs:
            strippoint -= 1
            linkrev = heads.pop(strippoint)

            if linkrev < minlink:
                brokenrevs.add(strippoint)
            else:
                futurelargelinkrevs.remove(linkrev)

            for p in self.parentrevs(strippoint):
                if p != nullrev:
                    plinkrev = self.linkrev(p)
                    heads[p] = plinkrev
                    if plinkrev >= minlink:
                        futurelargelinkrevs.add(plinkrev)

        return strippoint, brokenrevs

    def strip(self, minlink, transaction):
        """truncate the revlog on the first revision with a linkrev >= minlink

        This function is called when we're stripping revision minlink and
        its descendants from the repository.

        We have to remove all revisions with linkrev >= minlink, because
        the equivalent changelog revisions will be renumbered after the
        strip.

        So we truncate the revlog on the first of these revisions, and
        trust that the caller has saved the revisions that shouldn't be
        removed and that it'll re-add them after this truncation.
        """
        if len(self) == 0:
            return

        rev, _ = self.getstrippoint(minlink)
        if rev == len(self):
            return

        # first truncate the files on disk
        end = self.start(rev)
        if not self._inline:
            transaction.add(self.datafile, end)
            end = rev * self._io.size
        else:
            end += rev * self._io.size

        transaction.add(self.indexfile, end)

        # then reset internal state in memory to forget those revisions
        self._cache = None
        self._chunkclear()
        for x in xrange(rev, len(self)):
            del self.nodemap[self.node(x)]

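        # the in-memory index keeps a sentinel entry for the null revision
        # at its end, hence the slice stops at -1 (mirroring the
        # insert(-1, e) in _addrevision)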
        del self.index[rev:-1]

    def checksize(self):
        expected = 0
        if len(self):
            expected = max(0, self.end(len(self) - 1))

        try:
            f = self.opener(self.datafile)
            f.seek(0, 2)
            actual = f.tell()
            f.close()
            dd = actual - expected
        except IOError, inst:
            if inst.errno != errno.ENOENT:
                raise
            dd = 0

        try:
            f = self.opener(self.indexfile)
            f.seek(0, 2)
            actual = f.tell()
            f.close()
            s = self._io.size
            i = max(0, actual // s)
            di = actual - (i * s)
            if self._inline:
                databytes = 0
                for r in self:
                    databytes += max(0, self.length(r))
                dd = 0
                di = actual - len(self) * s - databytes
        except IOError, inst:
            if inst.errno != errno.ENOENT:
                raise
            di = 0

        return (dd, di)

    def files(self):
        res = [self.indexfile]
        if not self._inline:
            res.append(self.datafile)
        return res