revlog: use an LRU cache for delta chain bases...
Gregory Szorc
r29830:92ac2baa default
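The change is small: the single-slot `_basecache` (a 2-tuple remembering the base of the last delta chain looked up) becomes `util.lrucachedict(100)`, so the bases of up to 100 recently queried chains stay warm at once. As a rough sketch of the behavior that matters here, an LRU mapping can be built on `collections.OrderedDict`; the class below is our stand-in, not Mercurial's `util.lrucachedict`, but it exposes the same `get`/`__setitem__`/`clear` surface the patch relies on:

    import collections

    class lrusketch(object):
        """Minimal LRU mapping: evicts the least recently used key."""

        def __init__(self, maxsize):
            self._cache = collections.OrderedDict()
            self._maxsize = maxsize

        def get(self, key, default=None):
            if key not in self._cache:
                return default
            # pop and re-insert to mark the key as most recently used
            value = self._cache.pop(key)
            self._cache[key] = value
            return value

        def __setitem__(self, key, value):
            self._cache.pop(key, None)
            self._cache[key] = value
            if len(self._cache) > self._maxsize:
                self._cache.popitem(last=False)  # drop the oldest entry

        def clear(self):
            self._cache.clear()

    cache = lrusketch(2)
    cache[1] = 10
    cache[2] = 20
    cache.get(1)   # touches key 1, so key 2 is now the oldest
    cache[3] = 30  # evicts key 2
    assert cache.get(2) is None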
--- a/mercurial/revlog.py
+++ b/mercurial/revlog.py
@@ -1,1782 +1,1781 @@
# revlog.py - storage back-end for mercurial
#
# Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.

"""Storage back-end for Mercurial.

This provides efficient delta storage with O(1) retrieve and append
and O(changes) merge between branches.
"""

from __future__ import absolute_import

import collections
import errno
import hashlib
import os
import struct
import zlib

# import stuff from node for others to import from revlog
from .node import (
    bin,
    hex,
    nullid,
    nullrev,
)
from .i18n import _
from . import (
    ancestor,
    error,
    mdiff,
    parsers,
    templatefilters,
    util,
)

_pack = struct.pack
_unpack = struct.unpack
_compress = zlib.compress
_decompress = zlib.decompress

# revlog header flags
REVLOGV0 = 0
REVLOGNG = 1
REVLOGNGINLINEDATA = (1 << 16)
REVLOGGENERALDELTA = (1 << 17)
REVLOG_DEFAULT_FLAGS = REVLOGNGINLINEDATA
REVLOG_DEFAULT_FORMAT = REVLOGNG
REVLOG_DEFAULT_VERSION = REVLOG_DEFAULT_FORMAT | REVLOG_DEFAULT_FLAGS
REVLOGNG_FLAGS = REVLOGNGINLINEDATA | REVLOGGENERALDELTA

# revlog index flags
REVIDX_ISCENSORED = (1 << 15) # revision has censor metadata, must be verified
REVIDX_DEFAULT_FLAGS = 0
REVIDX_KNOWN_FLAGS = REVIDX_ISCENSORED

# max size of revlog with inline data
_maxinline = 131072
_chunksize = 1048576

RevlogError = error.RevlogError
LookupError = error.LookupError
CensoredNodeError = error.CensoredNodeError

def getoffset(q):
    return int(q >> 16)

def gettype(q):
    return int(q & 0xFFFF)

def offset_type(offset, type):
    return long(long(offset) << 16 | type)

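These three helpers are inverses of one another: each index entry packs the data-file offset into the high bits of a single integer and keeps the 16 per-revision flag bits in the low 16 (`long` is Python 2's arbitrary-precision integer). A quick round trip:

    field = offset_type(1048576, REVIDX_ISCENSORED)
    assert getoffset(field) == 1048576
    assert gettype(field) == REVIDX_ISCENSORED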
_nullhash = hashlib.sha1(nullid)

def hash(text, p1, p2):
    """generate a hash from the given text and its parent hashes

    This hash combines both the current file contents and its history
    in a manner that makes it easy to distinguish nodes with the same
    content in the revision graph.
    """
    # As of now, if one of the parent node is null, p2 is null
    if p2 == nullid:
        # deep copy of a hash is faster than creating one
        s = _nullhash.copy()
        s.update(p1)
    else:
        # none of the parent nodes are nullid
        l = [p1, p2]
        l.sort()
        s = hashlib.sha1(l[0])
        s.update(l[1])
    s.update(text)
    return s.digest()

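Since nullid is twenty zero bytes, the `_nullhash.copy()` fast path yields exactly the digest the general branch would produce: for a root revision (both parents null) the nodeid reduces to SHA-1 over nullid + p1 + text:

    text = 'hello\n'
    expected = hashlib.sha1(nullid + nullid + text).digest()
    assert hash(text, nullid, nullid) == expected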
def decompress(bin):
    """ decompress the given input """
    if not bin:
        return bin
    t = bin[0]
    if t == '\0':
        return bin
    if t == 'x':
        try:
            return _decompress(bin)
        except zlib.error as e:
            raise RevlogError(_("revlog decompress error: %s") % str(e))
    if t == 'u':
        return util.buffer(bin, 1)
    raise RevlogError(_("unknown compression type %r") % t)

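The first byte of every stored chunk doubles as an encoding tag: an empty chunk or one starting with '\0' is returned untouched, 'x' is simply the first byte of any zlib stream (0x78), and 'u' marks data stored literally because compressing it did not pay off. Roughly:

    raw = 'some revision text\n' * 4
    compressed = zlib.compress(raw)
    assert compressed[0] == 'x'               # zlib magic is the tag
    assert decompress(compressed) == raw
    assert str(decompress('u' + raw)) == raw  # literal chunk, tag stripped
    assert decompress('') == ''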
# index v0:
#  4 bytes: offset
#  4 bytes: compressed length
#  4 bytes: base rev
#  4 bytes: link rev
# 20 bytes: parent 1 nodeid
# 20 bytes: parent 2 nodeid
# 20 bytes: nodeid
indexformatv0 = ">4l20s20s20s"

class revlogoldio(object):
    def __init__(self):
        self.size = struct.calcsize(indexformatv0)

    def parseindex(self, data, inline):
        s = self.size
        index = []
        nodemap = {nullid: nullrev}
        n = off = 0
        l = len(data)
        while off + s <= l:
            cur = data[off:off + s]
            off += s
            e = _unpack(indexformatv0, cur)
            # transform to revlogv1 format
            e2 = (offset_type(e[0], 0), e[1], -1, e[2], e[3],
                  nodemap.get(e[4], nullrev), nodemap.get(e[5], nullrev), e[6])
            index.append(e2)
            nodemap[e[6]] = n
            n += 1

        # add the magic null revision at -1
        index.append((0, 0, 0, -1, -1, -1, -1, nullid))

        return index, nodemap, None

    def packentry(self, entry, node, version, rev):
        if gettype(entry[0]):
            raise RevlogError(_("index entry flags need RevlogNG"))
        e2 = (getoffset(entry[0]), entry[1], entry[3], entry[4],
              node(entry[5]), node(entry[6]), entry[7])
        return _pack(indexformatv0, *e2)

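A v0 index record is 76 bytes: four big-endian 4-byte integers followed by three 20-byte nodeids, exactly as the format string spells out. Packing a made-up first revision (every field value below is invented for illustration):

    assert struct.calcsize(indexformatv0) == 76

    record = _pack(indexformatv0,
                   0,         # offset in the data file
                   11,        # compressed length
                   0,         # base rev of the delta chain
                   0,         # link rev
                   nullid,    # parent 1 nodeid
                   nullid,    # parent 2 nodeid
                   'N' * 20)  # this revision's nodeid
    offset, clen, base, link, p1, p2, node = _unpack(indexformatv0, record)
    assert (offset, clen, node) == (0, 11, 'N' * 20)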
# index ng:
#  6 bytes: offset
#  2 bytes: flags
#  4 bytes: compressed length
#  4 bytes: uncompressed length
#  4 bytes: base rev
#  4 bytes: link rev
#  4 bytes: parent 1 rev
#  4 bytes: parent 2 rev
# 32 bytes: nodeid
indexformatng = ">Qiiiiii20s12x"
versionformat = ">I"

# corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
# signed integer)
_maxentrysize = 0x7fffffff

class revlogio(object):
    def __init__(self):
        self.size = struct.calcsize(indexformatng)

    def parseindex(self, data, inline):
        # call the C implementation to parse the index data
        index, cache = parsers.parse_index2(data, inline)
        return index, getattr(index, 'nodemap', None), cache

    def packentry(self, entry, node, version, rev):
        p = _pack(indexformatng, *entry)
        if rev == 0:
            p = _pack(versionformat, version) + p[4:]
        return p

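A RevlogNG record is 64 bytes, and `packentry` exploits the fact that rev 0 always starts at offset 0: the first four bytes of its record, which would otherwise be zero, carry the version/flags word that `revlog.__init__` reads back later. A sketch with an invented entry tuple:

    assert struct.calcsize(indexformatng) == 64

    entry = (offset_type(0, 0), 11, 11, 0, 0, -1, -1, 'N' * 20)
    packed = revlogio().packentry(entry, None, REVLOG_DEFAULT_VERSION, 0)
    assert _unpack(versionformat, packed[:4])[0] == REVLOG_DEFAULT_VERSION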
class revlog(object):
    """
    the underlying revision storage object

    A revlog consists of two parts, an index and the revision data.

    The index is a file with a fixed record size containing
    information on each revision, including its nodeid (hash), the
    nodeids of its parents, the position and offset of its data within
    the data file, and the revision it's based on. Finally, each entry
    contains a linkrev entry that can serve as a pointer to external
    data.

    The revision data itself is a linear collection of data chunks.
    Each chunk represents a revision and is usually represented as a
    delta against the previous chunk. To bound lookup time, runs of
    deltas are limited to about 2 times the length of the original
    version data. This makes retrieval of a version proportional to
    its size, or O(1) relative to the number of revisions.

    Both pieces of the revlog are written to in an append-only
    fashion, which means we never need to rewrite a file to insert or
    remove data, and can use some simple techniques to avoid the need
    for locking while reading.
    """
    def __init__(self, opener, indexfile):
        """
        create a revlog object

        opener is a function that abstracts the file opening operation
        and can be used to implement COW semantics or the like.
        """
        self.indexfile = indexfile
        self.datafile = indexfile[:-2] + ".d"
        self.opener = opener
        # 3-tuple of (node, rev, text) for a raw revision.
        self._cache = None
-        # 2-tuple of (rev, baserev) defining the base revision the delta chain
-        # begins at for a revision.
-        self._basecache = None
+        # Maps rev to chain base rev.
+        self._chainbasecache = util.lrucachedict(100)
        # 2-tuple of (offset, data) of raw data from the revlog at an offset.
        self._chunkcache = (0, '')
        # How much data to read and cache into the raw revlog data cache.
        self._chunkcachesize = 65536
        self._maxchainlen = None
        self._aggressivemergedeltas = False
        self.index = []
        # Mapping of partial identifiers to full nodes.
        self._pcache = {}
        # Mapping of revision integer to full node.
        self._nodecache = {nullid: nullrev}
        self._nodepos = None

        v = REVLOG_DEFAULT_VERSION
        opts = getattr(opener, 'options', None)
        if opts is not None:
            if 'revlogv1' in opts:
                if 'generaldelta' in opts:
                    v |= REVLOGGENERALDELTA
            else:
                v = 0
            if 'chunkcachesize' in opts:
                self._chunkcachesize = opts['chunkcachesize']
            if 'maxchainlen' in opts:
                self._maxchainlen = opts['maxchainlen']
            if 'aggressivemergedeltas' in opts:
                self._aggressivemergedeltas = opts['aggressivemergedeltas']
            self._lazydeltabase = bool(opts.get('lazydeltabase', False))

        if self._chunkcachesize <= 0:
            raise RevlogError(_('revlog chunk cache size %r is not greater '
                                'than 0') % self._chunkcachesize)
        elif self._chunkcachesize & (self._chunkcachesize - 1):
            raise RevlogError(_('revlog chunk cache size %r is not a power '
                                'of 2') % self._chunkcachesize)

        indexdata = ''
        self._initempty = True
        try:
            f = self.opener(self.indexfile)
            indexdata = f.read()
            f.close()
            if len(indexdata) > 0:
                v = struct.unpack(versionformat, indexdata[:4])[0]
                self._initempty = False
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise

        self.version = v
        self._inline = v & REVLOGNGINLINEDATA
        self._generaldelta = v & REVLOGGENERALDELTA
        flags = v & ~0xFFFF
        fmt = v & 0xFFFF
        if fmt == REVLOGV0 and flags:
            raise RevlogError(_("index %s unknown flags %#04x for format v0")
                              % (self.indexfile, flags >> 16))
        elif fmt == REVLOGNG and flags & ~REVLOGNG_FLAGS:
            raise RevlogError(_("index %s unknown flags %#04x for revlogng")
                              % (self.indexfile, flags >> 16))
        elif fmt > REVLOGNG:
            raise RevlogError(_("index %s unknown format %d")
                              % (self.indexfile, fmt))

        self._io = revlogio()
        if self.version == REVLOGV0:
            self._io = revlogoldio()
        try:
            d = self._io.parseindex(indexdata, self._inline)
        except (ValueError, IndexError):
            raise RevlogError(_("index %s is corrupted") % (self.indexfile))
        self.index, nodemap, self._chunkcache = d
        if nodemap is not None:
            self.nodemap = self._nodecache = nodemap
        if not self._chunkcache:
            self._chunkclear()
        # revnum -> (chain-length, sum-delta-length)
        self._chaininfocache = {}

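The version word parsed above keeps the format number in its low 16 bits and the feature flags in the high bits, so the default header decodes as:

    v = REVLOG_DEFAULT_VERSION
    assert v & 0xFFFF == REVLOGNG             # format: RevlogNG
    assert v & ~0xFFFF == REVLOGNGINLINEDATA  # flags: inline data only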
    def tip(self):
        return self.node(len(self.index) - 2)
    def __contains__(self, rev):
        return 0 <= rev < len(self)
    def __len__(self):
        return len(self.index) - 1
    def __iter__(self):
        return iter(xrange(len(self)))
    def revs(self, start=0, stop=None):
        """iterate over all rev in this revlog (from start to stop)"""
        step = 1
        if stop is not None:
            if start > stop:
                step = -1
            stop += step
        else:
            stop = len(self)
        return xrange(start, stop, step)

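`revs` is direction-aware: when start > stop it steps backwards, and an explicit stop is inclusive either way (that is what the `stop += step` adjustment buys). On a hypothetical revlog `rlog` with at least six revisions:

    assert list(rlog.revs(start=2, stop=5)) == [2, 3, 4, 5]
    assert list(rlog.revs(start=5, stop=2)) == [5, 4, 3, 2]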
    @util.propertycache
    def nodemap(self):
        self.rev(self.node(0))
        return self._nodecache

    def hasnode(self, node):
        try:
            self.rev(node)
            return True
        except KeyError:
            return False

    def clearcaches(self):
        self._cache = None
-        self._basecache = None
+        self._chainbasecache.clear()
        self._chunkcache = (0, '')
        self._pcache = {}

        try:
            self._nodecache.clearcaches()
        except AttributeError:
            self._nodecache = {nullid: nullrev}
            self._nodepos = None

    def rev(self, node):
        try:
            return self._nodecache[node]
        except TypeError:
            raise
        except RevlogError:
            # parsers.c radix tree lookup failed
            raise LookupError(node, self.indexfile, _('no node'))
        except KeyError:
            # pure python cache lookup failed
            n = self._nodecache
            i = self.index
            p = self._nodepos
            if p is None:
                p = len(i) - 2
            for r in xrange(p, -1, -1):
                v = i[r][7]
                n[v] = r
                if v == node:
                    self._nodepos = r - 1
                    return r
            raise LookupError(node, self.indexfile, _('no node'))

    def node(self, rev):
        return self.index[rev][7]
    def linkrev(self, rev):
        return self.index[rev][4]
    def parents(self, node):
        i = self.index
        d = i[self.rev(node)]
        return i[d[5]][7], i[d[6]][7] # map revisions to nodes inline
    def parentrevs(self, rev):
        return self.index[rev][5:7]
    def start(self, rev):
        return int(self.index[rev][0] >> 16)
    def end(self, rev):
        return self.start(rev) + self.length(rev)
    def length(self, rev):
        return self.index[rev][1]
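All of the accessors above are fixed slices into the 8-tuple that `parseindex` builds per revision; for orientation:

    # e = self.index[rev]
    # e[0]: offset << 16 | flags   -> start(), flags()
    # e[1]: compressed length      -> length()
    # e[2]: uncompressed length    -> rawsize(); -1 means "compute from text"
    # e[3]: delta base rev         -> the field chainbase() below walks
    # e[4]: link rev               -> linkrev()
    # e[5], e[6]: parent revs      -> parentrevs()
    # e[7]: binary nodeid          -> node()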
    def chainbase(self, rev):
+        base = self._chainbasecache.get(rev)
+        if base is not None:
+            return base
+
        index = self.index
        base = index[rev][3]
        while base != rev:
            rev = base
            base = index[rev][3]
+
+        self._chainbasecache[rev] = base
        return base
    def chainlen(self, rev):
        return self._chaininfo(rev)[0]

    def _chaininfo(self, rev):
        chaininfocache = self._chaininfocache
        if rev in chaininfocache:
            return chaininfocache[rev]
        index = self.index
        generaldelta = self._generaldelta
        iterrev = rev
        e = index[iterrev]
        clen = 0
        compresseddeltalen = 0
        while iterrev != e[3]:
            clen += 1
            compresseddeltalen += e[1]
            if generaldelta:
                iterrev = e[3]
            else:
                iterrev -= 1
            if iterrev in chaininfocache:
                t = chaininfocache[iterrev]
                clen += t[0]
                compresseddeltalen += t[1]
                break
            e = index[iterrev]
        else:
            # Add text length of base since decompressing that also takes
            # work. For cache hits the length is already included.
            compresseddeltalen += e[1]
        r = (clen, compresseddeltalen)
        chaininfocache[rev] = r
        return r

    def _deltachain(self, rev, stoprev=None):
        """Obtain the delta chain for a revision.

        ``stoprev`` specifies a revision to stop at. If not specified, we
        stop at the base of the chain.

        Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
        revs in ascending order and ``stopped`` is a bool indicating whether
        ``stoprev`` was hit.
        """
        chain = []

        # Alias to prevent attribute lookup in tight loop.
        index = self.index
        generaldelta = self._generaldelta

        iterrev = rev
        e = index[iterrev]
        while iterrev != e[3] and iterrev != stoprev:
            chain.append(iterrev)
            if generaldelta:
                iterrev = e[3]
            else:
                iterrev -= 1
            e = index[iterrev]

        if iterrev == stoprev:
            stopped = True
        else:
            chain.append(iterrev)
            stopped = False

        chain.reverse()
        return chain, stopped

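With generaldelta, each entry's base pointer (`e[3]`) may name any earlier revision and the walk follows pointers; without it the delta parent is implicitly rev - 1 and the walk just decrements. A toy trace against a hypothetical index where rev 4 deltas against rev 2, which deltas against a full snapshot at rev 0:

    chain, stopped = rlog._deltachain(4)
    assert (chain, stopped) == ([0, 2, 4], False)  # base first, target last

    # stopping at rev 2 omits it and everything below it
    chain, stopped = rlog._deltachain(4, stoprev=2)
    assert (chain, stopped) == ([4], True)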
    def flags(self, rev):
        return self.index[rev][0] & 0xFFFF
    def rawsize(self, rev):
        """return the length of the uncompressed text for a given revision"""
        l = self.index[rev][2]
        if l >= 0:
            return l

        t = self.revision(self.node(rev))
        return len(t)
    size = rawsize

    def ancestors(self, revs, stoprev=0, inclusive=False):
        """Generate the ancestors of 'revs' in reverse topological order.
        Does not generate revs lower than stoprev.

        See the documentation for ancestor.lazyancestors for more details."""

        return ancestor.lazyancestors(self.parentrevs, revs, stoprev=stoprev,
                                      inclusive=inclusive)

    def descendants(self, revs):
        """Generate the descendants of 'revs' in revision order.

        Yield a sequence of revision numbers starting with a child of
        some rev in revs, i.e., each revision is *not* considered a
        descendant of itself. Results are ordered by revision number (a
        topological sort)."""
        first = min(revs)
        if first == nullrev:
            for i in self:
                yield i
            return

        seen = set(revs)
        for i in self.revs(start=first + 1):
            for x in self.parentrevs(i):
                if x != nullrev and x in seen:
                    seen.add(i)
                    yield i
                    break

    def findcommonmissing(self, common=None, heads=None):
        """Return a tuple of the ancestors of common and the ancestors of heads
        that are not ancestors of common. In revset terminology, we return the
        tuple:

          ::common, (::heads) - (::common)

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of node IDs. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullid."""
        if common is None:
            common = [nullid]
        if heads is None:
            heads = self.heads()

        common = [self.rev(n) for n in common]
        heads = [self.rev(n) for n in heads]

        # we want the ancestors, but inclusive
        class lazyset(object):
            def __init__(self, lazyvalues):
                self.addedvalues = set()
                self.lazyvalues = lazyvalues

            def __contains__(self, value):
                return value in self.addedvalues or value in self.lazyvalues

            def __iter__(self):
                added = self.addedvalues
                for r in added:
                    yield r
                for r in self.lazyvalues:
                    if not r in added:
                        yield r

            def add(self, value):
                self.addedvalues.add(value)

            def update(self, values):
                self.addedvalues.update(values)

        has = lazyset(self.ancestors(common))
        has.add(nullrev)
        has.update(common)

        # take all ancestors from heads that aren't in has
        missing = set()
        visit = collections.deque(r for r in heads if r not in has)
        while visit:
            r = visit.popleft()
            if r in missing:
                continue
            else:
                missing.add(r)
                for p in self.parentrevs(r):
                    if p not in has:
                        visit.append(p)
        missing = list(missing)
        missing.sort()
        return has, [self.node(r) for r in missing]

    def incrementalmissingrevs(self, common=None):
        """Return an object that can be used to incrementally compute the
        revision numbers of the ancestors of arbitrary sets that are not
        ancestors of common. This is an ancestor.incrementalmissingancestors
        object.

        'common' is a list of revision numbers. If common is not supplied, uses
        nullrev.
        """
        if common is None:
            common = [nullrev]

        return ancestor.incrementalmissingancestors(self.parentrevs, common)

    def findmissingrevs(self, common=None, heads=None):
        """Return the revision numbers of the ancestors of heads that
        are not ancestors of common.

        More specifically, return a list of revision numbers corresponding to
        nodes N such that every N satisfies the following constraints:

          1. N is an ancestor of some node in 'heads'
          2. N is not an ancestor of any node in 'common'

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of revision numbers. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullid."""
        if common is None:
            common = [nullrev]
        if heads is None:
            heads = self.headrevs()

        inc = self.incrementalmissingrevs(common=common)
        return inc.missingancestors(heads)

    def findmissing(self, common=None, heads=None):
        """Return the ancestors of heads that are not ancestors of common.

        More specifically, return a list of nodes N such that every N
        satisfies the following constraints:

          1. N is an ancestor of some node in 'heads'
          2. N is not an ancestor of any node in 'common'

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of node IDs. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullid."""
        if common is None:
            common = [nullid]
        if heads is None:
            heads = self.heads()

        common = [self.rev(n) for n in common]
        heads = [self.rev(n) for n in heads]

        inc = self.incrementalmissingrevs(common=common)
        return [self.node(r) for r in inc.missingancestors(heads)]

    def nodesbetween(self, roots=None, heads=None):
        """Return a topological path from 'roots' to 'heads'.

        Return a tuple (nodes, outroots, outheads) where 'nodes' is a
        topologically sorted list of all nodes N that satisfy both of
        these constraints:

          1. N is a descendant of some node in 'roots'
          2. N is an ancestor of some node in 'heads'

        Every node is considered to be both a descendant and an ancestor
        of itself, so every reachable node in 'roots' and 'heads' will be
        included in 'nodes'.

        'outroots' is the list of reachable nodes in 'roots', i.e., the
        subset of 'roots' that is returned in 'nodes'. Likewise,
        'outheads' is the subset of 'heads' that is also in 'nodes'.

        'roots' and 'heads' are both lists of node IDs. If 'roots' is
        unspecified, uses nullid as the only root. If 'heads' is
        unspecified, uses list of all of the revlog's heads."""
        nonodes = ([], [], [])
        if roots is not None:
            roots = list(roots)
            if not roots:
                return nonodes
            lowestrev = min([self.rev(n) for n in roots])
        else:
            roots = [nullid] # Everybody's a descendant of nullid
            lowestrev = nullrev
        if (lowestrev == nullrev) and (heads is None):
            # We want _all_ the nodes!
            return ([self.node(r) for r in self], [nullid], list(self.heads()))
        if heads is None:
            # All nodes are ancestors, so the latest ancestor is the last
            # node.
            highestrev = len(self) - 1
            # Set ancestors to None to signal that every node is an ancestor.
            ancestors = None
            # Set heads to an empty dictionary for later discovery of heads
            heads = {}
        else:
            heads = list(heads)
            if not heads:
                return nonodes
            ancestors = set()
            # Turn heads into a dictionary so we can remove 'fake' heads.
            # Also, later we will be using it to filter out the heads we can't
            # find from roots.
            heads = dict.fromkeys(heads, False)
            # Start at the top and keep marking parents until we're done.
            nodestotag = set(heads)
            # Remember where the top was so we can use it as a limit later.
            highestrev = max([self.rev(n) for n in nodestotag])
            while nodestotag:
                # grab a node to tag
                n = nodestotag.pop()
                # Never tag nullid
                if n == nullid:
                    continue
                # A node's revision number represents its place in a
                # topologically sorted list of nodes.
                r = self.rev(n)
                if r >= lowestrev:
                    if n not in ancestors:
                        # If we are possibly a descendant of one of the roots
                        # and we haven't already been marked as an ancestor
                        ancestors.add(n) # Mark as ancestor
                        # Add non-nullid parents to list of nodes to tag.
                        nodestotag.update([p for p in self.parents(n) if
                                           p != nullid])
                    elif n in heads: # We've seen it before, is it a fake head?
                        # So it is, real heads should not be the ancestors of
                        # any other heads.
                        heads.pop(n)
            if not ancestors:
                return nonodes
            # Now that we have our set of ancestors, we want to remove any
            # roots that are not ancestors.

            # If one of the roots was nullid, everything is included anyway.
            if lowestrev > nullrev:
                # But, since we weren't, let's recompute the lowest rev to not
                # include roots that aren't ancestors.

                # Filter out roots that aren't ancestors of heads
                roots = [n for n in roots if n in ancestors]
                # Recompute the lowest revision
                if roots:
                    lowestrev = min([self.rev(n) for n in roots])
                else:
                    # No more roots? Return empty list
                    return nonodes
            else:
                # We are descending from nullid, and don't need to care about
                # any other roots.
                lowestrev = nullrev
                roots = [nullid]
        # Transform our roots list into a set.
        descendants = set(roots)
        # Also, keep the original roots so we can filter out roots that aren't
        # 'real' roots (i.e. are descended from other roots).
        roots = descendants.copy()
        # Our topologically sorted list of output nodes.
        orderedout = []
        # Don't start at nullid since we don't want nullid in our output list,
        # and if nullid shows up in descendants, empty parents will look like
        # they're descendants.
        for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
            n = self.node(r)
            isdescendant = False
            if lowestrev == nullrev: # Everybody is a descendant of nullid
                isdescendant = True
            elif n in descendants:
                # n is already a descendant
                isdescendant = True
                # This check only needs to be done here because all the roots
                # will start being marked is descendants before the loop.
                if n in roots:
                    # If n was a root, check if it's a 'real' root.
                    p = tuple(self.parents(n))
                    # If any of its parents are descendants, it's not a root.
                    if (p[0] in descendants) or (p[1] in descendants):
                        roots.remove(n)
            else:
                p = tuple(self.parents(n))
                # A node is a descendant if either of its parents are
                # descendants. (We seeded the dependents list with the roots
                # up there, remember?)
                if (p[0] in descendants) or (p[1] in descendants):
                    descendants.add(n)
                    isdescendant = True
            if isdescendant and ((ancestors is None) or (n in ancestors)):
                # Only include nodes that are both descendants and ancestors.
                orderedout.append(n)
                if (ancestors is not None) and (n in heads):
                    # We're trying to figure out which heads are reachable
                    # from roots.
                    # Mark this head as having been reached
                    heads[n] = True
                elif ancestors is None:
                    # Otherwise, we're trying to discover the heads.
                    # Assume this is a head because if it isn't, the next step
                    # will eventually remove it.
                    heads[n] = True
                    # But, obviously its parents aren't.
                    for p in self.parents(n):
                        heads.pop(p, None)
        heads = [n for n, flag in heads.iteritems() if flag]
        roots = list(roots)
        assert orderedout
        assert roots
        assert heads
        return (orderedout, roots, heads)

    def headrevs(self):
        try:
            return self.index.headrevs()
        except AttributeError:
            return self._headrevs()

    def computephases(self, roots):
        return self.index.computephasesmapsets(roots)

    def _headrevs(self):
        count = len(self)
        if not count:
            return [nullrev]
        # we won't iter over filtered rev so nobody is a head at start
        ishead = [0] * (count + 1)
        index = self.index
        for r in self:
            ishead[r] = 1 # I may be an head
            e = index[r]
            ishead[e[5]] = ishead[e[6]] = 0 # my parent are not
        return [r for r, val in enumerate(ishead) if val]

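The pure-Python fallback needs only one pass because parents always carry smaller revision numbers than their children: every revision is first marked as a head, and the mark is cleared as soon as the revision appears as someone's parent. For a hypothetical two-branch history where revs 1 and 2 are both children of rev 0:

    # parentrevs: 0 -> (-1, -1), 1 -> (0, -1), 2 -> (0, -1)
    assert rlog._headrevs() == [1, 2]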
    def heads(self, start=None, stop=None):
        """return the list of all nodes that have no children

        if start is specified, only heads that are descendants of
        start will be returned
        if stop is specified, it will consider all the revs from stop
        as if they had no children
        """
        if start is None and stop is None:
            if not len(self):
                return [nullid]
            return [self.node(r) for r in self.headrevs()]

        if start is None:
            start = nullid
        if stop is None:
            stop = []
        stoprevs = set([self.rev(n) for n in stop])
        startrev = self.rev(start)
        reachable = set((startrev,))
        heads = set((startrev,))

        parentrevs = self.parentrevs
        for r in self.revs(start=startrev + 1):
            for p in parentrevs(r):
                if p in reachable:
                    if r not in stoprevs:
                        reachable.add(r)
                        heads.add(r)
                    if p in heads and p not in stoprevs:
                        heads.remove(p)

        return [self.node(r) for r in heads]

    def children(self, node):
        """find the children of a given node"""
        c = []
        p = self.rev(node)
        for r in self.revs(start=p + 1):
            prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
            if prevs:
                for pr in prevs:
                    if pr == p:
                        c.append(self.node(r))
            elif p == nullrev:
                c.append(self.node(r))
        return c

    def descendant(self, start, end):
        if start == nullrev:
            return True
        for i in self.descendants([start]):
            if i == end:
                return True
            elif i > end:
                break
        return False

    def commonancestorsheads(self, a, b):
        """calculate all the heads of the common ancestors of nodes a and b"""
        a, b = self.rev(a), self.rev(b)
        try:
            ancs = self.index.commonancestorsheads(a, b)
        except (AttributeError, OverflowError): # C implementation failed
            ancs = ancestor.commonancestorsheads(self.parentrevs, a, b)
        return map(self.node, ancs)

    def isancestor(self, a, b):
        """return True if node a is an ancestor of node b

        The implementation of this is trivial but the use of
        commonancestorsheads is not."""
        return a in self.commonancestorsheads(a, b)

    def ancestor(self, a, b):
        """calculate the "best" common ancestor of nodes a and b"""

        a, b = self.rev(a), self.rev(b)
        try:
            ancs = self.index.ancestors(a, b)
        except (AttributeError, OverflowError):
            ancs = ancestor.ancestors(self.parentrevs, a, b)
        if ancs:
            # choose a consistent winner when there's a tie
            return min(map(self.node, ancs))
        return nullid

    def _match(self, id):
        if isinstance(id, int):
            # rev
            return self.node(id)
        if len(id) == 20:
            # possibly a binary node
            # odds of a binary node being all hex in ASCII are 1 in 10**25
            try:
                node = id
                self.rev(node) # quick search the index
                return node
            except LookupError:
                pass # may be partial hex id
        try:
            # str(rev)
            rev = int(id)
            if str(rev) != id:
                raise ValueError
            if rev < 0:
                rev = len(self) + rev
            if rev < 0 or rev >= len(self):
                raise ValueError
            return self.node(rev)
        except (ValueError, OverflowError):
            pass
        if len(id) == 40:
            try:
                # a full hex nodeid?
                node = bin(id)
                self.rev(node)
                return node
            except (TypeError, LookupError):
                pass

    def _partialmatch(self, id):
        try:
            n = self.index.partialmatch(id)
            if n and self.hasnode(n):
                return n
            return None
        except RevlogError:
            # parsers.c radix tree lookup gave multiple matches
            # fast path: for unfiltered changelog, radix tree is accurate
            if not getattr(self, 'filteredrevs', None):
                raise LookupError(id, self.indexfile,
                                  _('ambiguous identifier'))
            # fall through to slow path that filters hidden revisions
        except (AttributeError, ValueError):
            # we are pure python, or key was too short to search radix tree
            pass

        if id in self._pcache:
            return self._pcache[id]

        if len(id) < 40:
            try:
                # hex(node)[:...]
                l = len(id) // 2 # grab an even number of digits
                prefix = bin(id[:l * 2])
                nl = [e[7] for e in self.index if e[7].startswith(prefix)]
                nl = [n for n in nl if hex(n).startswith(id) and
                      self.hasnode(n)]
                if len(nl) > 0:
                    if len(nl) == 1:
                        self._pcache[id] = nl[0]
                        return nl[0]
                    raise LookupError(id, self.indexfile,
                                      _('ambiguous identifier'))
                return None
            except TypeError:
                pass

    def lookup(self, id):
        """locate a node based on:
            - revision number or str(revision number)
            - nodeid or subset of hex nodeid
        """
        n = self._match(id)
        if n is not None:
            return n
        n = self._partialmatch(id)
        if n:
            return n

        raise LookupError(id, self.indexfile, _('no match found'))

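    # Usage sketch (editor's illustration; "rl" is a hypothetical revlog):
    #
    #   >>> rl.lookup(0)         # revision number -> binary nodeid
    #   >>> rl.lookup('0')       # str(revision number) works too
    #   >>> rl.lookup('1f2e3d')  # full or unambiguous partial hex nodeid
    #
    # Anything that cannot be resolved raises LookupError, either
    # 'no match found' or 'ambiguous identifier'.
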
    def cmp(self, node, text):
        """compare text with a given file revision

        returns True if text is different than what is stored.
        """
        p1, p2 = self.parents(node)
        return hash(text, p1, p2) != node

    def _addchunk(self, offset, data):
        """Add a segment to the revlog cache.

        Accepts an absolute offset and the data that is at that location.
        """
        o, d = self._chunkcache
        # try to add to existing cache
        if o + len(d) == offset and len(d) + len(data) < _chunksize:
            self._chunkcache = o, d + data
        else:
            self._chunkcache = offset, data

    def _loadchunk(self, offset, length, df=None):
        """Load a segment of raw data from the revlog.

        Accepts an absolute offset, length to read, and an optional existing
        file handle to read from.

        If an existing file handle is passed, it will be seeked and the
        original seek position will NOT be restored.

        Returns a str or buffer of raw byte data.
        """
        if df is not None:
            closehandle = False
        else:
            if self._inline:
                df = self.opener(self.indexfile)
            else:
                df = self.opener(self.datafile)
            closehandle = True

        # Cache data both forward and backward around the requested
        # data, in a fixed size window. This helps speed up operations
        # involving reading the revlog backwards.
        cachesize = self._chunkcachesize
        realoffset = offset & ~(cachesize - 1)
        reallength = (((offset + length + cachesize) & ~(cachesize - 1))
                      - realoffset)
        df.seek(realoffset)
        d = df.read(reallength)
        if closehandle:
            df.close()
        self._addchunk(realoffset, d)
        if offset != realoffset or reallength != length:
            return util.buffer(d, offset - realoffset, length)
        return d

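    # Worked example (editor's note): the masking above rounds the request
    # out to cache-size-aligned boundaries. Assuming the default
    # _chunkcachesize of 64KB (65536):
    #
    #   offset = 70000, length = 100
    #   realoffset = 70000 & ~65535                            = 65536
    #   reallength = ((70000 + 100 + 65536) & ~65535) - 65536  = 65536
    #
    # so one aligned 64KB window (bytes 65536..131071) is read and cached,
    # and a util.buffer view of just the 100 requested bytes is returned.
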
    def _getchunk(self, offset, length, df=None):
        """Obtain a segment of raw data from the revlog.

        Accepts an absolute offset, length of bytes to obtain, and an
        optional file handle to the already-opened revlog. If the file
        handle is used, its original seek position will not be preserved.

        Requests for data may be returned from a cache.

        Returns a str or a buffer instance of raw byte data.
        """
        o, d = self._chunkcache
        l = len(d)

        # is it in the cache?
        cachestart = offset - o
        cacheend = cachestart + length
        if cachestart >= 0 and cacheend <= l:
            if cachestart == 0 and cacheend == l:
                return d # avoid a copy
            return util.buffer(d, cachestart, cacheend - cachestart)

        return self._loadchunk(offset, length, df=df)

    def _chunkraw(self, startrev, endrev, df=None):
        """Obtain a segment of raw data corresponding to a range of revisions.

        Accepts the start and end revisions and an optional already-open
        file handle to be used for reading. If the file handle is read, its
        seek position will not be preserved.

        Requests for data may be satisfied by a cache.

        Returns a 2-tuple of (offset, data) for the requested range of
        revisions. Offset is the integer offset from the beginning of the
        revlog and data is a str or buffer of the raw byte data.

        Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
        to determine where each revision's data begins and ends.
        """
        start = self.start(startrev)
        end = self.end(endrev)
        if self._inline:
            start += (startrev + 1) * self._io.size
            end += (endrev + 1) * self._io.size
        length = end - start

        return start, self._getchunk(start, length, df=df)

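    # Editor's note on the inline adjustment above: in an inline revlog the
    # data chunks are interleaved with the index entries (self._io.size bytes
    # apiece; 64 bytes for RevlogNG), so rev r's chunk physically starts
    # (r + 1) index entries past its logical offset:
    #
    #   physical_start(r) = self.start(r) + (r + 1) * self._io.size
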
    def _chunk(self, rev, df=None):
        """Obtain a single decompressed chunk for a revision.

        Accepts an integer revision and an optional already-open file handle
        to be used for reading. If used, the seek position of the file will not
        be preserved.

        Returns a str holding uncompressed data for the requested revision.
        """
        return decompress(self._chunkraw(rev, rev, df=df)[1])

    def _chunks(self, revs, df=None):
        """Obtain decompressed chunks for the specified revisions.

        Accepts an iterable of numeric revisions that are assumed to be in
        ascending order. Also accepts an optional already-open file handle
        to be used for reading. If used, the seek position of the file will
        not be preserved.

        This function is similar to calling ``self._chunk()`` multiple times,
        but is faster.

        Returns a list with decompressed data for each requested revision.
        """
        if not revs:
            return []
        start = self.start
        length = self.length
        inline = self._inline
        iosize = self._io.size
        buffer = util.buffer

        l = []
        ladd = l.append

        try:
            offset, data = self._chunkraw(revs[0], revs[-1], df=df)
        except OverflowError:
            # issue4215 - we can't cache a run of chunks greater than
            # 2G on Windows
            return [self._chunk(rev, df=df) for rev in revs]

        for rev in revs:
            chunkstart = start(rev)
            if inline:
                chunkstart += (rev + 1) * iosize
            chunklength = length(rev)
            ladd(decompress(buffer(data, chunkstart - offset, chunklength)))

        return l

    def _chunkclear(self):
        """Clear the raw chunk cache."""
        self._chunkcache = (0, '')

    def deltaparent(self, rev):
        """return deltaparent of the given revision"""
        base = self.index[rev][3]
        if base == rev:
            return nullrev
        elif self._generaldelta:
            return base
        else:
            return rev - 1

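    # Editor's illustration of the three cases above, where base is
    # self.index[rev][3]:
    #
    #   base == rev      -> the revision is stored as a full text, so its
    #                       delta parent is nullrev
    #   generaldelta     -> deltas may chain against any earlier revision
    #                       (typically a parent), recorded in base
    #   classic layout   -> deltas always chain against rev - 1
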
    def revdiff(self, rev1, rev2):
        """return or calculate a delta between two revisions"""
        if rev1 != nullrev and self.deltaparent(rev2) == rev1:
            return str(self._chunk(rev2))

        return mdiff.textdiff(self.revision(rev1),
                              self.revision(rev2))

    def revision(self, nodeorrev, _df=None):
        """return an uncompressed revision of a given node or revision
        number.

        _df is an existing file handle to read from. It is meant to only be
        used internally.
        """
        if isinstance(nodeorrev, int):
            rev = nodeorrev
            node = self.node(rev)
        else:
            node = nodeorrev
            rev = None

        cachedrev = None
        if node == nullid:
            return ""
        if self._cache:
            if self._cache[0] == node:
                return self._cache[2]
            cachedrev = self._cache[1]

        # look up what we need to read
        text = None
        if rev is None:
            rev = self.rev(node)

        # check rev flags
        if self.flags(rev) & ~REVIDX_KNOWN_FLAGS:
            raise RevlogError(_('incompatible revision flag %x') %
                              (self.flags(rev) & ~REVIDX_KNOWN_FLAGS))

        chain, stopped = self._deltachain(rev, stoprev=cachedrev)
        if stopped:
            text = self._cache[2]

        # drop cache to save memory
        self._cache = None

        bins = self._chunks(chain, df=_df)
        if text is None:
            text = str(bins[0])
            bins = bins[1:]

        text = mdiff.patches(text, bins)

        text = self._checkhash(text, node, rev)

        self._cache = (node, rev, text)
        return text

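    # Editor's sketch of the reconstruction above: _deltachain() returns the
    # revisions from the chain base through rev, where the first chunk is a
    # full text and every later chunk is a delta. Conceptually:
    #
    #   chain = [base, ..., rev]
    #   text = decompress(chunk(base))
    #   text = mdiff.patches(text, [decompress(chunk(r)) for r in chain[1:]])
    #
    # When the cached revision lies on the chain ("stopped" is True), the
    # walk ends early and patching resumes from the cached full text instead.
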
    def hash(self, text, p1, p2):
        """Compute a node hash.

        Available as a function so that subclasses can replace the hash
        as needed.
        """
        return hash(text, p1, p2)

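    # Editor's sketch of the default node hash (the module-level hash()
    # defined earlier in this file): the nodeid is the SHA-1 of the two
    # parents, in sorted order, followed by the text, so identity covers
    # both content and history. Illustrative only:
    #
    #   def _nodeid(text, p1, p2):
    #       s = hashlib.sha1(min(p1, p2))
    #       s.update(max(p1, p2))
    #       s.update(text)
    #       return s.digest()
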
    def _checkhash(self, text, node, rev):
        p1, p2 = self.parents(node)
        self.checkhash(text, p1, p2, node, rev)
        return text

    def checkhash(self, text, p1, p2, node, rev=None):
        if node != self.hash(text, p1, p2):
            revornode = rev
            if revornode is None:
                revornode = templatefilters.short(hex(node))
            raise RevlogError(_("integrity check failed on %s:%s")
                              % (self.indexfile, revornode))

    def checkinlinesize(self, tr, fp=None):
        """Check if the revlog is too big for inline and convert if so.

        This should be called after revisions are added to the revlog. If the
        revlog has grown too large to be an inline revlog, it will convert it
        to use multiple index and data files.
        """
        if not self._inline or (self.start(-2) + self.length(-2)) < _maxinline:
            return

        trinfo = tr.find(self.indexfile)
        if trinfo is None:
            raise RevlogError(_("%s not found in the transaction")
                              % self.indexfile)

        trindex = trinfo[2]
        if trindex is not None:
            dataoff = self.start(trindex)
        else:
            # revlog was stripped at start of transaction, use all leftover data
            trindex = len(self) - 1
            dataoff = self.end(-2)

        tr.add(self.datafile, dataoff)

        if fp:
            fp.flush()
            fp.close()

        df = self.opener(self.datafile, 'w')
        try:
            for r in self:
                df.write(self._chunkraw(r, r)[1])
        finally:
            df.close()

        fp = self.opener(self.indexfile, 'w', atomictemp=True)
        self.version &= ~(REVLOGNGINLINEDATA)
        self._inline = False
        for i in self:
            e = self._io.packentry(self.index[i], self.node, self.version, i)
            fp.write(e)

        # if we don't call close, the temp file will never replace the
        # real index
        fp.close()

        tr.replace(self.indexfile, trindex * self._io.size)
        self._chunkclear()

    def addrevision(self, text, transaction, link, p1, p2, cachedelta=None,
                    node=None):
        """add a revision to the log

        text - the revision data to add
        transaction - the transaction object used for rollback
        link - the linkrev data to add
        p1, p2 - the parent nodeids of the revision
        cachedelta - an optional precomputed delta
        node - nodeid of revision; typically node is not specified, and it is
            computed by default as hash(text, p1, p2), however subclasses might
            use a different hashing method (and override checkhash() in that
            case)
        """
        if link == nullrev:
            raise RevlogError(_("attempted to add linkrev -1 to %s")
                              % self.indexfile)

        if len(text) > _maxentrysize:
            raise RevlogError(
                _("%s: size of %d bytes exceeds maximum revlog storage of 2GiB")
                % (self.indexfile, len(text)))

        node = node or self.hash(text, p1, p2)
        if node in self.nodemap:
            return node

        dfh = None
        if not self._inline:
            dfh = self.opener(self.datafile, "a+")
        ifh = self.opener(self.indexfile, "a+")
        try:
            return self._addrevision(node, text, transaction, link, p1, p2,
                                     REVIDX_DEFAULT_FLAGS, cachedelta, ifh, dfh)
        finally:
            if dfh:
                dfh.close()
            ifh.close()

    def compress(self, text):
        """ generate a possibly-compressed representation of text """
        if not text:
            return ("", text)
        l = len(text)
        bin = None
        if l < 44:
            pass
        elif l > 1000000:
            # zlib makes an internal copy, thus doubling memory usage for
            # large files, so let's do this in pieces
            z = zlib.compressobj()
            p = []
            pos = 0
            while pos < l:
                pos2 = pos + 2**20
                p.append(z.compress(text[pos:pos2]))
                pos = pos2
            p.append(z.flush())
            if sum(map(len, p)) < l:
                bin = "".join(p)
        else:
            bin = _compress(text)
        if bin is None or len(bin) > l:
            if text[0] == '\0':
                return ("", text)
            return ('u', text)
        return ("", bin)

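    # Editor's note on the (header, data) convention above: the first byte of
    # the stored chunk tells the reader how to decode it:
    #
    #   ("", zlib_data)  - a zlib stream (its first byte is 'x')
    #   ('u', text)      - stored uncompressed; the 'u' is skipped on read
    #   ("", text)       - text beginning with '\0' is stored as-is
    #
    # The l < 44 case skips zlib entirely: stream overhead means such short
    # texts rarely compress smaller than they started.
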
    def _isgooddelta(self, d, textlen):
        """Returns True if the given delta is good. Good means that it is within
        the disk span, disk size, and chain length bounds that we know to be
        performant."""
        if d is None:
            return False

        # - 'dist' is the distance from the base revision -- bounding it limits
        #   the amount of I/O we need to do.
        # - 'compresseddeltalen' is the sum of the total size of deltas we need
        #   to apply -- bounding it limits the amount of CPU we consume.
        dist, l, data, base, chainbase, chainlen, compresseddeltalen = d
        if (dist > textlen * 4 or l > textlen or
            compresseddeltalen > textlen * 2 or
            (self._maxchainlen and chainlen > self._maxchainlen)):
            return False

        return True

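    # Worked example (editor's note): with a full text of textlen = 1000
    # bytes, a candidate delta is rejected when any bound is exceeded:
    #
    #   dist               > 4000  (too large a span of the file to read)
    #   l                  > 1000  (the delta is bigger than the full text)
    #   compresseddeltalen > 2000  (too much patching work to reconstruct)
    #   chainlen           > self._maxchainlen, when that limit is set
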
    def _addrevision(self, node, text, transaction, link, p1, p2, flags,
                     cachedelta, ifh, dfh, alwayscache=False):
        """internal function to add revisions to the log

        see addrevision for argument descriptions.
        invariants:
        - text is optional (can be None); if not set, cachedelta must be set.
          if both are set, they must correspond to each other.
        """
        btext = [text]
        def buildtext():
            if btext[0] is not None:
                return btext[0]
            baserev = cachedelta[0]
            delta = cachedelta[1]
            # special case deltas which replace entire base; no need to decode
            # base revision. this neatly avoids censored bases, which throw when
            # they're decoded.
            hlen = struct.calcsize(">lll")
            if delta[:hlen] == mdiff.replacediffheader(self.rawsize(baserev),
                                                       len(delta) - hlen):
                btext[0] = delta[hlen:]
            else:
                if self._inline:
                    fh = ifh
                else:
                    fh = dfh
                basetext = self.revision(self.node(baserev), _df=fh)
                btext[0] = mdiff.patch(basetext, delta)
            try:
                self.checkhash(btext[0], p1, p2, node)
                if flags & REVIDX_ISCENSORED:
                    raise RevlogError(_('node %s is not censored') % node)
            except CensoredNodeError:
                # must pass the censored index flag to add censored revisions
                if not flags & REVIDX_ISCENSORED:
                    raise
            return btext[0]

        def builddelta(rev):
            # can we use the cached delta?
            if cachedelta and cachedelta[0] == rev:
                delta = cachedelta[1]
            else:
                t = buildtext()
                if self.iscensored(rev):
                    # deltas based on a censored revision must replace the
                    # full content in one patch, so delta works everywhere
                    header = mdiff.replacediffheader(self.rawsize(rev), len(t))
                    delta = header + t
                else:
                    if self._inline:
                        fh = ifh
                    else:
                        fh = dfh
                    ptext = self.revision(self.node(rev), _df=fh)
                    delta = mdiff.textdiff(ptext, t)
            data = self.compress(delta)
            l = len(data[1]) + len(data[0])
            chainbase = self.chainbase(rev)
            dist = l + offset - self.start(chainbase)
            if self._generaldelta:
                base = rev
            else:
                base = chainbase
            chainlen, compresseddeltalen = self._chaininfo(rev)
            chainlen += 1
            compresseddeltalen += l
            return dist, l, data, base, chainbase, chainlen, compresseddeltalen

        curr = len(self)
        prev = curr - 1
        offset = self.end(prev)
        delta = None
        p1r, p2r = self.rev(p1), self.rev(p2)

        # full versions are inserted when the needed deltas
        # become comparable to the uncompressed text
        if text is None:
            textlen = mdiff.patchedsize(self.rawsize(cachedelta[0]),
                                        cachedelta[1])
        else:
            textlen = len(text)

        # should we try to build a delta?
        if prev != nullrev:
            tested = set()
            if cachedelta and self._generaldelta and self._lazydeltabase:
                # Assume what we received from the server is a good choice
                # build delta will reuse the cache
                candidatedelta = builddelta(cachedelta[0])
                tested.add(cachedelta[0])
                if self._isgooddelta(candidatedelta, textlen):
                    delta = candidatedelta
            if delta is None and self._generaldelta:
                # exclude already lazy tested base if any
                parents = [p for p in (p1r, p2r)
                           if p != nullrev and p not in tested]
                if parents and not self._aggressivemergedeltas:
                    # Pick whichever parent is closer to us (to minimize the
                    # chance of having to build a fulltext).
                    parents = [max(parents)]
                tested.update(parents)
                pdeltas = []
                for p in parents:
                    pd = builddelta(p)
                    if self._isgooddelta(pd, textlen):
                        pdeltas.append(pd)
                if pdeltas:
                    delta = min(pdeltas, key=lambda x: x[1])
            if delta is None and prev not in tested:
                # other approaches failed; try against prev to hopefully save
                # us a fulltext.
                candidatedelta = builddelta(prev)
                if self._isgooddelta(candidatedelta, textlen):
                    delta = candidatedelta
        if delta is not None:
            dist, l, data, base, chainbase, chainlen, compresseddeltalen = delta
        else:
            text = buildtext()
            data = self.compress(text)
            l = len(data[1]) + len(data[0])
            base = chainbase = curr

        e = (offset_type(offset, flags), l, textlen,
             base, link, p1r, p2r, node)
        self.index.insert(-1, e)
        self.nodemap[node] = curr

        entry = self._io.packentry(e, self.node, self.version, curr)
        self._writeentry(transaction, ifh, dfh, entry, data, link, offset)

        if alwayscache and text is None:
            text = buildtext()

        if type(text) == str: # only accept immutable objects
            self._cache = (node, curr, text)
        self._chainbasecache[curr] = chainbase
        return node

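    # Editor's note on the _chainbasecache write above, the core of this
    # commit: the old self._basecache held a single (rev, chainbase) pair, so
    # alternating between two delta chains evicted it on every lookup. An LRU
    # mapping of rev -> chainbase keeps several recent bases warm; presumably
    # it is created in __init__ (outside this hunk) along the lines of:
    #
    #   self._chainbasecache = util.lrucachedict(16)  # assumed setup
    #
    # util.lrucachedict acts like a dict that evicts its least recently used
    # entry once the given capacity is exceeded.
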
    def _writeentry(self, transaction, ifh, dfh, entry, data, link, offset):
        # Files opened in a+ mode have inconsistent behavior on various
        # platforms. Windows requires that a file positioning call be made
        # when the file handle transitions between reads and writes. See
        # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
        # platforms, Python or the platform itself can be buggy. Some versions
        # of Solaris have been observed to not append at the end of the file
        # if the file was seeked to before the end. See issue4943 for more.
        #
        # We work around this issue by inserting a seek() before writing.
        # Note: This is likely not necessary on Python 3.
        ifh.seek(0, os.SEEK_END)
        if dfh:
            dfh.seek(0, os.SEEK_END)

        curr = len(self) - 1
        if not self._inline:
            transaction.add(self.datafile, offset)
            transaction.add(self.indexfile, curr * len(entry))
            if data[0]:
                dfh.write(data[0])
            dfh.write(data[1])
            ifh.write(entry)
        else:
            offset += curr * self._io.size
            transaction.add(self.indexfile, offset, curr)
            ifh.write(entry)
            ifh.write(data[0])
            ifh.write(data[1])
            self.checkinlinesize(transaction, ifh)

    def addgroup(self, cg, linkmapper, transaction, addrevisioncb=None):
        """
        add a delta group

        given a set of deltas, add them to the revision log. the
        first delta is against its parent, which should be in our
        log, the rest are against the previous delta.

        If ``addrevisioncb`` is defined, it will be called with arguments of
        this revlog and the node that was added.
        """

        # track the base of the current delta log
        content = []
        node = None

        r = len(self)
        end = 0
        if r:
            end = self.end(r - 1)
        ifh = self.opener(self.indexfile, "a+")
        isize = r * self._io.size
        if self._inline:
            transaction.add(self.indexfile, end + isize, r)
            dfh = None
        else:
            transaction.add(self.indexfile, isize, r)
            transaction.add(self.datafile, end)
            dfh = self.opener(self.datafile, "a+")
        def flush():
            if dfh:
                dfh.flush()
            ifh.flush()
        try:
            # loop through our set of deltas
            chain = None
            for chunkdata in iter(lambda: cg.deltachunk(chain), {}):
                node = chunkdata['node']
                p1 = chunkdata['p1']
                p2 = chunkdata['p2']
                cs = chunkdata['cs']
                deltabase = chunkdata['deltabase']
                delta = chunkdata['delta']
                flags = chunkdata['flags'] or REVIDX_DEFAULT_FLAGS

                content.append(node)

                link = linkmapper(cs)
                if node in self.nodemap:
                    # this can happen if two branches make the same change
                    chain = node
                    continue

                for p in (p1, p2):
                    if p not in self.nodemap:
                        raise LookupError(p, self.indexfile,
                                          _('unknown parent'))

                if deltabase not in self.nodemap:
                    raise LookupError(deltabase, self.indexfile,
                                      _('unknown delta base'))

                baserev = self.rev(deltabase)

                if baserev != nullrev and self.iscensored(baserev):
                    # if base is censored, delta must be full replacement in a
                    # single patch operation
                    hlen = struct.calcsize(">lll")
                    oldlen = self.rawsize(baserev)
                    newlen = len(delta) - hlen
                    if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
                        raise error.CensoredBaseError(self.indexfile,
                                                      self.node(baserev))

                if not flags and self._peek_iscensored(baserev, delta, flush):
                    flags |= REVIDX_ISCENSORED

                # We assume consumers of addrevisioncb will want to retrieve
                # the added revision, which will require a call to
                # revision(). revision() will fast path if there is a cache
                # hit. So, we tell _addrevision() to always cache in this case.
                chain = self._addrevision(node, None, transaction, link,
                                          p1, p2, flags, (baserev, delta),
                                          ifh, dfh,
                                          alwayscache=bool(addrevisioncb))

                if addrevisioncb:
                    addrevisioncb(self, chain)

                if not dfh and not self._inline:
                    # addrevision switched from inline to conventional
                    # reopen the index
                    ifh.close()
                    dfh = self.opener(self.datafile, "a+")
                    ifh = self.opener(self.indexfile, "a+")
        finally:
            if dfh:
                dfh.close()
            ifh.close()

        return content

    def iscensored(self, rev):
        """Check if a file revision is censored."""
        return False

    def _peek_iscensored(self, baserev, delta, flush):
        """Quickly check if a delta produces a censored revision."""
        return False

    def getstrippoint(self, minlink):
        """find the minimum rev that must be stripped to strip the linkrev

        Returns a tuple containing the minimum rev and a set of all revs that
        have linkrevs that will be broken by this strip.
        """
        brokenrevs = set()
        strippoint = len(self)

        heads = {}
        futurelargelinkrevs = set()
        for head in self.headrevs():
            headlinkrev = self.linkrev(head)
            heads[head] = headlinkrev
            if headlinkrev >= minlink:
                futurelargelinkrevs.add(headlinkrev)

        # This algorithm involves walking down the rev graph, starting at the
        # heads. Since the revs are topologically sorted according to linkrev,
        # once all head linkrevs are below the minlink, we know there are
        # no more revs that could have a linkrev greater than minlink.
        # So we can stop walking.
        while futurelargelinkrevs:
            strippoint -= 1
            linkrev = heads.pop(strippoint)

            if linkrev < minlink:
                brokenrevs.add(strippoint)
            else:
                futurelargelinkrevs.remove(linkrev)

            for p in self.parentrevs(strippoint):
                if p != nullrev:
                    plinkrev = self.linkrev(p)
                    heads[p] = plinkrev
                    if plinkrev >= minlink:
                        futurelargelinkrevs.add(plinkrev)

        return strippoint, brokenrevs

    def strip(self, minlink, transaction):
        """truncate the revlog on the first revision with a linkrev >= minlink

        This function is called when we're stripping revision minlink and
        its descendants from the repository.

        We have to remove all revisions with linkrev >= minlink, because
        the equivalent changelog revisions will be renumbered after the
        strip.

        So we truncate the revlog on the first of these revisions, and
        trust that the caller has saved the revisions that shouldn't be
        removed and that it'll re-add them after this truncation.
        """
        if len(self) == 0:
            return

        rev, _ = self.getstrippoint(minlink)
        if rev == len(self):
            return

        # first truncate the files on disk
        end = self.start(rev)
        if not self._inline:
            transaction.add(self.datafile, end)
            end = rev * self._io.size
        else:
            end += rev * self._io.size

        transaction.add(self.indexfile, end)

        # then reset internal state in memory to forget those revisions
        self._cache = None
        self._chaininfocache = {}
        self._chunkclear()
        for x in xrange(rev, len(self)):
            del self.nodemap[self.node(x)]

        del self.index[rev:-1]

    def checksize(self):
        expected = 0
        if len(self):
            expected = max(0, self.end(len(self) - 1))

        try:
            f = self.opener(self.datafile)
            f.seek(0, 2)
            actual = f.tell()
            f.close()
            dd = actual - expected
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise
            dd = 0

        try:
            f = self.opener(self.indexfile)
            f.seek(0, 2)
            actual = f.tell()
            f.close()
            s = self._io.size
            i = max(0, actual // s)
            di = actual - (i * s)
            if self._inline:
                databytes = 0
                for r in self:
                    databytes += max(0, self.length(r))
                dd = 0
                di = actual - len(self) * s - databytes
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise
            di = 0

        return (dd, di)

    def files(self):
        res = [self.indexfile]
        if not self._inline:
            res.append(self.datafile)
        return res