##// END OF EJS Templates
revlog: remove unnecessary cache validation in _chunks...
Gregory Szorc -
r27650:e7222d32 default
parent child Browse files
Show More
@@ -1,1790 +1,1783 b''
1 # revlog.py - storage back-end for mercurial
1 # revlog.py - storage back-end for mercurial
2 #
2 #
3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 #
4 #
5 # This software may be used and distributed according to the terms of the
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
6 # GNU General Public License version 2 or any later version.
7
7
8 """Storage back-end for Mercurial.
8 """Storage back-end for Mercurial.
9
9
10 This provides efficient delta storage with O(1) retrieve and append
10 This provides efficient delta storage with O(1) retrieve and append
11 and O(changes) merge between branches.
11 and O(changes) merge between branches.
12 """
12 """
13
13
14 from __future__ import absolute_import
14 from __future__ import absolute_import
15
15
16 import collections
16 import collections
17 import errno
17 import errno
18 import os
18 import os
19 import struct
19 import struct
20 import zlib
20 import zlib
21
21
22 # import stuff from node for others to import from revlog
22 # import stuff from node for others to import from revlog
23 from .node import (
23 from .node import (
24 bin,
24 bin,
25 hex,
25 hex,
26 nullid,
26 nullid,
27 nullrev,
27 nullrev,
28 )
28 )
29 from .i18n import _
29 from .i18n import _
30 from . import (
30 from . import (
31 ancestor,
31 ancestor,
32 error,
32 error,
33 mdiff,
33 mdiff,
34 parsers,
34 parsers,
35 templatefilters,
35 templatefilters,
36 util,
36 util,
37 )
37 )
38
38
39 _pack = struct.pack
39 _pack = struct.pack
40 _unpack = struct.unpack
40 _unpack = struct.unpack
41 _compress = zlib.compress
41 _compress = zlib.compress
42 _decompress = zlib.decompress
42 _decompress = zlib.decompress
43 _sha = util.sha1
43 _sha = util.sha1
44
44
45 # revlog header flags
45 # revlog header flags
46 REVLOGV0 = 0
46 REVLOGV0 = 0
47 REVLOGNG = 1
47 REVLOGNG = 1
48 REVLOGNGINLINEDATA = (1 << 16)
48 REVLOGNGINLINEDATA = (1 << 16)
49 REVLOGGENERALDELTA = (1 << 17)
49 REVLOGGENERALDELTA = (1 << 17)
50 REVLOG_DEFAULT_FLAGS = REVLOGNGINLINEDATA
50 REVLOG_DEFAULT_FLAGS = REVLOGNGINLINEDATA
51 REVLOG_DEFAULT_FORMAT = REVLOGNG
51 REVLOG_DEFAULT_FORMAT = REVLOGNG
52 REVLOG_DEFAULT_VERSION = REVLOG_DEFAULT_FORMAT | REVLOG_DEFAULT_FLAGS
52 REVLOG_DEFAULT_VERSION = REVLOG_DEFAULT_FORMAT | REVLOG_DEFAULT_FLAGS
53 REVLOGNG_FLAGS = REVLOGNGINLINEDATA | REVLOGGENERALDELTA
53 REVLOGNG_FLAGS = REVLOGNGINLINEDATA | REVLOGGENERALDELTA
54
54
55 # revlog index flags
55 # revlog index flags
56 REVIDX_ISCENSORED = (1 << 15) # revision has censor metadata, must be verified
56 REVIDX_ISCENSORED = (1 << 15) # revision has censor metadata, must be verified
57 REVIDX_DEFAULT_FLAGS = 0
57 REVIDX_DEFAULT_FLAGS = 0
58 REVIDX_KNOWN_FLAGS = REVIDX_ISCENSORED
58 REVIDX_KNOWN_FLAGS = REVIDX_ISCENSORED
59
59
60 # max size of revlog with inline data
60 # max size of revlog with inline data
61 _maxinline = 131072
61 _maxinline = 131072
62 _chunksize = 1048576
62 _chunksize = 1048576
63
63
64 RevlogError = error.RevlogError
64 RevlogError = error.RevlogError
65 LookupError = error.LookupError
65 LookupError = error.LookupError
66 CensoredNodeError = error.CensoredNodeError
66 CensoredNodeError = error.CensoredNodeError
67
67
68 def getoffset(q):
68 def getoffset(q):
69 return int(q >> 16)
69 return int(q >> 16)
70
70
71 def gettype(q):
71 def gettype(q):
72 return int(q & 0xFFFF)
72 return int(q & 0xFFFF)
73
73
74 def offset_type(offset, type):
74 def offset_type(offset, type):
75 return long(long(offset) << 16 | type)
75 return long(long(offset) << 16 | type)
76
76
77 _nullhash = _sha(nullid)
77 _nullhash = _sha(nullid)
78
78
79 def hash(text, p1, p2):
79 def hash(text, p1, p2):
80 """generate a hash from the given text and its parent hashes
80 """generate a hash from the given text and its parent hashes
81
81
82 This hash combines both the current file contents and its history
82 This hash combines both the current file contents and its history
83 in a manner that makes it easy to distinguish nodes with the same
83 in a manner that makes it easy to distinguish nodes with the same
84 content in the revision graph.
84 content in the revision graph.
85 """
85 """
86 # As of now, if one of the parent node is null, p2 is null
86 # As of now, if one of the parent node is null, p2 is null
87 if p2 == nullid:
87 if p2 == nullid:
88 # deep copy of a hash is faster than creating one
88 # deep copy of a hash is faster than creating one
89 s = _nullhash.copy()
89 s = _nullhash.copy()
90 s.update(p1)
90 s.update(p1)
91 else:
91 else:
92 # none of the parent nodes are nullid
92 # none of the parent nodes are nullid
93 l = [p1, p2]
93 l = [p1, p2]
94 l.sort()
94 l.sort()
95 s = _sha(l[0])
95 s = _sha(l[0])
96 s.update(l[1])
96 s.update(l[1])
97 s.update(text)
97 s.update(text)
98 return s.digest()
98 return s.digest()
99
99
100 def decompress(bin):
100 def decompress(bin):
101 """ decompress the given input """
101 """ decompress the given input """
102 if not bin:
102 if not bin:
103 return bin
103 return bin
104 t = bin[0]
104 t = bin[0]
105 if t == '\0':
105 if t == '\0':
106 return bin
106 return bin
107 if t == 'x':
107 if t == 'x':
108 try:
108 try:
109 return _decompress(bin)
109 return _decompress(bin)
110 except zlib.error as e:
110 except zlib.error as e:
111 raise RevlogError(_("revlog decompress error: %s") % str(e))
111 raise RevlogError(_("revlog decompress error: %s") % str(e))
112 if t == 'u':
112 if t == 'u':
113 return util.buffer(bin, 1)
113 return util.buffer(bin, 1)
114 raise RevlogError(_("unknown compression type %r") % t)
114 raise RevlogError(_("unknown compression type %r") % t)
115
115
116 # index v0:
116 # index v0:
117 # 4 bytes: offset
117 # 4 bytes: offset
118 # 4 bytes: compressed length
118 # 4 bytes: compressed length
119 # 4 bytes: base rev
119 # 4 bytes: base rev
120 # 4 bytes: link rev
120 # 4 bytes: link rev
121 # 20 bytes: parent 1 nodeid
121 # 20 bytes: parent 1 nodeid
122 # 20 bytes: parent 2 nodeid
122 # 20 bytes: parent 2 nodeid
123 # 20 bytes: nodeid
123 # 20 bytes: nodeid
124 indexformatv0 = ">4l20s20s20s"
124 indexformatv0 = ">4l20s20s20s"
125
125
126 class revlogoldio(object):
126 class revlogoldio(object):
127 def __init__(self):
127 def __init__(self):
128 self.size = struct.calcsize(indexformatv0)
128 self.size = struct.calcsize(indexformatv0)
129
129
130 def parseindex(self, data, inline):
130 def parseindex(self, data, inline):
131 s = self.size
131 s = self.size
132 index = []
132 index = []
133 nodemap = {nullid: nullrev}
133 nodemap = {nullid: nullrev}
134 n = off = 0
134 n = off = 0
135 l = len(data)
135 l = len(data)
136 while off + s <= l:
136 while off + s <= l:
137 cur = data[off:off + s]
137 cur = data[off:off + s]
138 off += s
138 off += s
139 e = _unpack(indexformatv0, cur)
139 e = _unpack(indexformatv0, cur)
140 # transform to revlogv1 format
140 # transform to revlogv1 format
141 e2 = (offset_type(e[0], 0), e[1], -1, e[2], e[3],
141 e2 = (offset_type(e[0], 0), e[1], -1, e[2], e[3],
142 nodemap.get(e[4], nullrev), nodemap.get(e[5], nullrev), e[6])
142 nodemap.get(e[4], nullrev), nodemap.get(e[5], nullrev), e[6])
143 index.append(e2)
143 index.append(e2)
144 nodemap[e[6]] = n
144 nodemap[e[6]] = n
145 n += 1
145 n += 1
146
146
147 # add the magic null revision at -1
147 # add the magic null revision at -1
148 index.append((0, 0, 0, -1, -1, -1, -1, nullid))
148 index.append((0, 0, 0, -1, -1, -1, -1, nullid))
149
149
150 return index, nodemap, None
150 return index, nodemap, None
151
151
152 def packentry(self, entry, node, version, rev):
152 def packentry(self, entry, node, version, rev):
153 if gettype(entry[0]):
153 if gettype(entry[0]):
154 raise RevlogError(_("index entry flags need RevlogNG"))
154 raise RevlogError(_("index entry flags need RevlogNG"))
155 e2 = (getoffset(entry[0]), entry[1], entry[3], entry[4],
155 e2 = (getoffset(entry[0]), entry[1], entry[3], entry[4],
156 node(entry[5]), node(entry[6]), entry[7])
156 node(entry[5]), node(entry[6]), entry[7])
157 return _pack(indexformatv0, *e2)
157 return _pack(indexformatv0, *e2)
158
158
159 # index ng:
159 # index ng:
160 # 6 bytes: offset
160 # 6 bytes: offset
161 # 2 bytes: flags
161 # 2 bytes: flags
162 # 4 bytes: compressed length
162 # 4 bytes: compressed length
163 # 4 bytes: uncompressed length
163 # 4 bytes: uncompressed length
164 # 4 bytes: base rev
164 # 4 bytes: base rev
165 # 4 bytes: link rev
165 # 4 bytes: link rev
166 # 4 bytes: parent 1 rev
166 # 4 bytes: parent 1 rev
167 # 4 bytes: parent 2 rev
167 # 4 bytes: parent 2 rev
168 # 32 bytes: nodeid
168 # 32 bytes: nodeid
169 indexformatng = ">Qiiiiii20s12x"
169 indexformatng = ">Qiiiiii20s12x"
170 versionformat = ">I"
170 versionformat = ">I"
171
171
172 # corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
172 # corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
173 # signed integer)
173 # signed integer)
174 _maxentrysize = 0x7fffffff
174 _maxentrysize = 0x7fffffff
175
175
176 class revlogio(object):
176 class revlogio(object):
177 def __init__(self):
177 def __init__(self):
178 self.size = struct.calcsize(indexformatng)
178 self.size = struct.calcsize(indexformatng)
179
179
180 def parseindex(self, data, inline):
180 def parseindex(self, data, inline):
181 # call the C implementation to parse the index data
181 # call the C implementation to parse the index data
182 index, cache = parsers.parse_index2(data, inline)
182 index, cache = parsers.parse_index2(data, inline)
183 return index, getattr(index, 'nodemap', None), cache
183 return index, getattr(index, 'nodemap', None), cache
184
184
185 def packentry(self, entry, node, version, rev):
185 def packentry(self, entry, node, version, rev):
186 p = _pack(indexformatng, *entry)
186 p = _pack(indexformatng, *entry)
187 if rev == 0:
187 if rev == 0:
188 p = _pack(versionformat, version) + p[4:]
188 p = _pack(versionformat, version) + p[4:]
189 return p
189 return p
190
190
191 class revlog(object):
191 class revlog(object):
192 """
192 """
193 the underlying revision storage object
193 the underlying revision storage object
194
194
195 A revlog consists of two parts, an index and the revision data.
195 A revlog consists of two parts, an index and the revision data.
196
196
197 The index is a file with a fixed record size containing
197 The index is a file with a fixed record size containing
198 information on each revision, including its nodeid (hash), the
198 information on each revision, including its nodeid (hash), the
199 nodeids of its parents, the position and offset of its data within
199 nodeids of its parents, the position and offset of its data within
200 the data file, and the revision it's based on. Finally, each entry
200 the data file, and the revision it's based on. Finally, each entry
201 contains a linkrev entry that can serve as a pointer to external
201 contains a linkrev entry that can serve as a pointer to external
202 data.
202 data.
203
203
204 The revision data itself is a linear collection of data chunks.
204 The revision data itself is a linear collection of data chunks.
205 Each chunk represents a revision and is usually represented as a
205 Each chunk represents a revision and is usually represented as a
206 delta against the previous chunk. To bound lookup time, runs of
206 delta against the previous chunk. To bound lookup time, runs of
207 deltas are limited to about 2 times the length of the original
207 deltas are limited to about 2 times the length of the original
208 version data. This makes retrieval of a version proportional to
208 version data. This makes retrieval of a version proportional to
209 its size, or O(1) relative to the number of revisions.
209 its size, or O(1) relative to the number of revisions.
210
210
211 Both pieces of the revlog are written to in an append-only
211 Both pieces of the revlog are written to in an append-only
212 fashion, which means we never need to rewrite a file to insert or
212 fashion, which means we never need to rewrite a file to insert or
213 remove data, and can use some simple techniques to avoid the need
213 remove data, and can use some simple techniques to avoid the need
214 for locking while reading.
214 for locking while reading.
215 """
215 """
216 def __init__(self, opener, indexfile):
216 def __init__(self, opener, indexfile):
217 """
217 """
218 create a revlog object
218 create a revlog object
219
219
220 opener is a function that abstracts the file opening operation
220 opener is a function that abstracts the file opening operation
221 and can be used to implement COW semantics or the like.
221 and can be used to implement COW semantics or the like.
222 """
222 """
223 self.indexfile = indexfile
223 self.indexfile = indexfile
224 self.datafile = indexfile[:-2] + ".d"
224 self.datafile = indexfile[:-2] + ".d"
225 self.opener = opener
225 self.opener = opener
226 # 3-tuple of (node, rev, text) for a raw revision.
226 # 3-tuple of (node, rev, text) for a raw revision.
227 self._cache = None
227 self._cache = None
228 # 2-tuple of (rev, baserev) defining the base revision the delta chain
228 # 2-tuple of (rev, baserev) defining the base revision the delta chain
229 # begins at for a revision.
229 # begins at for a revision.
230 self._basecache = None
230 self._basecache = None
231 # 2-tuple of (offset, data) of raw data from the revlog at an offset.
231 # 2-tuple of (offset, data) of raw data from the revlog at an offset.
232 self._chunkcache = (0, '')
232 self._chunkcache = (0, '')
233 # How much data to read and cache into the raw revlog data cache.
233 # How much data to read and cache into the raw revlog data cache.
234 self._chunkcachesize = 65536
234 self._chunkcachesize = 65536
235 self._maxchainlen = None
235 self._maxchainlen = None
236 self._aggressivemergedeltas = False
236 self._aggressivemergedeltas = False
237 self.index = []
237 self.index = []
238 # Mapping of partial identifiers to full nodes.
238 # Mapping of partial identifiers to full nodes.
239 self._pcache = {}
239 self._pcache = {}
240 # Mapping of revision integer to full node.
240 # Mapping of revision integer to full node.
241 self._nodecache = {nullid: nullrev}
241 self._nodecache = {nullid: nullrev}
242 self._nodepos = None
242 self._nodepos = None
243
243
244 v = REVLOG_DEFAULT_VERSION
244 v = REVLOG_DEFAULT_VERSION
245 opts = getattr(opener, 'options', None)
245 opts = getattr(opener, 'options', None)
246 if opts is not None:
246 if opts is not None:
247 if 'revlogv1' in opts:
247 if 'revlogv1' in opts:
248 if 'generaldelta' in opts:
248 if 'generaldelta' in opts:
249 v |= REVLOGGENERALDELTA
249 v |= REVLOGGENERALDELTA
250 else:
250 else:
251 v = 0
251 v = 0
252 if 'chunkcachesize' in opts:
252 if 'chunkcachesize' in opts:
253 self._chunkcachesize = opts['chunkcachesize']
253 self._chunkcachesize = opts['chunkcachesize']
254 if 'maxchainlen' in opts:
254 if 'maxchainlen' in opts:
255 self._maxchainlen = opts['maxchainlen']
255 self._maxchainlen = opts['maxchainlen']
256 if 'aggressivemergedeltas' in opts:
256 if 'aggressivemergedeltas' in opts:
257 self._aggressivemergedeltas = opts['aggressivemergedeltas']
257 self._aggressivemergedeltas = opts['aggressivemergedeltas']
258 self._lazydeltabase = bool(opts.get('lazydeltabase', False))
258 self._lazydeltabase = bool(opts.get('lazydeltabase', False))
259
259
260 if self._chunkcachesize <= 0:
260 if self._chunkcachesize <= 0:
261 raise RevlogError(_('revlog chunk cache size %r is not greater '
261 raise RevlogError(_('revlog chunk cache size %r is not greater '
262 'than 0') % self._chunkcachesize)
262 'than 0') % self._chunkcachesize)
263 elif self._chunkcachesize & (self._chunkcachesize - 1):
263 elif self._chunkcachesize & (self._chunkcachesize - 1):
264 raise RevlogError(_('revlog chunk cache size %r is not a power '
264 raise RevlogError(_('revlog chunk cache size %r is not a power '
265 'of 2') % self._chunkcachesize)
265 'of 2') % self._chunkcachesize)
266
266
267 indexdata = ''
267 indexdata = ''
268 self._initempty = True
268 self._initempty = True
269 try:
269 try:
270 f = self.opener(self.indexfile)
270 f = self.opener(self.indexfile)
271 indexdata = f.read()
271 indexdata = f.read()
272 f.close()
272 f.close()
273 if len(indexdata) > 0:
273 if len(indexdata) > 0:
274 v = struct.unpack(versionformat, indexdata[:4])[0]
274 v = struct.unpack(versionformat, indexdata[:4])[0]
275 self._initempty = False
275 self._initempty = False
276 except IOError as inst:
276 except IOError as inst:
277 if inst.errno != errno.ENOENT:
277 if inst.errno != errno.ENOENT:
278 raise
278 raise
279
279
280 self.version = v
280 self.version = v
281 self._inline = v & REVLOGNGINLINEDATA
281 self._inline = v & REVLOGNGINLINEDATA
282 self._generaldelta = v & REVLOGGENERALDELTA
282 self._generaldelta = v & REVLOGGENERALDELTA
283 flags = v & ~0xFFFF
283 flags = v & ~0xFFFF
284 fmt = v & 0xFFFF
284 fmt = v & 0xFFFF
285 if fmt == REVLOGV0 and flags:
285 if fmt == REVLOGV0 and flags:
286 raise RevlogError(_("index %s unknown flags %#04x for format v0")
286 raise RevlogError(_("index %s unknown flags %#04x for format v0")
287 % (self.indexfile, flags >> 16))
287 % (self.indexfile, flags >> 16))
288 elif fmt == REVLOGNG and flags & ~REVLOGNG_FLAGS:
288 elif fmt == REVLOGNG and flags & ~REVLOGNG_FLAGS:
289 raise RevlogError(_("index %s unknown flags %#04x for revlogng")
289 raise RevlogError(_("index %s unknown flags %#04x for revlogng")
290 % (self.indexfile, flags >> 16))
290 % (self.indexfile, flags >> 16))
291 elif fmt > REVLOGNG:
291 elif fmt > REVLOGNG:
292 raise RevlogError(_("index %s unknown format %d")
292 raise RevlogError(_("index %s unknown format %d")
293 % (self.indexfile, fmt))
293 % (self.indexfile, fmt))
294
294
295 self._io = revlogio()
295 self._io = revlogio()
296 if self.version == REVLOGV0:
296 if self.version == REVLOGV0:
297 self._io = revlogoldio()
297 self._io = revlogoldio()
298 try:
298 try:
299 d = self._io.parseindex(indexdata, self._inline)
299 d = self._io.parseindex(indexdata, self._inline)
300 except (ValueError, IndexError):
300 except (ValueError, IndexError):
301 raise RevlogError(_("index %s is corrupted") % (self.indexfile))
301 raise RevlogError(_("index %s is corrupted") % (self.indexfile))
302 self.index, nodemap, self._chunkcache = d
302 self.index, nodemap, self._chunkcache = d
303 if nodemap is not None:
303 if nodemap is not None:
304 self.nodemap = self._nodecache = nodemap
304 self.nodemap = self._nodecache = nodemap
305 if not self._chunkcache:
305 if not self._chunkcache:
306 self._chunkclear()
306 self._chunkclear()
307 # revnum -> (chain-length, sum-delta-length)
307 # revnum -> (chain-length, sum-delta-length)
308 self._chaininfocache = {}
308 self._chaininfocache = {}
309
309
310 def tip(self):
310 def tip(self):
311 return self.node(len(self.index) - 2)
311 return self.node(len(self.index) - 2)
312 def __contains__(self, rev):
312 def __contains__(self, rev):
313 return 0 <= rev < len(self)
313 return 0 <= rev < len(self)
314 def __len__(self):
314 def __len__(self):
315 return len(self.index) - 1
315 return len(self.index) - 1
316 def __iter__(self):
316 def __iter__(self):
317 return iter(xrange(len(self)))
317 return iter(xrange(len(self)))
318 def revs(self, start=0, stop=None):
318 def revs(self, start=0, stop=None):
319 """iterate over all rev in this revlog (from start to stop)"""
319 """iterate over all rev in this revlog (from start to stop)"""
320 step = 1
320 step = 1
321 if stop is not None:
321 if stop is not None:
322 if start > stop:
322 if start > stop:
323 step = -1
323 step = -1
324 stop += step
324 stop += step
325 else:
325 else:
326 stop = len(self)
326 stop = len(self)
327 return xrange(start, stop, step)
327 return xrange(start, stop, step)
328
328
329 @util.propertycache
329 @util.propertycache
330 def nodemap(self):
330 def nodemap(self):
331 self.rev(self.node(0))
331 self.rev(self.node(0))
332 return self._nodecache
332 return self._nodecache
333
333
334 def hasnode(self, node):
334 def hasnode(self, node):
335 try:
335 try:
336 self.rev(node)
336 self.rev(node)
337 return True
337 return True
338 except KeyError:
338 except KeyError:
339 return False
339 return False
340
340
341 def clearcaches(self):
341 def clearcaches(self):
342 self._cache = None
342 self._cache = None
343 self._basecache = None
343 self._basecache = None
344 self._chunkcache = (0, '')
344 self._chunkcache = (0, '')
345 self._pcache = {}
345 self._pcache = {}
346
346
347 try:
347 try:
348 self._nodecache.clearcaches()
348 self._nodecache.clearcaches()
349 except AttributeError:
349 except AttributeError:
350 self._nodecache = {nullid: nullrev}
350 self._nodecache = {nullid: nullrev}
351 self._nodepos = None
351 self._nodepos = None
352
352
353 def rev(self, node):
353 def rev(self, node):
354 try:
354 try:
355 return self._nodecache[node]
355 return self._nodecache[node]
356 except TypeError:
356 except TypeError:
357 raise
357 raise
358 except RevlogError:
358 except RevlogError:
359 # parsers.c radix tree lookup failed
359 # parsers.c radix tree lookup failed
360 raise LookupError(node, self.indexfile, _('no node'))
360 raise LookupError(node, self.indexfile, _('no node'))
361 except KeyError:
361 except KeyError:
362 # pure python cache lookup failed
362 # pure python cache lookup failed
363 n = self._nodecache
363 n = self._nodecache
364 i = self.index
364 i = self.index
365 p = self._nodepos
365 p = self._nodepos
366 if p is None:
366 if p is None:
367 p = len(i) - 2
367 p = len(i) - 2
368 for r in xrange(p, -1, -1):
368 for r in xrange(p, -1, -1):
369 v = i[r][7]
369 v = i[r][7]
370 n[v] = r
370 n[v] = r
371 if v == node:
371 if v == node:
372 self._nodepos = r - 1
372 self._nodepos = r - 1
373 return r
373 return r
374 raise LookupError(node, self.indexfile, _('no node'))
374 raise LookupError(node, self.indexfile, _('no node'))
375
375
376 def node(self, rev):
376 def node(self, rev):
377 return self.index[rev][7]
377 return self.index[rev][7]
378 def linkrev(self, rev):
378 def linkrev(self, rev):
379 return self.index[rev][4]
379 return self.index[rev][4]
380 def parents(self, node):
380 def parents(self, node):
381 i = self.index
381 i = self.index
382 d = i[self.rev(node)]
382 d = i[self.rev(node)]
383 return i[d[5]][7], i[d[6]][7] # map revisions to nodes inline
383 return i[d[5]][7], i[d[6]][7] # map revisions to nodes inline
384 def parentrevs(self, rev):
384 def parentrevs(self, rev):
385 return self.index[rev][5:7]
385 return self.index[rev][5:7]
386 def start(self, rev):
386 def start(self, rev):
387 return int(self.index[rev][0] >> 16)
387 return int(self.index[rev][0] >> 16)
388 def end(self, rev):
388 def end(self, rev):
389 return self.start(rev) + self.length(rev)
389 return self.start(rev) + self.length(rev)
390 def length(self, rev):
390 def length(self, rev):
391 return self.index[rev][1]
391 return self.index[rev][1]
392 def chainbase(self, rev):
392 def chainbase(self, rev):
393 index = self.index
393 index = self.index
394 base = index[rev][3]
394 base = index[rev][3]
395 while base != rev:
395 while base != rev:
396 rev = base
396 rev = base
397 base = index[rev][3]
397 base = index[rev][3]
398 return base
398 return base
399 def chainlen(self, rev):
399 def chainlen(self, rev):
400 return self._chaininfo(rev)[0]
400 return self._chaininfo(rev)[0]
401
401
402 def _chaininfo(self, rev):
402 def _chaininfo(self, rev):
403 chaininfocache = self._chaininfocache
403 chaininfocache = self._chaininfocache
404 if rev in chaininfocache:
404 if rev in chaininfocache:
405 return chaininfocache[rev]
405 return chaininfocache[rev]
406 index = self.index
406 index = self.index
407 generaldelta = self._generaldelta
407 generaldelta = self._generaldelta
408 iterrev = rev
408 iterrev = rev
409 e = index[iterrev]
409 e = index[iterrev]
410 clen = 0
410 clen = 0
411 compresseddeltalen = 0
411 compresseddeltalen = 0
412 while iterrev != e[3]:
412 while iterrev != e[3]:
413 clen += 1
413 clen += 1
414 compresseddeltalen += e[1]
414 compresseddeltalen += e[1]
415 if generaldelta:
415 if generaldelta:
416 iterrev = e[3]
416 iterrev = e[3]
417 else:
417 else:
418 iterrev -= 1
418 iterrev -= 1
419 if iterrev in chaininfocache:
419 if iterrev in chaininfocache:
420 t = chaininfocache[iterrev]
420 t = chaininfocache[iterrev]
421 clen += t[0]
421 clen += t[0]
422 compresseddeltalen += t[1]
422 compresseddeltalen += t[1]
423 break
423 break
424 e = index[iterrev]
424 e = index[iterrev]
425 else:
425 else:
426 # Add text length of base since decompressing that also takes
426 # Add text length of base since decompressing that also takes
427 # work. For cache hits the length is already included.
427 # work. For cache hits the length is already included.
428 compresseddeltalen += e[1]
428 compresseddeltalen += e[1]
429 r = (clen, compresseddeltalen)
429 r = (clen, compresseddeltalen)
430 chaininfocache[rev] = r
430 chaininfocache[rev] = r
431 return r
431 return r
432
432
433 def _deltachain(self, rev, stoprev=None):
433 def _deltachain(self, rev, stoprev=None):
434 """Obtain the delta chain for a revision.
434 """Obtain the delta chain for a revision.
435
435
436 ``stoprev`` specifies a revision to stop at. If not specified, we
436 ``stoprev`` specifies a revision to stop at. If not specified, we
437 stop at the base of the chain.
437 stop at the base of the chain.
438
438
439 Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
439 Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
440 revs in ascending order and ``stopped`` is a bool indicating whether
440 revs in ascending order and ``stopped`` is a bool indicating whether
441 ``stoprev`` was hit.
441 ``stoprev`` was hit.
442 """
442 """
443 chain = []
443 chain = []
444
444
445 # Alias to prevent attribute lookup in tight loop.
445 # Alias to prevent attribute lookup in tight loop.
446 index = self.index
446 index = self.index
447 generaldelta = self._generaldelta
447 generaldelta = self._generaldelta
448
448
449 iterrev = rev
449 iterrev = rev
450 e = index[iterrev]
450 e = index[iterrev]
451 while iterrev != e[3] and iterrev != stoprev:
451 while iterrev != e[3] and iterrev != stoprev:
452 chain.append(iterrev)
452 chain.append(iterrev)
453 if generaldelta:
453 if generaldelta:
454 iterrev = e[3]
454 iterrev = e[3]
455 else:
455 else:
456 iterrev -= 1
456 iterrev -= 1
457 e = index[iterrev]
457 e = index[iterrev]
458
458
459 if iterrev == stoprev:
459 if iterrev == stoprev:
460 stopped = True
460 stopped = True
461 else:
461 else:
462 chain.append(iterrev)
462 chain.append(iterrev)
463 stopped = False
463 stopped = False
464
464
465 chain.reverse()
465 chain.reverse()
466 return chain, stopped
466 return chain, stopped
467
467
468 def flags(self, rev):
468 def flags(self, rev):
469 return self.index[rev][0] & 0xFFFF
469 return self.index[rev][0] & 0xFFFF
470 def rawsize(self, rev):
470 def rawsize(self, rev):
471 """return the length of the uncompressed text for a given revision"""
471 """return the length of the uncompressed text for a given revision"""
472 l = self.index[rev][2]
472 l = self.index[rev][2]
473 if l >= 0:
473 if l >= 0:
474 return l
474 return l
475
475
476 t = self.revision(self.node(rev))
476 t = self.revision(self.node(rev))
477 return len(t)
477 return len(t)
478 size = rawsize
478 size = rawsize
479
479
480 def ancestors(self, revs, stoprev=0, inclusive=False):
480 def ancestors(self, revs, stoprev=0, inclusive=False):
481 """Generate the ancestors of 'revs' in reverse topological order.
481 """Generate the ancestors of 'revs' in reverse topological order.
482 Does not generate revs lower than stoprev.
482 Does not generate revs lower than stoprev.
483
483
484 See the documentation for ancestor.lazyancestors for more details."""
484 See the documentation for ancestor.lazyancestors for more details."""
485
485
486 return ancestor.lazyancestors(self.parentrevs, revs, stoprev=stoprev,
486 return ancestor.lazyancestors(self.parentrevs, revs, stoprev=stoprev,
487 inclusive=inclusive)
487 inclusive=inclusive)
488
488
489 def descendants(self, revs):
489 def descendants(self, revs):
490 """Generate the descendants of 'revs' in revision order.
490 """Generate the descendants of 'revs' in revision order.
491
491
492 Yield a sequence of revision numbers starting with a child of
492 Yield a sequence of revision numbers starting with a child of
493 some rev in revs, i.e., each revision is *not* considered a
493 some rev in revs, i.e., each revision is *not* considered a
494 descendant of itself. Results are ordered by revision number (a
494 descendant of itself. Results are ordered by revision number (a
495 topological sort)."""
495 topological sort)."""
496 first = min(revs)
496 first = min(revs)
497 if first == nullrev:
497 if first == nullrev:
498 for i in self:
498 for i in self:
499 yield i
499 yield i
500 return
500 return
501
501
502 seen = set(revs)
502 seen = set(revs)
503 for i in self.revs(start=first + 1):
503 for i in self.revs(start=first + 1):
504 for x in self.parentrevs(i):
504 for x in self.parentrevs(i):
505 if x != nullrev and x in seen:
505 if x != nullrev and x in seen:
506 seen.add(i)
506 seen.add(i)
507 yield i
507 yield i
508 break
508 break
509
509
510 def findcommonmissing(self, common=None, heads=None):
510 def findcommonmissing(self, common=None, heads=None):
511 """Return a tuple of the ancestors of common and the ancestors of heads
511 """Return a tuple of the ancestors of common and the ancestors of heads
512 that are not ancestors of common. In revset terminology, we return the
512 that are not ancestors of common. In revset terminology, we return the
513 tuple:
513 tuple:
514
514
515 ::common, (::heads) - (::common)
515 ::common, (::heads) - (::common)
516
516
517 The list is sorted by revision number, meaning it is
517 The list is sorted by revision number, meaning it is
518 topologically sorted.
518 topologically sorted.
519
519
520 'heads' and 'common' are both lists of node IDs. If heads is
520 'heads' and 'common' are both lists of node IDs. If heads is
521 not supplied, uses all of the revlog's heads. If common is not
521 not supplied, uses all of the revlog's heads. If common is not
522 supplied, uses nullid."""
522 supplied, uses nullid."""
523 if common is None:
523 if common is None:
524 common = [nullid]
524 common = [nullid]
525 if heads is None:
525 if heads is None:
526 heads = self.heads()
526 heads = self.heads()
527
527
528 common = [self.rev(n) for n in common]
528 common = [self.rev(n) for n in common]
529 heads = [self.rev(n) for n in heads]
529 heads = [self.rev(n) for n in heads]
530
530
531 # we want the ancestors, but inclusive
531 # we want the ancestors, but inclusive
532 class lazyset(object):
532 class lazyset(object):
533 def __init__(self, lazyvalues):
533 def __init__(self, lazyvalues):
534 self.addedvalues = set()
534 self.addedvalues = set()
535 self.lazyvalues = lazyvalues
535 self.lazyvalues = lazyvalues
536
536
537 def __contains__(self, value):
537 def __contains__(self, value):
538 return value in self.addedvalues or value in self.lazyvalues
538 return value in self.addedvalues or value in self.lazyvalues
539
539
540 def __iter__(self):
540 def __iter__(self):
541 added = self.addedvalues
541 added = self.addedvalues
542 for r in added:
542 for r in added:
543 yield r
543 yield r
544 for r in self.lazyvalues:
544 for r in self.lazyvalues:
545 if not r in added:
545 if not r in added:
546 yield r
546 yield r
547
547
548 def add(self, value):
548 def add(self, value):
549 self.addedvalues.add(value)
549 self.addedvalues.add(value)
550
550
551 def update(self, values):
551 def update(self, values):
552 self.addedvalues.update(values)
552 self.addedvalues.update(values)
553
553
554 has = lazyset(self.ancestors(common))
554 has = lazyset(self.ancestors(common))
555 has.add(nullrev)
555 has.add(nullrev)
556 has.update(common)
556 has.update(common)
557
557
558 # take all ancestors from heads that aren't in has
558 # take all ancestors from heads that aren't in has
559 missing = set()
559 missing = set()
560 visit = collections.deque(r for r in heads if r not in has)
560 visit = collections.deque(r for r in heads if r not in has)
561 while visit:
561 while visit:
562 r = visit.popleft()
562 r = visit.popleft()
563 if r in missing:
563 if r in missing:
564 continue
564 continue
565 else:
565 else:
566 missing.add(r)
566 missing.add(r)
567 for p in self.parentrevs(r):
567 for p in self.parentrevs(r):
568 if p not in has:
568 if p not in has:
569 visit.append(p)
569 visit.append(p)
570 missing = list(missing)
570 missing = list(missing)
571 missing.sort()
571 missing.sort()
572 return has, [self.node(r) for r in missing]
572 return has, [self.node(r) for r in missing]
573
573
574 def incrementalmissingrevs(self, common=None):
574 def incrementalmissingrevs(self, common=None):
575 """Return an object that can be used to incrementally compute the
575 """Return an object that can be used to incrementally compute the
576 revision numbers of the ancestors of arbitrary sets that are not
576 revision numbers of the ancestors of arbitrary sets that are not
577 ancestors of common. This is an ancestor.incrementalmissingancestors
577 ancestors of common. This is an ancestor.incrementalmissingancestors
578 object.
578 object.
579
579
580 'common' is a list of revision numbers. If common is not supplied, uses
580 'common' is a list of revision numbers. If common is not supplied, uses
581 nullrev.
581 nullrev.
582 """
582 """
583 if common is None:
583 if common is None:
584 common = [nullrev]
584 common = [nullrev]
585
585
586 return ancestor.incrementalmissingancestors(self.parentrevs, common)
586 return ancestor.incrementalmissingancestors(self.parentrevs, common)
587
587
588 def findmissingrevs(self, common=None, heads=None):
588 def findmissingrevs(self, common=None, heads=None):
589 """Return the revision numbers of the ancestors of heads that
589 """Return the revision numbers of the ancestors of heads that
590 are not ancestors of common.
590 are not ancestors of common.
591
591
592 More specifically, return a list of revision numbers corresponding to
592 More specifically, return a list of revision numbers corresponding to
593 nodes N such that every N satisfies the following constraints:
593 nodes N such that every N satisfies the following constraints:
594
594
595 1. N is an ancestor of some node in 'heads'
595 1. N is an ancestor of some node in 'heads'
596 2. N is not an ancestor of any node in 'common'
596 2. N is not an ancestor of any node in 'common'
597
597
598 The list is sorted by revision number, meaning it is
598 The list is sorted by revision number, meaning it is
599 topologically sorted.
599 topologically sorted.
600
600
601 'heads' and 'common' are both lists of revision numbers. If heads is
601 'heads' and 'common' are both lists of revision numbers. If heads is
602 not supplied, uses all of the revlog's heads. If common is not
602 not supplied, uses all of the revlog's heads. If common is not
603 supplied, uses nullid."""
603 supplied, uses nullid."""
604 if common is None:
604 if common is None:
605 common = [nullrev]
605 common = [nullrev]
606 if heads is None:
606 if heads is None:
607 heads = self.headrevs()
607 heads = self.headrevs()
608
608
609 inc = self.incrementalmissingrevs(common=common)
609 inc = self.incrementalmissingrevs(common=common)
610 return inc.missingancestors(heads)
610 return inc.missingancestors(heads)
611
611
612 def findmissing(self, common=None, heads=None):
612 def findmissing(self, common=None, heads=None):
613 """Return the ancestors of heads that are not ancestors of common.
613 """Return the ancestors of heads that are not ancestors of common.
614
614
615 More specifically, return a list of nodes N such that every N
615 More specifically, return a list of nodes N such that every N
616 satisfies the following constraints:
616 satisfies the following constraints:
617
617
618 1. N is an ancestor of some node in 'heads'
618 1. N is an ancestor of some node in 'heads'
619 2. N is not an ancestor of any node in 'common'
619 2. N is not an ancestor of any node in 'common'
620
620
621 The list is sorted by revision number, meaning it is
621 The list is sorted by revision number, meaning it is
622 topologically sorted.
622 topologically sorted.
623
623
624 'heads' and 'common' are both lists of node IDs. If heads is
624 'heads' and 'common' are both lists of node IDs. If heads is
625 not supplied, uses all of the revlog's heads. If common is not
625 not supplied, uses all of the revlog's heads. If common is not
626 supplied, uses nullid."""
626 supplied, uses nullid."""
627 if common is None:
627 if common is None:
628 common = [nullid]
628 common = [nullid]
629 if heads is None:
629 if heads is None:
630 heads = self.heads()
630 heads = self.heads()
631
631
632 common = [self.rev(n) for n in common]
632 common = [self.rev(n) for n in common]
633 heads = [self.rev(n) for n in heads]
633 heads = [self.rev(n) for n in heads]
634
634
635 inc = self.incrementalmissingrevs(common=common)
635 inc = self.incrementalmissingrevs(common=common)
636 return [self.node(r) for r in inc.missingancestors(heads)]
636 return [self.node(r) for r in inc.missingancestors(heads)]
637
637
638 def nodesbetween(self, roots=None, heads=None):
638 def nodesbetween(self, roots=None, heads=None):
639 """Return a topological path from 'roots' to 'heads'.
639 """Return a topological path from 'roots' to 'heads'.
640
640
641 Return a tuple (nodes, outroots, outheads) where 'nodes' is a
641 Return a tuple (nodes, outroots, outheads) where 'nodes' is a
642 topologically sorted list of all nodes N that satisfy both of
642 topologically sorted list of all nodes N that satisfy both of
643 these constraints:
643 these constraints:
644
644
645 1. N is a descendant of some node in 'roots'
645 1. N is a descendant of some node in 'roots'
646 2. N is an ancestor of some node in 'heads'
646 2. N is an ancestor of some node in 'heads'
647
647
648 Every node is considered to be both a descendant and an ancestor
648 Every node is considered to be both a descendant and an ancestor
649 of itself, so every reachable node in 'roots' and 'heads' will be
649 of itself, so every reachable node in 'roots' and 'heads' will be
650 included in 'nodes'.
650 included in 'nodes'.
651
651
652 'outroots' is the list of reachable nodes in 'roots', i.e., the
652 'outroots' is the list of reachable nodes in 'roots', i.e., the
653 subset of 'roots' that is returned in 'nodes'. Likewise,
653 subset of 'roots' that is returned in 'nodes'. Likewise,
654 'outheads' is the subset of 'heads' that is also in 'nodes'.
654 'outheads' is the subset of 'heads' that is also in 'nodes'.
655
655
656 'roots' and 'heads' are both lists of node IDs. If 'roots' is
656 'roots' and 'heads' are both lists of node IDs. If 'roots' is
657 unspecified, uses nullid as the only root. If 'heads' is
657 unspecified, uses nullid as the only root. If 'heads' is
658 unspecified, uses list of all of the revlog's heads."""
658 unspecified, uses list of all of the revlog's heads."""
659 nonodes = ([], [], [])
659 nonodes = ([], [], [])
660 if roots is not None:
660 if roots is not None:
661 roots = list(roots)
661 roots = list(roots)
662 if not roots:
662 if not roots:
663 return nonodes
663 return nonodes
664 lowestrev = min([self.rev(n) for n in roots])
664 lowestrev = min([self.rev(n) for n in roots])
665 else:
665 else:
666 roots = [nullid] # Everybody's a descendant of nullid
666 roots = [nullid] # Everybody's a descendant of nullid
667 lowestrev = nullrev
667 lowestrev = nullrev
668 if (lowestrev == nullrev) and (heads is None):
668 if (lowestrev == nullrev) and (heads is None):
669 # We want _all_ the nodes!
669 # We want _all_ the nodes!
670 return ([self.node(r) for r in self], [nullid], list(self.heads()))
670 return ([self.node(r) for r in self], [nullid], list(self.heads()))
671 if heads is None:
671 if heads is None:
672 # All nodes are ancestors, so the latest ancestor is the last
672 # All nodes are ancestors, so the latest ancestor is the last
673 # node.
673 # node.
674 highestrev = len(self) - 1
674 highestrev = len(self) - 1
675 # Set ancestors to None to signal that every node is an ancestor.
675 # Set ancestors to None to signal that every node is an ancestor.
676 ancestors = None
676 ancestors = None
677 # Set heads to an empty dictionary for later discovery of heads
677 # Set heads to an empty dictionary for later discovery of heads
678 heads = {}
678 heads = {}
679 else:
679 else:
680 heads = list(heads)
680 heads = list(heads)
681 if not heads:
681 if not heads:
682 return nonodes
682 return nonodes
683 ancestors = set()
683 ancestors = set()
684 # Turn heads into a dictionary so we can remove 'fake' heads.
684 # Turn heads into a dictionary so we can remove 'fake' heads.
685 # Also, later we will be using it to filter out the heads we can't
685 # Also, later we will be using it to filter out the heads we can't
686 # find from roots.
686 # find from roots.
687 heads = dict.fromkeys(heads, False)
687 heads = dict.fromkeys(heads, False)
688 # Start at the top and keep marking parents until we're done.
688 # Start at the top and keep marking parents until we're done.
689 nodestotag = set(heads)
689 nodestotag = set(heads)
690 # Remember where the top was so we can use it as a limit later.
690 # Remember where the top was so we can use it as a limit later.
691 highestrev = max([self.rev(n) for n in nodestotag])
691 highestrev = max([self.rev(n) for n in nodestotag])
692 while nodestotag:
692 while nodestotag:
693 # grab a node to tag
693 # grab a node to tag
694 n = nodestotag.pop()
694 n = nodestotag.pop()
695 # Never tag nullid
695 # Never tag nullid
696 if n == nullid:
696 if n == nullid:
697 continue
697 continue
698 # A node's revision number represents its place in a
698 # A node's revision number represents its place in a
699 # topologically sorted list of nodes.
699 # topologically sorted list of nodes.
700 r = self.rev(n)
700 r = self.rev(n)
701 if r >= lowestrev:
701 if r >= lowestrev:
702 if n not in ancestors:
702 if n not in ancestors:
703 # If we are possibly a descendant of one of the roots
703 # If we are possibly a descendant of one of the roots
704 # and we haven't already been marked as an ancestor
704 # and we haven't already been marked as an ancestor
705 ancestors.add(n) # Mark as ancestor
705 ancestors.add(n) # Mark as ancestor
706 # Add non-nullid parents to list of nodes to tag.
706 # Add non-nullid parents to list of nodes to tag.
707 nodestotag.update([p for p in self.parents(n) if
707 nodestotag.update([p for p in self.parents(n) if
708 p != nullid])
708 p != nullid])
709 elif n in heads: # We've seen it before, is it a fake head?
709 elif n in heads: # We've seen it before, is it a fake head?
710 # So it is, real heads should not be the ancestors of
710 # So it is, real heads should not be the ancestors of
711 # any other heads.
711 # any other heads.
712 heads.pop(n)
712 heads.pop(n)
713 if not ancestors:
713 if not ancestors:
714 return nonodes
714 return nonodes
715 # Now that we have our set of ancestors, we want to remove any
715 # Now that we have our set of ancestors, we want to remove any
716 # roots that are not ancestors.
716 # roots that are not ancestors.
717
717
718 # If one of the roots was nullid, everything is included anyway.
718 # If one of the roots was nullid, everything is included anyway.
719 if lowestrev > nullrev:
719 if lowestrev > nullrev:
720 # But, since we weren't, let's recompute the lowest rev to not
720 # But, since we weren't, let's recompute the lowest rev to not
721 # include roots that aren't ancestors.
721 # include roots that aren't ancestors.
722
722
723 # Filter out roots that aren't ancestors of heads
723 # Filter out roots that aren't ancestors of heads
724 roots = [n for n in roots if n in ancestors]
724 roots = [n for n in roots if n in ancestors]
725 # Recompute the lowest revision
725 # Recompute the lowest revision
726 if roots:
726 if roots:
727 lowestrev = min([self.rev(n) for n in roots])
727 lowestrev = min([self.rev(n) for n in roots])
728 else:
728 else:
729 # No more roots? Return empty list
729 # No more roots? Return empty list
730 return nonodes
730 return nonodes
731 else:
731 else:
732 # We are descending from nullid, and don't need to care about
732 # We are descending from nullid, and don't need to care about
733 # any other roots.
733 # any other roots.
734 lowestrev = nullrev
734 lowestrev = nullrev
735 roots = [nullid]
735 roots = [nullid]
736 # Transform our roots list into a set.
736 # Transform our roots list into a set.
737 descendants = set(roots)
737 descendants = set(roots)
738 # Also, keep the original roots so we can filter out roots that aren't
738 # Also, keep the original roots so we can filter out roots that aren't
739 # 'real' roots (i.e. are descended from other roots).
739 # 'real' roots (i.e. are descended from other roots).
740 roots = descendants.copy()
740 roots = descendants.copy()
741 # Our topologically sorted list of output nodes.
741 # Our topologically sorted list of output nodes.
742 orderedout = []
742 orderedout = []
743 # Don't start at nullid since we don't want nullid in our output list,
743 # Don't start at nullid since we don't want nullid in our output list,
744 # and if nullid shows up in descendants, empty parents will look like
744 # and if nullid shows up in descendants, empty parents will look like
745 # they're descendants.
745 # they're descendants.
746 for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
746 for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
747 n = self.node(r)
747 n = self.node(r)
748 isdescendant = False
748 isdescendant = False
749 if lowestrev == nullrev: # Everybody is a descendant of nullid
749 if lowestrev == nullrev: # Everybody is a descendant of nullid
750 isdescendant = True
750 isdescendant = True
751 elif n in descendants:
751 elif n in descendants:
752 # n is already a descendant
752 # n is already a descendant
753 isdescendant = True
753 isdescendant = True
754 # This check only needs to be done here because all the roots
754 # This check only needs to be done here because all the roots
755 # will start being marked is descendants before the loop.
755 # will start being marked is descendants before the loop.
756 if n in roots:
756 if n in roots:
757 # If n was a root, check if it's a 'real' root.
757 # If n was a root, check if it's a 'real' root.
758 p = tuple(self.parents(n))
758 p = tuple(self.parents(n))
759 # If any of its parents are descendants, it's not a root.
759 # If any of its parents are descendants, it's not a root.
760 if (p[0] in descendants) or (p[1] in descendants):
760 if (p[0] in descendants) or (p[1] in descendants):
761 roots.remove(n)
761 roots.remove(n)
762 else:
762 else:
763 p = tuple(self.parents(n))
763 p = tuple(self.parents(n))
764 # A node is a descendant if either of its parents are
764 # A node is a descendant if either of its parents are
765 # descendants. (We seeded the dependents list with the roots
765 # descendants. (We seeded the dependents list with the roots
766 # up there, remember?)
766 # up there, remember?)
767 if (p[0] in descendants) or (p[1] in descendants):
767 if (p[0] in descendants) or (p[1] in descendants):
768 descendants.add(n)
768 descendants.add(n)
769 isdescendant = True
769 isdescendant = True
770 if isdescendant and ((ancestors is None) or (n in ancestors)):
770 if isdescendant and ((ancestors is None) or (n in ancestors)):
771 # Only include nodes that are both descendants and ancestors.
771 # Only include nodes that are both descendants and ancestors.
772 orderedout.append(n)
772 orderedout.append(n)
773 if (ancestors is not None) and (n in heads):
773 if (ancestors is not None) and (n in heads):
774 # We're trying to figure out which heads are reachable
774 # We're trying to figure out which heads are reachable
775 # from roots.
775 # from roots.
776 # Mark this head as having been reached
776 # Mark this head as having been reached
777 heads[n] = True
777 heads[n] = True
778 elif ancestors is None:
778 elif ancestors is None:
779 # Otherwise, we're trying to discover the heads.
779 # Otherwise, we're trying to discover the heads.
780 # Assume this is a head because if it isn't, the next step
780 # Assume this is a head because if it isn't, the next step
781 # will eventually remove it.
781 # will eventually remove it.
782 heads[n] = True
782 heads[n] = True
783 # But, obviously its parents aren't.
783 # But, obviously its parents aren't.
784 for p in self.parents(n):
784 for p in self.parents(n):
785 heads.pop(p, None)
785 heads.pop(p, None)
786 heads = [n for n, flag in heads.iteritems() if flag]
786 heads = [n for n, flag in heads.iteritems() if flag]
787 roots = list(roots)
787 roots = list(roots)
788 assert orderedout
788 assert orderedout
789 assert roots
789 assert roots
790 assert heads
790 assert heads
791 return (orderedout, roots, heads)
791 return (orderedout, roots, heads)
792
792
793 def headrevs(self):
793 def headrevs(self):
794 try:
794 try:
795 return self.index.headrevs()
795 return self.index.headrevs()
796 except AttributeError:
796 except AttributeError:
797 return self._headrevs()
797 return self._headrevs()
798
798
799 def computephases(self, roots):
799 def computephases(self, roots):
800 return self.index.computephasesmapsets(roots)
800 return self.index.computephasesmapsets(roots)
801
801
802 def _headrevs(self):
802 def _headrevs(self):
803 count = len(self)
803 count = len(self)
804 if not count:
804 if not count:
805 return [nullrev]
805 return [nullrev]
806 # we won't iter over filtered rev so nobody is a head at start
806 # we won't iter over filtered rev so nobody is a head at start
807 ishead = [0] * (count + 1)
807 ishead = [0] * (count + 1)
808 index = self.index
808 index = self.index
809 for r in self:
809 for r in self:
810 ishead[r] = 1 # I may be an head
810 ishead[r] = 1 # I may be an head
811 e = index[r]
811 e = index[r]
812 ishead[e[5]] = ishead[e[6]] = 0 # my parent are not
812 ishead[e[5]] = ishead[e[6]] = 0 # my parent are not
813 return [r for r, val in enumerate(ishead) if val]
813 return [r for r, val in enumerate(ishead) if val]
814
814
815 def heads(self, start=None, stop=None):
815 def heads(self, start=None, stop=None):
816 """return the list of all nodes that have no children
816 """return the list of all nodes that have no children
817
817
818 if start is specified, only heads that are descendants of
818 if start is specified, only heads that are descendants of
819 start will be returned
819 start will be returned
820 if stop is specified, it will consider all the revs from stop
820 if stop is specified, it will consider all the revs from stop
821 as if they had no children
821 as if they had no children
822 """
822 """
823 if start is None and stop is None:
823 if start is None and stop is None:
824 if not len(self):
824 if not len(self):
825 return [nullid]
825 return [nullid]
826 return [self.node(r) for r in self.headrevs()]
826 return [self.node(r) for r in self.headrevs()]
827
827
828 if start is None:
828 if start is None:
829 start = nullid
829 start = nullid
830 if stop is None:
830 if stop is None:
831 stop = []
831 stop = []
832 stoprevs = set([self.rev(n) for n in stop])
832 stoprevs = set([self.rev(n) for n in stop])
833 startrev = self.rev(start)
833 startrev = self.rev(start)
834 reachable = set((startrev,))
834 reachable = set((startrev,))
835 heads = set((startrev,))
835 heads = set((startrev,))
836
836
837 parentrevs = self.parentrevs
837 parentrevs = self.parentrevs
838 for r in self.revs(start=startrev + 1):
838 for r in self.revs(start=startrev + 1):
839 for p in parentrevs(r):
839 for p in parentrevs(r):
840 if p in reachable:
840 if p in reachable:
841 if r not in stoprevs:
841 if r not in stoprevs:
842 reachable.add(r)
842 reachable.add(r)
843 heads.add(r)
843 heads.add(r)
844 if p in heads and p not in stoprevs:
844 if p in heads and p not in stoprevs:
845 heads.remove(p)
845 heads.remove(p)
846
846
847 return [self.node(r) for r in heads]
847 return [self.node(r) for r in heads]
848
848
849 def children(self, node):
849 def children(self, node):
850 """find the children of a given node"""
850 """find the children of a given node"""
851 c = []
851 c = []
852 p = self.rev(node)
852 p = self.rev(node)
853 for r in self.revs(start=p + 1):
853 for r in self.revs(start=p + 1):
854 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
854 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
855 if prevs:
855 if prevs:
856 for pr in prevs:
856 for pr in prevs:
857 if pr == p:
857 if pr == p:
858 c.append(self.node(r))
858 c.append(self.node(r))
859 elif p == nullrev:
859 elif p == nullrev:
860 c.append(self.node(r))
860 c.append(self.node(r))
861 return c
861 return c
862
862
863 def descendant(self, start, end):
863 def descendant(self, start, end):
864 if start == nullrev:
864 if start == nullrev:
865 return True
865 return True
866 for i in self.descendants([start]):
866 for i in self.descendants([start]):
867 if i == end:
867 if i == end:
868 return True
868 return True
869 elif i > end:
869 elif i > end:
870 break
870 break
871 return False
871 return False
872
872
873 def commonancestorsheads(self, a, b):
873 def commonancestorsheads(self, a, b):
874 """calculate all the heads of the common ancestors of nodes a and b"""
874 """calculate all the heads of the common ancestors of nodes a and b"""
875 a, b = self.rev(a), self.rev(b)
875 a, b = self.rev(a), self.rev(b)
876 try:
876 try:
877 ancs = self.index.commonancestorsheads(a, b)
877 ancs = self.index.commonancestorsheads(a, b)
878 except (AttributeError, OverflowError): # C implementation failed
878 except (AttributeError, OverflowError): # C implementation failed
879 ancs = ancestor.commonancestorsheads(self.parentrevs, a, b)
879 ancs = ancestor.commonancestorsheads(self.parentrevs, a, b)
880 return map(self.node, ancs)
880 return map(self.node, ancs)
881
881
882 def isancestor(self, a, b):
882 def isancestor(self, a, b):
883 """return True if node a is an ancestor of node b
883 """return True if node a is an ancestor of node b
884
884
885 The implementation of this is trivial but the use of
885 The implementation of this is trivial but the use of
886 commonancestorsheads is not."""
886 commonancestorsheads is not."""
887 return a in self.commonancestorsheads(a, b)
887 return a in self.commonancestorsheads(a, b)
888
888
889 def ancestor(self, a, b):
889 def ancestor(self, a, b):
890 """calculate the "best" common ancestor of nodes a and b"""
890 """calculate the "best" common ancestor of nodes a and b"""
891
891
892 a, b = self.rev(a), self.rev(b)
892 a, b = self.rev(a), self.rev(b)
893 try:
893 try:
894 ancs = self.index.ancestors(a, b)
894 ancs = self.index.ancestors(a, b)
895 except (AttributeError, OverflowError):
895 except (AttributeError, OverflowError):
896 ancs = ancestor.ancestors(self.parentrevs, a, b)
896 ancs = ancestor.ancestors(self.parentrevs, a, b)
897 if ancs:
897 if ancs:
898 # choose a consistent winner when there's a tie
898 # choose a consistent winner when there's a tie
899 return min(map(self.node, ancs))
899 return min(map(self.node, ancs))
900 return nullid
900 return nullid
901
901
902 def _match(self, id):
902 def _match(self, id):
903 if isinstance(id, int):
903 if isinstance(id, int):
904 # rev
904 # rev
905 return self.node(id)
905 return self.node(id)
906 if len(id) == 20:
906 if len(id) == 20:
907 # possibly a binary node
907 # possibly a binary node
908 # odds of a binary node being all hex in ASCII are 1 in 10**25
908 # odds of a binary node being all hex in ASCII are 1 in 10**25
909 try:
909 try:
910 node = id
910 node = id
911 self.rev(node) # quick search the index
911 self.rev(node) # quick search the index
912 return node
912 return node
913 except LookupError:
913 except LookupError:
914 pass # may be partial hex id
914 pass # may be partial hex id
915 try:
915 try:
916 # str(rev)
916 # str(rev)
917 rev = int(id)
917 rev = int(id)
918 if str(rev) != id:
918 if str(rev) != id:
919 raise ValueError
919 raise ValueError
920 if rev < 0:
920 if rev < 0:
921 rev = len(self) + rev
921 rev = len(self) + rev
922 if rev < 0 or rev >= len(self):
922 if rev < 0 or rev >= len(self):
923 raise ValueError
923 raise ValueError
924 return self.node(rev)
924 return self.node(rev)
925 except (ValueError, OverflowError):
925 except (ValueError, OverflowError):
926 pass
926 pass
927 if len(id) == 40:
927 if len(id) == 40:
928 try:
928 try:
929 # a full hex nodeid?
929 # a full hex nodeid?
930 node = bin(id)
930 node = bin(id)
931 self.rev(node)
931 self.rev(node)
932 return node
932 return node
933 except (TypeError, LookupError):
933 except (TypeError, LookupError):
934 pass
934 pass
935
935
936 def _partialmatch(self, id):
936 def _partialmatch(self, id):
937 try:
937 try:
938 n = self.index.partialmatch(id)
938 n = self.index.partialmatch(id)
939 if n and self.hasnode(n):
939 if n and self.hasnode(n):
940 return n
940 return n
941 return None
941 return None
942 except RevlogError:
942 except RevlogError:
943 # parsers.c radix tree lookup gave multiple matches
943 # parsers.c radix tree lookup gave multiple matches
944 # fall through to slow path that filters hidden revisions
944 # fall through to slow path that filters hidden revisions
945 pass
945 pass
946 except (AttributeError, ValueError):
946 except (AttributeError, ValueError):
947 # we are pure python, or key was too short to search radix tree
947 # we are pure python, or key was too short to search radix tree
948 pass
948 pass
949
949
950 if id in self._pcache:
950 if id in self._pcache:
951 return self._pcache[id]
951 return self._pcache[id]
952
952
953 if len(id) < 40:
953 if len(id) < 40:
954 try:
954 try:
955 # hex(node)[:...]
955 # hex(node)[:...]
956 l = len(id) // 2 # grab an even number of digits
956 l = len(id) // 2 # grab an even number of digits
957 prefix = bin(id[:l * 2])
957 prefix = bin(id[:l * 2])
958 nl = [e[7] for e in self.index if e[7].startswith(prefix)]
958 nl = [e[7] for e in self.index if e[7].startswith(prefix)]
959 nl = [n for n in nl if hex(n).startswith(id) and
959 nl = [n for n in nl if hex(n).startswith(id) and
960 self.hasnode(n)]
960 self.hasnode(n)]
961 if len(nl) > 0:
961 if len(nl) > 0:
962 if len(nl) == 1:
962 if len(nl) == 1:
963 self._pcache[id] = nl[0]
963 self._pcache[id] = nl[0]
964 return nl[0]
964 return nl[0]
965 raise LookupError(id, self.indexfile,
965 raise LookupError(id, self.indexfile,
966 _('ambiguous identifier'))
966 _('ambiguous identifier'))
967 return None
967 return None
968 except TypeError:
968 except TypeError:
969 pass
969 pass
970
970
971 def lookup(self, id):
971 def lookup(self, id):
972 """locate a node based on:
972 """locate a node based on:
973 - revision number or str(revision number)
973 - revision number or str(revision number)
974 - nodeid or subset of hex nodeid
974 - nodeid or subset of hex nodeid
975 """
975 """
976 n = self._match(id)
976 n = self._match(id)
977 if n is not None:
977 if n is not None:
978 return n
978 return n
979 n = self._partialmatch(id)
979 n = self._partialmatch(id)
980 if n:
980 if n:
981 return n
981 return n
982
982
983 raise LookupError(id, self.indexfile, _('no match found'))
983 raise LookupError(id, self.indexfile, _('no match found'))
984
984
985 def cmp(self, node, text):
985 def cmp(self, node, text):
986 """compare text with a given file revision
986 """compare text with a given file revision
987
987
988 returns True if text is different than what is stored.
988 returns True if text is different than what is stored.
989 """
989 """
990 p1, p2 = self.parents(node)
990 p1, p2 = self.parents(node)
991 return hash(text, p1, p2) != node
991 return hash(text, p1, p2) != node
992
992
993 def _addchunk(self, offset, data):
993 def _addchunk(self, offset, data):
994 """Add a segment to the revlog cache.
994 """Add a segment to the revlog cache.
995
995
996 Accepts an absolute offset and the data that is at that location.
996 Accepts an absolute offset and the data that is at that location.
997 """
997 """
998 o, d = self._chunkcache
998 o, d = self._chunkcache
999 # try to add to existing cache
999 # try to add to existing cache
1000 if o + len(d) == offset and len(d) + len(data) < _chunksize:
1000 if o + len(d) == offset and len(d) + len(data) < _chunksize:
1001 self._chunkcache = o, d + data
1001 self._chunkcache = o, d + data
1002 else:
1002 else:
1003 self._chunkcache = offset, data
1003 self._chunkcache = offset, data
1004
1004
1005 def _loadchunk(self, offset, length, df=None):
1005 def _loadchunk(self, offset, length, df=None):
1006 """Load a segment of raw data from the revlog.
1006 """Load a segment of raw data from the revlog.
1007
1007
1008 Accepts an absolute offset, length to read, and an optional existing
1008 Accepts an absolute offset, length to read, and an optional existing
1009 file handle to read from.
1009 file handle to read from.
1010
1010
1011 If an existing file handle is passed, it will be seeked and the
1011 If an existing file handle is passed, it will be seeked and the
1012 original seek position will NOT be restored.
1012 original seek position will NOT be restored.
1013
1013
1014 Returns a str or buffer of raw byte data.
1014 Returns a str or buffer of raw byte data.
1015 """
1015 """
1016 if df is not None:
1016 if df is not None:
1017 closehandle = False
1017 closehandle = False
1018 else:
1018 else:
1019 if self._inline:
1019 if self._inline:
1020 df = self.opener(self.indexfile)
1020 df = self.opener(self.indexfile)
1021 else:
1021 else:
1022 df = self.opener(self.datafile)
1022 df = self.opener(self.datafile)
1023 closehandle = True
1023 closehandle = True
1024
1024
1025 # Cache data both forward and backward around the requested
1025 # Cache data both forward and backward around the requested
1026 # data, in a fixed size window. This helps speed up operations
1026 # data, in a fixed size window. This helps speed up operations
1027 # involving reading the revlog backwards.
1027 # involving reading the revlog backwards.
1028 cachesize = self._chunkcachesize
1028 cachesize = self._chunkcachesize
1029 realoffset = offset & ~(cachesize - 1)
1029 realoffset = offset & ~(cachesize - 1)
1030 reallength = (((offset + length + cachesize) & ~(cachesize - 1))
1030 reallength = (((offset + length + cachesize) & ~(cachesize - 1))
1031 - realoffset)
1031 - realoffset)
1032 df.seek(realoffset)
1032 df.seek(realoffset)
1033 d = df.read(reallength)
1033 d = df.read(reallength)
1034 if closehandle:
1034 if closehandle:
1035 df.close()
1035 df.close()
1036 self._addchunk(realoffset, d)
1036 self._addchunk(realoffset, d)
1037 if offset != realoffset or reallength != length:
1037 if offset != realoffset or reallength != length:
1038 return util.buffer(d, offset - realoffset, length)
1038 return util.buffer(d, offset - realoffset, length)
1039 return d
1039 return d
1040
1040
1041 def _getchunk(self, offset, length, df=None):
1041 def _getchunk(self, offset, length, df=None):
1042 """Obtain a segment of raw data from the revlog.
1042 """Obtain a segment of raw data from the revlog.
1043
1043
1044 Accepts an absolute offset, length of bytes to obtain, and an
1044 Accepts an absolute offset, length of bytes to obtain, and an
1045 optional file handle to the already-opened revlog. If the file
1045 optional file handle to the already-opened revlog. If the file
1046 handle is used, it's original seek position will not be preserved.
1046 handle is used, it's original seek position will not be preserved.
1047
1047
1048 Requests for data may be returned from a cache.
1048 Requests for data may be returned from a cache.
1049
1049
1050 Returns a str or a buffer instance of raw byte data.
1050 Returns a str or a buffer instance of raw byte data.
1051 """
1051 """
1052 o, d = self._chunkcache
1052 o, d = self._chunkcache
1053 l = len(d)
1053 l = len(d)
1054
1054
1055 # is it in the cache?
1055 # is it in the cache?
1056 cachestart = offset - o
1056 cachestart = offset - o
1057 cacheend = cachestart + length
1057 cacheend = cachestart + length
1058 if cachestart >= 0 and cacheend <= l:
1058 if cachestart >= 0 and cacheend <= l:
1059 if cachestart == 0 and cacheend == l:
1059 if cachestart == 0 and cacheend == l:
1060 return d # avoid a copy
1060 return d # avoid a copy
1061 return util.buffer(d, cachestart, cacheend - cachestart)
1061 return util.buffer(d, cachestart, cacheend - cachestart)
1062
1062
1063 return self._loadchunk(offset, length, df=df)
1063 return self._loadchunk(offset, length, df=df)
1064
1064
1065 def _chunkraw(self, startrev, endrev, df=None):
1065 def _chunkraw(self, startrev, endrev, df=None):
1066 """Obtain a segment of raw data corresponding to a range of revisions.
1066 """Obtain a segment of raw data corresponding to a range of revisions.
1067
1067
1068 Accepts the start and end revisions and an optional already-open
1068 Accepts the start and end revisions and an optional already-open
1069 file handle to be used for reading. If the file handle is read, its
1069 file handle to be used for reading. If the file handle is read, its
1070 seek position will not be preserved.
1070 seek position will not be preserved.
1071
1071
1072 Requests for data may be satisfied by a cache.
1072 Requests for data may be satisfied by a cache.
1073
1073
1074 Returns a 2-tuple of (offset, data) for the requested range of
1074 Returns a 2-tuple of (offset, data) for the requested range of
1075 revisions. Offset is the integer offset from the beginning of the
1075 revisions. Offset is the integer offset from the beginning of the
1076 revlog and data is a str or buffer of the raw byte data.
1076 revlog and data is a str or buffer of the raw byte data.
1077
1077
1078 Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
1078 Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
1079 to determine where each revision's data begins and ends.
1079 to determine where each revision's data begins and ends.
1080 """
1080 """
1081 start = self.start(startrev)
1081 start = self.start(startrev)
1082 end = self.end(endrev)
1082 end = self.end(endrev)
1083 if self._inline:
1083 if self._inline:
1084 start += (startrev + 1) * self._io.size
1084 start += (startrev + 1) * self._io.size
1085 end += (endrev + 1) * self._io.size
1085 end += (endrev + 1) * self._io.size
1086 length = end - start
1086 length = end - start
1087
1087
1088 return start, self._getchunk(start, length, df=df)
1088 return start, self._getchunk(start, length, df=df)
1089
1089
1090 def _chunk(self, rev, df=None):
1090 def _chunk(self, rev, df=None):
1091 """Obtain a single decompressed chunk for a revision.
1091 """Obtain a single decompressed chunk for a revision.
1092
1092
1093 Accepts an integer revision and an optional already-open file handle
1093 Accepts an integer revision and an optional already-open file handle
1094 to be used for reading. If used, the seek position of the file will not
1094 to be used for reading. If used, the seek position of the file will not
1095 be preserved.
1095 be preserved.
1096
1096
1097 Returns a str holding uncompressed data for the requested revision.
1097 Returns a str holding uncompressed data for the requested revision.
1098 """
1098 """
1099 return decompress(self._chunkraw(rev, rev, df=df)[1])
1099 return decompress(self._chunkraw(rev, rev, df=df)[1])
1100
1100
1101 def _chunks(self, revs, df=None):
1101 def _chunks(self, revs, df=None):
1102 """Obtain decompressed chunks for the specified revisions.
1102 """Obtain decompressed chunks for the specified revisions.
1103
1103
1104 Accepts an iterable of numeric revisions that are assumed to be in
1104 Accepts an iterable of numeric revisions that are assumed to be in
1105 ascending order. Also accepts an optional already-open file handle
1105 ascending order. Also accepts an optional already-open file handle
1106 to be used for reading. If used, the seek position of the file will
1106 to be used for reading. If used, the seek position of the file will
1107 not be preserved.
1107 not be preserved.
1108
1108
1109 This function is similar to calling ``self._chunk()`` multiple times,
1109 This function is similar to calling ``self._chunk()`` multiple times,
1110 but is faster.
1110 but is faster.
1111
1111
1112 Returns a list with decompressed data for each requested revision.
1112 Returns a list with decompressed data for each requested revision.
1113 """
1113 """
1114 if not revs:
1114 if not revs:
1115 return []
1115 return []
1116 start = self.start
1116 start = self.start
1117 length = self.length
1117 length = self.length
1118 inline = self._inline
1118 inline = self._inline
1119 iosize = self._io.size
1119 iosize = self._io.size
1120 buffer = util.buffer
1120 buffer = util.buffer
1121
1121
1122 l = []
1122 l = []
1123 ladd = l.append
1123 ladd = l.append
1124
1124
1125 # preload the cache
1126 try:
1125 try:
1127 while True:
1126 offset, data = self._chunkraw(revs[0], revs[-1], df=df)
1128 # ensure that the cache doesn't change out from under us
1129 _cache = self._chunkcache
1130 self._chunkraw(revs[0], revs[-1], df=df)[1]
1131 if _cache == self._chunkcache:
1132 break
1133 offset, data = _cache
1134 except OverflowError:
1127 except OverflowError:
1135 # issue4215 - we can't cache a run of chunks greater than
1128 # issue4215 - we can't cache a run of chunks greater than
1136 # 2G on Windows
1129 # 2G on Windows
1137 return [self._chunk(rev, df=df) for rev in revs]
1130 return [self._chunk(rev, df=df) for rev in revs]
1138
1131
1139 for rev in revs:
1132 for rev in revs:
1140 chunkstart = start(rev)
1133 chunkstart = start(rev)
1141 if inline:
1134 if inline:
1142 chunkstart += (rev + 1) * iosize
1135 chunkstart += (rev + 1) * iosize
1143 chunklength = length(rev)
1136 chunklength = length(rev)
1144 ladd(decompress(buffer(data, chunkstart - offset, chunklength)))
1137 ladd(decompress(buffer(data, chunkstart - offset, chunklength)))
1145
1138
1146 return l
1139 return l
1147
1140
1148 def _chunkclear(self):
1141 def _chunkclear(self):
1149 """Clear the raw chunk cache."""
1142 """Clear the raw chunk cache."""
1150 self._chunkcache = (0, '')
1143 self._chunkcache = (0, '')
1151
1144
1152 def deltaparent(self, rev):
1145 def deltaparent(self, rev):
1153 """return deltaparent of the given revision"""
1146 """return deltaparent of the given revision"""
1154 base = self.index[rev][3]
1147 base = self.index[rev][3]
1155 if base == rev:
1148 if base == rev:
1156 return nullrev
1149 return nullrev
1157 elif self._generaldelta:
1150 elif self._generaldelta:
1158 return base
1151 return base
1159 else:
1152 else:
1160 return rev - 1
1153 return rev - 1
1161
1154
1162 def revdiff(self, rev1, rev2):
1155 def revdiff(self, rev1, rev2):
1163 """return or calculate a delta between two revisions"""
1156 """return or calculate a delta between two revisions"""
1164 if rev1 != nullrev and self.deltaparent(rev2) == rev1:
1157 if rev1 != nullrev and self.deltaparent(rev2) == rev1:
1165 return str(self._chunk(rev2))
1158 return str(self._chunk(rev2))
1166
1159
1167 return mdiff.textdiff(self.revision(rev1),
1160 return mdiff.textdiff(self.revision(rev1),
1168 self.revision(rev2))
1161 self.revision(rev2))
1169
1162
1170 def revision(self, nodeorrev, _df=None):
1163 def revision(self, nodeorrev, _df=None):
1171 """return an uncompressed revision of a given node or revision
1164 """return an uncompressed revision of a given node or revision
1172 number.
1165 number.
1173
1166
1174 _df is an existing file handle to read from. It is meant to only be
1167 _df is an existing file handle to read from. It is meant to only be
1175 used internally.
1168 used internally.
1176 """
1169 """
1177 if isinstance(nodeorrev, int):
1170 if isinstance(nodeorrev, int):
1178 rev = nodeorrev
1171 rev = nodeorrev
1179 node = self.node(rev)
1172 node = self.node(rev)
1180 else:
1173 else:
1181 node = nodeorrev
1174 node = nodeorrev
1182 rev = None
1175 rev = None
1183
1176
1184 cachedrev = None
1177 cachedrev = None
1185 if node == nullid:
1178 if node == nullid:
1186 return ""
1179 return ""
1187 if self._cache:
1180 if self._cache:
1188 if self._cache[0] == node:
1181 if self._cache[0] == node:
1189 return self._cache[2]
1182 return self._cache[2]
1190 cachedrev = self._cache[1]
1183 cachedrev = self._cache[1]
1191
1184
1192 # look up what we need to read
1185 # look up what we need to read
1193 text = None
1186 text = None
1194 if rev is None:
1187 if rev is None:
1195 rev = self.rev(node)
1188 rev = self.rev(node)
1196
1189
1197 # check rev flags
1190 # check rev flags
1198 if self.flags(rev) & ~REVIDX_KNOWN_FLAGS:
1191 if self.flags(rev) & ~REVIDX_KNOWN_FLAGS:
1199 raise RevlogError(_('incompatible revision flag %x') %
1192 raise RevlogError(_('incompatible revision flag %x') %
1200 (self.flags(rev) & ~REVIDX_KNOWN_FLAGS))
1193 (self.flags(rev) & ~REVIDX_KNOWN_FLAGS))
1201
1194
1202 chain, stopped = self._deltachain(rev, stoprev=cachedrev)
1195 chain, stopped = self._deltachain(rev, stoprev=cachedrev)
1203 if stopped:
1196 if stopped:
1204 text = self._cache[2]
1197 text = self._cache[2]
1205
1198
1206 # drop cache to save memory
1199 # drop cache to save memory
1207 self._cache = None
1200 self._cache = None
1208
1201
1209 bins = self._chunks(chain, df=_df)
1202 bins = self._chunks(chain, df=_df)
1210 if text is None:
1203 if text is None:
1211 text = str(bins[0])
1204 text = str(bins[0])
1212 bins = bins[1:]
1205 bins = bins[1:]
1213
1206
1214 text = mdiff.patches(text, bins)
1207 text = mdiff.patches(text, bins)
1215
1208
1216 text = self._checkhash(text, node, rev)
1209 text = self._checkhash(text, node, rev)
1217
1210
1218 self._cache = (node, rev, text)
1211 self._cache = (node, rev, text)
1219 return text
1212 return text
1220
1213
1221 def hash(self, text, p1, p2):
1214 def hash(self, text, p1, p2):
1222 """Compute a node hash.
1215 """Compute a node hash.
1223
1216
1224 Available as a function so that subclasses can replace the hash
1217 Available as a function so that subclasses can replace the hash
1225 as needed.
1218 as needed.
1226 """
1219 """
1227 return hash(text, p1, p2)
1220 return hash(text, p1, p2)
1228
1221
1229 def _checkhash(self, text, node, rev):
1222 def _checkhash(self, text, node, rev):
1230 p1, p2 = self.parents(node)
1223 p1, p2 = self.parents(node)
1231 self.checkhash(text, p1, p2, node, rev)
1224 self.checkhash(text, p1, p2, node, rev)
1232 return text
1225 return text
1233
1226
1234 def checkhash(self, text, p1, p2, node, rev=None):
1227 def checkhash(self, text, p1, p2, node, rev=None):
1235 if node != self.hash(text, p1, p2):
1228 if node != self.hash(text, p1, p2):
1236 revornode = rev
1229 revornode = rev
1237 if revornode is None:
1230 if revornode is None:
1238 revornode = templatefilters.short(hex(node))
1231 revornode = templatefilters.short(hex(node))
1239 raise RevlogError(_("integrity check failed on %s:%s")
1232 raise RevlogError(_("integrity check failed on %s:%s")
1240 % (self.indexfile, revornode))
1233 % (self.indexfile, revornode))
1241
1234
1242 def checkinlinesize(self, tr, fp=None):
1235 def checkinlinesize(self, tr, fp=None):
1243 """Check if the revlog is too big for inline and convert if so.
1236 """Check if the revlog is too big for inline and convert if so.
1244
1237
1245 This should be called after revisions are added to the revlog. If the
1238 This should be called after revisions are added to the revlog. If the
1246 revlog has grown too large to be an inline revlog, it will convert it
1239 revlog has grown too large to be an inline revlog, it will convert it
1247 to use multiple index and data files.
1240 to use multiple index and data files.
1248 """
1241 """
1249 if not self._inline or (self.start(-2) + self.length(-2)) < _maxinline:
1242 if not self._inline or (self.start(-2) + self.length(-2)) < _maxinline:
1250 return
1243 return
1251
1244
1252 trinfo = tr.find(self.indexfile)
1245 trinfo = tr.find(self.indexfile)
1253 if trinfo is None:
1246 if trinfo is None:
1254 raise RevlogError(_("%s not found in the transaction")
1247 raise RevlogError(_("%s not found in the transaction")
1255 % self.indexfile)
1248 % self.indexfile)
1256
1249
1257 trindex = trinfo[2]
1250 trindex = trinfo[2]
1258 if trindex is not None:
1251 if trindex is not None:
1259 dataoff = self.start(trindex)
1252 dataoff = self.start(trindex)
1260 else:
1253 else:
1261 # revlog was stripped at start of transaction, use all leftover data
1254 # revlog was stripped at start of transaction, use all leftover data
1262 trindex = len(self) - 1
1255 trindex = len(self) - 1
1263 dataoff = self.end(-2)
1256 dataoff = self.end(-2)
1264
1257
1265 tr.add(self.datafile, dataoff)
1258 tr.add(self.datafile, dataoff)
1266
1259
1267 if fp:
1260 if fp:
1268 fp.flush()
1261 fp.flush()
1269 fp.close()
1262 fp.close()
1270
1263
1271 df = self.opener(self.datafile, 'w')
1264 df = self.opener(self.datafile, 'w')
1272 try:
1265 try:
1273 for r in self:
1266 for r in self:
1274 df.write(self._chunkraw(r, r)[1])
1267 df.write(self._chunkraw(r, r)[1])
1275 finally:
1268 finally:
1276 df.close()
1269 df.close()
1277
1270
1278 fp = self.opener(self.indexfile, 'w', atomictemp=True)
1271 fp = self.opener(self.indexfile, 'w', atomictemp=True)
1279 self.version &= ~(REVLOGNGINLINEDATA)
1272 self.version &= ~(REVLOGNGINLINEDATA)
1280 self._inline = False
1273 self._inline = False
1281 for i in self:
1274 for i in self:
1282 e = self._io.packentry(self.index[i], self.node, self.version, i)
1275 e = self._io.packentry(self.index[i], self.node, self.version, i)
1283 fp.write(e)
1276 fp.write(e)
1284
1277
1285 # if we don't call close, the temp file will never replace the
1278 # if we don't call close, the temp file will never replace the
1286 # real index
1279 # real index
1287 fp.close()
1280 fp.close()
1288
1281
1289 tr.replace(self.indexfile, trindex * self._io.size)
1282 tr.replace(self.indexfile, trindex * self._io.size)
1290 self._chunkclear()
1283 self._chunkclear()
1291
1284
1292 def addrevision(self, text, transaction, link, p1, p2, cachedelta=None,
1285 def addrevision(self, text, transaction, link, p1, p2, cachedelta=None,
1293 node=None):
1286 node=None):
1294 """add a revision to the log
1287 """add a revision to the log
1295
1288
1296 text - the revision data to add
1289 text - the revision data to add
1297 transaction - the transaction object used for rollback
1290 transaction - the transaction object used for rollback
1298 link - the linkrev data to add
1291 link - the linkrev data to add
1299 p1, p2 - the parent nodeids of the revision
1292 p1, p2 - the parent nodeids of the revision
1300 cachedelta - an optional precomputed delta
1293 cachedelta - an optional precomputed delta
1301 node - nodeid of revision; typically node is not specified, and it is
1294 node - nodeid of revision; typically node is not specified, and it is
1302 computed by default as hash(text, p1, p2), however subclasses might
1295 computed by default as hash(text, p1, p2), however subclasses might
1303 use different hashing method (and override checkhash() in such case)
1296 use different hashing method (and override checkhash() in such case)
1304 """
1297 """
1305 if link == nullrev:
1298 if link == nullrev:
1306 raise RevlogError(_("attempted to add linkrev -1 to %s")
1299 raise RevlogError(_("attempted to add linkrev -1 to %s")
1307 % self.indexfile)
1300 % self.indexfile)
1308
1301
1309 if len(text) > _maxentrysize:
1302 if len(text) > _maxentrysize:
1310 raise RevlogError(
1303 raise RevlogError(
1311 _("%s: size of %d bytes exceeds maximum revlog storage of 2GiB")
1304 _("%s: size of %d bytes exceeds maximum revlog storage of 2GiB")
1312 % (self.indexfile, len(text)))
1305 % (self.indexfile, len(text)))
1313
1306
1314 node = node or self.hash(text, p1, p2)
1307 node = node or self.hash(text, p1, p2)
1315 if node in self.nodemap:
1308 if node in self.nodemap:
1316 return node
1309 return node
1317
1310
1318 dfh = None
1311 dfh = None
1319 if not self._inline:
1312 if not self._inline:
1320 dfh = self.opener(self.datafile, "a+")
1313 dfh = self.opener(self.datafile, "a+")
1321 ifh = self.opener(self.indexfile, "a+")
1314 ifh = self.opener(self.indexfile, "a+")
1322 try:
1315 try:
1323 return self._addrevision(node, text, transaction, link, p1, p2,
1316 return self._addrevision(node, text, transaction, link, p1, p2,
1324 REVIDX_DEFAULT_FLAGS, cachedelta, ifh, dfh)
1317 REVIDX_DEFAULT_FLAGS, cachedelta, ifh, dfh)
1325 finally:
1318 finally:
1326 if dfh:
1319 if dfh:
1327 dfh.close()
1320 dfh.close()
1328 ifh.close()
1321 ifh.close()
1329
1322
1330 def compress(self, text):
1323 def compress(self, text):
1331 """ generate a possibly-compressed representation of text """
1324 """ generate a possibly-compressed representation of text """
1332 if not text:
1325 if not text:
1333 return ("", text)
1326 return ("", text)
1334 l = len(text)
1327 l = len(text)
1335 bin = None
1328 bin = None
1336 if l < 44:
1329 if l < 44:
1337 pass
1330 pass
1338 elif l > 1000000:
1331 elif l > 1000000:
1339 # zlib makes an internal copy, thus doubling memory usage for
1332 # zlib makes an internal copy, thus doubling memory usage for
1340 # large files, so lets do this in pieces
1333 # large files, so lets do this in pieces
1341 z = zlib.compressobj()
1334 z = zlib.compressobj()
1342 p = []
1335 p = []
1343 pos = 0
1336 pos = 0
1344 while pos < l:
1337 while pos < l:
1345 pos2 = pos + 2**20
1338 pos2 = pos + 2**20
1346 p.append(z.compress(text[pos:pos2]))
1339 p.append(z.compress(text[pos:pos2]))
1347 pos = pos2
1340 pos = pos2
1348 p.append(z.flush())
1341 p.append(z.flush())
1349 if sum(map(len, p)) < l:
1342 if sum(map(len, p)) < l:
1350 bin = "".join(p)
1343 bin = "".join(p)
1351 else:
1344 else:
1352 bin = _compress(text)
1345 bin = _compress(text)
1353 if bin is None or len(bin) > l:
1346 if bin is None or len(bin) > l:
1354 if text[0] == '\0':
1347 if text[0] == '\0':
1355 return ("", text)
1348 return ("", text)
1356 return ('u', text)
1349 return ('u', text)
1357 return ("", bin)
1350 return ("", bin)
1358
1351
1359 def _isgooddelta(self, d, textlen):
1352 def _isgooddelta(self, d, textlen):
1360 """Returns True if the given delta is good. Good means that it is within
1353 """Returns True if the given delta is good. Good means that it is within
1361 the disk span, disk size, and chain length bounds that we know to be
1354 the disk span, disk size, and chain length bounds that we know to be
1362 performant."""
1355 performant."""
1363 if d is None:
1356 if d is None:
1364 return False
1357 return False
1365
1358
1366 # - 'dist' is the distance from the base revision -- bounding it limits
1359 # - 'dist' is the distance from the base revision -- bounding it limits
1367 # the amount of I/O we need to do.
1360 # the amount of I/O we need to do.
1368 # - 'compresseddeltalen' is the sum of the total size of deltas we need
1361 # - 'compresseddeltalen' is the sum of the total size of deltas we need
1369 # to apply -- bounding it limits the amount of CPU we consume.
1362 # to apply -- bounding it limits the amount of CPU we consume.
1370 dist, l, data, base, chainbase, chainlen, compresseddeltalen = d
1363 dist, l, data, base, chainbase, chainlen, compresseddeltalen = d
1371 if (dist > textlen * 4 or l > textlen or
1364 if (dist > textlen * 4 or l > textlen or
1372 compresseddeltalen > textlen * 2 or
1365 compresseddeltalen > textlen * 2 or
1373 (self._maxchainlen and chainlen > self._maxchainlen)):
1366 (self._maxchainlen and chainlen > self._maxchainlen)):
1374 return False
1367 return False
1375
1368
1376 return True
1369 return True
1377
1370
1378 def _addrevision(self, node, text, transaction, link, p1, p2, flags,
1371 def _addrevision(self, node, text, transaction, link, p1, p2, flags,
1379 cachedelta, ifh, dfh, alwayscache=False):
1372 cachedelta, ifh, dfh, alwayscache=False):
1380 """internal function to add revisions to the log
1373 """internal function to add revisions to the log
1381
1374
1382 see addrevision for argument descriptions.
1375 see addrevision for argument descriptions.
1383 invariants:
1376 invariants:
1384 - text is optional (can be None); if not set, cachedelta must be set.
1377 - text is optional (can be None); if not set, cachedelta must be set.
1385 if both are set, they must correspond to each other.
1378 if both are set, they must correspond to each other.
1386 """
1379 """
1387 btext = [text]
1380 btext = [text]
1388 def buildtext():
1381 def buildtext():
1389 if btext[0] is not None:
1382 if btext[0] is not None:
1390 return btext[0]
1383 return btext[0]
1391 baserev = cachedelta[0]
1384 baserev = cachedelta[0]
1392 delta = cachedelta[1]
1385 delta = cachedelta[1]
1393 # special case deltas which replace entire base; no need to decode
1386 # special case deltas which replace entire base; no need to decode
1394 # base revision. this neatly avoids censored bases, which throw when
1387 # base revision. this neatly avoids censored bases, which throw when
1395 # they're decoded.
1388 # they're decoded.
1396 hlen = struct.calcsize(">lll")
1389 hlen = struct.calcsize(">lll")
1397 if delta[:hlen] == mdiff.replacediffheader(self.rawsize(baserev),
1390 if delta[:hlen] == mdiff.replacediffheader(self.rawsize(baserev),
1398 len(delta) - hlen):
1391 len(delta) - hlen):
1399 btext[0] = delta[hlen:]
1392 btext[0] = delta[hlen:]
1400 else:
1393 else:
1401 if self._inline:
1394 if self._inline:
1402 fh = ifh
1395 fh = ifh
1403 else:
1396 else:
1404 fh = dfh
1397 fh = dfh
1405 basetext = self.revision(self.node(baserev), _df=fh)
1398 basetext = self.revision(self.node(baserev), _df=fh)
1406 btext[0] = mdiff.patch(basetext, delta)
1399 btext[0] = mdiff.patch(basetext, delta)
1407 try:
1400 try:
1408 self.checkhash(btext[0], p1, p2, node)
1401 self.checkhash(btext[0], p1, p2, node)
1409 if flags & REVIDX_ISCENSORED:
1402 if flags & REVIDX_ISCENSORED:
1410 raise RevlogError(_('node %s is not censored') % node)
1403 raise RevlogError(_('node %s is not censored') % node)
1411 except CensoredNodeError:
1404 except CensoredNodeError:
1412 # must pass the censored index flag to add censored revisions
1405 # must pass the censored index flag to add censored revisions
1413 if not flags & REVIDX_ISCENSORED:
1406 if not flags & REVIDX_ISCENSORED:
1414 raise
1407 raise
1415 return btext[0]
1408 return btext[0]
1416
1409
1417 def builddelta(rev):
1410 def builddelta(rev):
1418 # can we use the cached delta?
1411 # can we use the cached delta?
1419 if cachedelta and cachedelta[0] == rev:
1412 if cachedelta and cachedelta[0] == rev:
1420 delta = cachedelta[1]
1413 delta = cachedelta[1]
1421 else:
1414 else:
1422 t = buildtext()
1415 t = buildtext()
1423 if self.iscensored(rev):
1416 if self.iscensored(rev):
1424 # deltas based on a censored revision must replace the
1417 # deltas based on a censored revision must replace the
1425 # full content in one patch, so delta works everywhere
1418 # full content in one patch, so delta works everywhere
1426 header = mdiff.replacediffheader(self.rawsize(rev), len(t))
1419 header = mdiff.replacediffheader(self.rawsize(rev), len(t))
1427 delta = header + t
1420 delta = header + t
1428 else:
1421 else:
1429 if self._inline:
1422 if self._inline:
1430 fh = ifh
1423 fh = ifh
1431 else:
1424 else:
1432 fh = dfh
1425 fh = dfh
1433 ptext = self.revision(self.node(rev), _df=fh)
1426 ptext = self.revision(self.node(rev), _df=fh)
1434 delta = mdiff.textdiff(ptext, t)
1427 delta = mdiff.textdiff(ptext, t)
1435 data = self.compress(delta)
1428 data = self.compress(delta)
1436 l = len(data[1]) + len(data[0])
1429 l = len(data[1]) + len(data[0])
1437 if basecache[0] == rev:
1430 if basecache[0] == rev:
1438 chainbase = basecache[1]
1431 chainbase = basecache[1]
1439 else:
1432 else:
1440 chainbase = self.chainbase(rev)
1433 chainbase = self.chainbase(rev)
1441 dist = l + offset - self.start(chainbase)
1434 dist = l + offset - self.start(chainbase)
1442 if self._generaldelta:
1435 if self._generaldelta:
1443 base = rev
1436 base = rev
1444 else:
1437 else:
1445 base = chainbase
1438 base = chainbase
1446 chainlen, compresseddeltalen = self._chaininfo(rev)
1439 chainlen, compresseddeltalen = self._chaininfo(rev)
1447 chainlen += 1
1440 chainlen += 1
1448 compresseddeltalen += l
1441 compresseddeltalen += l
1449 return dist, l, data, base, chainbase, chainlen, compresseddeltalen
1442 return dist, l, data, base, chainbase, chainlen, compresseddeltalen
1450
1443
1451 curr = len(self)
1444 curr = len(self)
1452 prev = curr - 1
1445 prev = curr - 1
1453 base = chainbase = curr
1446 base = chainbase = curr
1454 offset = self.end(prev)
1447 offset = self.end(prev)
1455 delta = None
1448 delta = None
1456 if self._basecache is None:
1449 if self._basecache is None:
1457 self._basecache = (prev, self.chainbase(prev))
1450 self._basecache = (prev, self.chainbase(prev))
1458 basecache = self._basecache
1451 basecache = self._basecache
1459 p1r, p2r = self.rev(p1), self.rev(p2)
1452 p1r, p2r = self.rev(p1), self.rev(p2)
1460
1453
1461 # full versions are inserted when the needed deltas
1454 # full versions are inserted when the needed deltas
1462 # become comparable to the uncompressed text
1455 # become comparable to the uncompressed text
1463 if text is None:
1456 if text is None:
1464 textlen = mdiff.patchedsize(self.rawsize(cachedelta[0]),
1457 textlen = mdiff.patchedsize(self.rawsize(cachedelta[0]),
1465 cachedelta[1])
1458 cachedelta[1])
1466 else:
1459 else:
1467 textlen = len(text)
1460 textlen = len(text)
1468
1461
1469 # should we try to build a delta?
1462 # should we try to build a delta?
1470 if prev != nullrev:
1463 if prev != nullrev:
1471 tested = set()
1464 tested = set()
1472 if cachedelta and self._generaldelta and self._lazydeltabase:
1465 if cachedelta and self._generaldelta and self._lazydeltabase:
1473 # Assume what we received from the server is a good choice
1466 # Assume what we received from the server is a good choice
1474 # build delta will reuse the cache
1467 # build delta will reuse the cache
1475 candidatedelta = builddelta(cachedelta[0])
1468 candidatedelta = builddelta(cachedelta[0])
1476 tested.add(cachedelta[0])
1469 tested.add(cachedelta[0])
1477 if self._isgooddelta(candidatedelta, textlen):
1470 if self._isgooddelta(candidatedelta, textlen):
1478 delta = candidatedelta
1471 delta = candidatedelta
1479 if delta is None and self._generaldelta:
1472 if delta is None and self._generaldelta:
1480 # exclude already lazy tested base if any
1473 # exclude already lazy tested base if any
1481 parents = [p for p in (p1r, p2r)
1474 parents = [p for p in (p1r, p2r)
1482 if p != nullrev and p not in tested]
1475 if p != nullrev and p not in tested]
1483 if parents and not self._aggressivemergedeltas:
1476 if parents and not self._aggressivemergedeltas:
1484 # Pick whichever parent is closer to us (to minimize the
1477 # Pick whichever parent is closer to us (to minimize the
1485 # chance of having to build a fulltext).
1478 # chance of having to build a fulltext).
1486 parents = [max(parents)]
1479 parents = [max(parents)]
1487 tested.update(parents)
1480 tested.update(parents)
1488 pdeltas = []
1481 pdeltas = []
1489 for p in parents:
1482 for p in parents:
1490 pd = builddelta(p)
1483 pd = builddelta(p)
1491 if self._isgooddelta(pd, textlen):
1484 if self._isgooddelta(pd, textlen):
1492 pdeltas.append(pd)
1485 pdeltas.append(pd)
1493 if pdeltas:
1486 if pdeltas:
1494 delta = min(pdeltas, key=lambda x: x[1])
1487 delta = min(pdeltas, key=lambda x: x[1])
1495 if delta is None and prev not in tested:
1488 if delta is None and prev not in tested:
1496 # other approach failed try against prev to hopefully save us a
1489 # other approach failed try against prev to hopefully save us a
1497 # fulltext.
1490 # fulltext.
1498 candidatedelta = builddelta(prev)
1491 candidatedelta = builddelta(prev)
1499 if self._isgooddelta(candidatedelta, textlen):
1492 if self._isgooddelta(candidatedelta, textlen):
1500 delta = candidatedelta
1493 delta = candidatedelta
1501 if delta is not None:
1494 if delta is not None:
1502 dist, l, data, base, chainbase, chainlen, compresseddeltalen = delta
1495 dist, l, data, base, chainbase, chainlen, compresseddeltalen = delta
1503 else:
1496 else:
1504 text = buildtext()
1497 text = buildtext()
1505 data = self.compress(text)
1498 data = self.compress(text)
1506 l = len(data[1]) + len(data[0])
1499 l = len(data[1]) + len(data[0])
1507 base = chainbase = curr
1500 base = chainbase = curr
1508
1501
1509 e = (offset_type(offset, flags), l, textlen,
1502 e = (offset_type(offset, flags), l, textlen,
1510 base, link, p1r, p2r, node)
1503 base, link, p1r, p2r, node)
1511 self.index.insert(-1, e)
1504 self.index.insert(-1, e)
1512 self.nodemap[node] = curr
1505 self.nodemap[node] = curr
1513
1506
1514 entry = self._io.packentry(e, self.node, self.version, curr)
1507 entry = self._io.packentry(e, self.node, self.version, curr)
1515 self._writeentry(transaction, ifh, dfh, entry, data, link, offset)
1508 self._writeentry(transaction, ifh, dfh, entry, data, link, offset)
1516
1509
1517 if alwayscache and text is None:
1510 if alwayscache and text is None:
1518 text = buildtext()
1511 text = buildtext()
1519
1512
1520 if type(text) == str: # only accept immutable objects
1513 if type(text) == str: # only accept immutable objects
1521 self._cache = (node, curr, text)
1514 self._cache = (node, curr, text)
1522 self._basecache = (curr, chainbase)
1515 self._basecache = (curr, chainbase)
1523 return node
1516 return node
1524
1517
1525 def _writeentry(self, transaction, ifh, dfh, entry, data, link, offset):
1518 def _writeentry(self, transaction, ifh, dfh, entry, data, link, offset):
1526 # Files opened in a+ mode have inconsistent behavior on various
1519 # Files opened in a+ mode have inconsistent behavior on various
1527 # platforms. Windows requires that a file positioning call be made
1520 # platforms. Windows requires that a file positioning call be made
1528 # when the file handle transitions between reads and writes. See
1521 # when the file handle transitions between reads and writes. See
1529 # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
1522 # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
1530 # platforms, Python or the platform itself can be buggy. Some versions
1523 # platforms, Python or the platform itself can be buggy. Some versions
1531 # of Solaris have been observed to not append at the end of the file
1524 # of Solaris have been observed to not append at the end of the file
1532 # if the file was seeked to before the end. See issue4943 for more.
1525 # if the file was seeked to before the end. See issue4943 for more.
1533 #
1526 #
1534 # We work around this issue by inserting a seek() before writing.
1527 # We work around this issue by inserting a seek() before writing.
1535 # Note: This is likely not necessary on Python 3.
1528 # Note: This is likely not necessary on Python 3.
1536 ifh.seek(0, os.SEEK_END)
1529 ifh.seek(0, os.SEEK_END)
1537 if dfh:
1530 if dfh:
1538 dfh.seek(0, os.SEEK_END)
1531 dfh.seek(0, os.SEEK_END)
1539
1532
1540 curr = len(self) - 1
1533 curr = len(self) - 1
1541 if not self._inline:
1534 if not self._inline:
1542 transaction.add(self.datafile, offset)
1535 transaction.add(self.datafile, offset)
1543 transaction.add(self.indexfile, curr * len(entry))
1536 transaction.add(self.indexfile, curr * len(entry))
1544 if data[0]:
1537 if data[0]:
1545 dfh.write(data[0])
1538 dfh.write(data[0])
1546 dfh.write(data[1])
1539 dfh.write(data[1])
1547 ifh.write(entry)
1540 ifh.write(entry)
1548 else:
1541 else:
1549 offset += curr * self._io.size
1542 offset += curr * self._io.size
1550 transaction.add(self.indexfile, offset, curr)
1543 transaction.add(self.indexfile, offset, curr)
1551 ifh.write(entry)
1544 ifh.write(entry)
1552 ifh.write(data[0])
1545 ifh.write(data[0])
1553 ifh.write(data[1])
1546 ifh.write(data[1])
1554 self.checkinlinesize(transaction, ifh)
1547 self.checkinlinesize(transaction, ifh)
1555
1548
1556 def addgroup(self, cg, linkmapper, transaction, addrevisioncb=None):
1549 def addgroup(self, cg, linkmapper, transaction, addrevisioncb=None):
1557 """
1550 """
1558 add a delta group
1551 add a delta group
1559
1552
1560 given a set of deltas, add them to the revision log. the
1553 given a set of deltas, add them to the revision log. the
1561 first delta is against its parent, which should be in our
1554 first delta is against its parent, which should be in our
1562 log, the rest are against the previous delta.
1555 log, the rest are against the previous delta.
1563
1556
1564 If ``addrevisioncb`` is defined, it will be called with arguments of
1557 If ``addrevisioncb`` is defined, it will be called with arguments of
1565 this revlog and the node that was added.
1558 this revlog and the node that was added.
1566 """
1559 """
1567
1560
1568 # track the base of the current delta log
1561 # track the base of the current delta log
1569 content = []
1562 content = []
1570 node = None
1563 node = None
1571
1564
1572 r = len(self)
1565 r = len(self)
1573 end = 0
1566 end = 0
1574 if r:
1567 if r:
1575 end = self.end(r - 1)
1568 end = self.end(r - 1)
1576 ifh = self.opener(self.indexfile, "a+")
1569 ifh = self.opener(self.indexfile, "a+")
1577 isize = r * self._io.size
1570 isize = r * self._io.size
1578 if self._inline:
1571 if self._inline:
1579 transaction.add(self.indexfile, end + isize, r)
1572 transaction.add(self.indexfile, end + isize, r)
1580 dfh = None
1573 dfh = None
1581 else:
1574 else:
1582 transaction.add(self.indexfile, isize, r)
1575 transaction.add(self.indexfile, isize, r)
1583 transaction.add(self.datafile, end)
1576 transaction.add(self.datafile, end)
1584 dfh = self.opener(self.datafile, "a+")
1577 dfh = self.opener(self.datafile, "a+")
1585 def flush():
1578 def flush():
1586 if dfh:
1579 if dfh:
1587 dfh.flush()
1580 dfh.flush()
1588 ifh.flush()
1581 ifh.flush()
1589 try:
1582 try:
1590 # loop through our set of deltas
1583 # loop through our set of deltas
1591 chain = None
1584 chain = None
1592 while True:
1585 while True:
1593 chunkdata = cg.deltachunk(chain)
1586 chunkdata = cg.deltachunk(chain)
1594 if not chunkdata:
1587 if not chunkdata:
1595 break
1588 break
1596 node = chunkdata['node']
1589 node = chunkdata['node']
1597 p1 = chunkdata['p1']
1590 p1 = chunkdata['p1']
1598 p2 = chunkdata['p2']
1591 p2 = chunkdata['p2']
1599 cs = chunkdata['cs']
1592 cs = chunkdata['cs']
1600 deltabase = chunkdata['deltabase']
1593 deltabase = chunkdata['deltabase']
1601 delta = chunkdata['delta']
1594 delta = chunkdata['delta']
1602 flags = chunkdata['flags'] or REVIDX_DEFAULT_FLAGS
1595 flags = chunkdata['flags'] or REVIDX_DEFAULT_FLAGS
1603
1596
1604 content.append(node)
1597 content.append(node)
1605
1598
1606 link = linkmapper(cs)
1599 link = linkmapper(cs)
1607 if node in self.nodemap:
1600 if node in self.nodemap:
1608 # this can happen if two branches make the same change
1601 # this can happen if two branches make the same change
1609 chain = node
1602 chain = node
1610 continue
1603 continue
1611
1604
1612 for p in (p1, p2):
1605 for p in (p1, p2):
1613 if p not in self.nodemap:
1606 if p not in self.nodemap:
1614 raise LookupError(p, self.indexfile,
1607 raise LookupError(p, self.indexfile,
1615 _('unknown parent'))
1608 _('unknown parent'))
1616
1609
1617 if deltabase not in self.nodemap:
1610 if deltabase not in self.nodemap:
1618 raise LookupError(deltabase, self.indexfile,
1611 raise LookupError(deltabase, self.indexfile,
1619 _('unknown delta base'))
1612 _('unknown delta base'))
1620
1613
1621 baserev = self.rev(deltabase)
1614 baserev = self.rev(deltabase)
1622
1615
1623 if baserev != nullrev and self.iscensored(baserev):
1616 if baserev != nullrev and self.iscensored(baserev):
1624 # if base is censored, delta must be full replacement in a
1617 # if base is censored, delta must be full replacement in a
1625 # single patch operation
1618 # single patch operation
1626 hlen = struct.calcsize(">lll")
1619 hlen = struct.calcsize(">lll")
1627 oldlen = self.rawsize(baserev)
1620 oldlen = self.rawsize(baserev)
1628 newlen = len(delta) - hlen
1621 newlen = len(delta) - hlen
1629 if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
1622 if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
1630 raise error.CensoredBaseError(self.indexfile,
1623 raise error.CensoredBaseError(self.indexfile,
1631 self.node(baserev))
1624 self.node(baserev))
1632
1625
1633 if not flags and self._peek_iscensored(baserev, delta, flush):
1626 if not flags and self._peek_iscensored(baserev, delta, flush):
1634 flags |= REVIDX_ISCENSORED
1627 flags |= REVIDX_ISCENSORED
1635
1628
1636 # We assume consumers of addrevisioncb will want to retrieve
1629 # We assume consumers of addrevisioncb will want to retrieve
1637 # the added revision, which will require a call to
1630 # the added revision, which will require a call to
1638 # revision(). revision() will fast path if there is a cache
1631 # revision(). revision() will fast path if there is a cache
1639 # hit. So, we tell _addrevision() to always cache in this case.
1632 # hit. So, we tell _addrevision() to always cache in this case.
1640 chain = self._addrevision(node, None, transaction, link,
1633 chain = self._addrevision(node, None, transaction, link,
1641 p1, p2, flags, (baserev, delta),
1634 p1, p2, flags, (baserev, delta),
1642 ifh, dfh,
1635 ifh, dfh,
1643 alwayscache=bool(addrevisioncb))
1636 alwayscache=bool(addrevisioncb))
1644
1637
1645 if addrevisioncb:
1638 if addrevisioncb:
1646 addrevisioncb(self, chain)
1639 addrevisioncb(self, chain)
1647
1640
1648 if not dfh and not self._inline:
1641 if not dfh and not self._inline:
1649 # addrevision switched from inline to conventional
1642 # addrevision switched from inline to conventional
1650 # reopen the index
1643 # reopen the index
1651 ifh.close()
1644 ifh.close()
1652 dfh = self.opener(self.datafile, "a+")
1645 dfh = self.opener(self.datafile, "a+")
1653 ifh = self.opener(self.indexfile, "a+")
1646 ifh = self.opener(self.indexfile, "a+")
1654 finally:
1647 finally:
1655 if dfh:
1648 if dfh:
1656 dfh.close()
1649 dfh.close()
1657 ifh.close()
1650 ifh.close()
1658
1651
1659 return content
1652 return content
1660
1653
1661 def iscensored(self, rev):
1654 def iscensored(self, rev):
1662 """Check if a file revision is censored."""
1655 """Check if a file revision is censored."""
1663 return False
1656 return False
1664
1657
1665 def _peek_iscensored(self, baserev, delta, flush):
1658 def _peek_iscensored(self, baserev, delta, flush):
1666 """Quickly check if a delta produces a censored revision."""
1659 """Quickly check if a delta produces a censored revision."""
1667 return False
1660 return False
1668
1661
1669 def getstrippoint(self, minlink):
1662 def getstrippoint(self, minlink):
1670 """find the minimum rev that must be stripped to strip the linkrev
1663 """find the minimum rev that must be stripped to strip the linkrev
1671
1664
1672 Returns a tuple containing the minimum rev and a set of all revs that
1665 Returns a tuple containing the minimum rev and a set of all revs that
1673 have linkrevs that will be broken by this strip.
1666 have linkrevs that will be broken by this strip.
1674 """
1667 """
1675 brokenrevs = set()
1668 brokenrevs = set()
1676 strippoint = len(self)
1669 strippoint = len(self)
1677
1670
1678 heads = {}
1671 heads = {}
1679 futurelargelinkrevs = set()
1672 futurelargelinkrevs = set()
1680 for head in self.headrevs():
1673 for head in self.headrevs():
1681 headlinkrev = self.linkrev(head)
1674 headlinkrev = self.linkrev(head)
1682 heads[head] = headlinkrev
1675 heads[head] = headlinkrev
1683 if headlinkrev >= minlink:
1676 if headlinkrev >= minlink:
1684 futurelargelinkrevs.add(headlinkrev)
1677 futurelargelinkrevs.add(headlinkrev)
1685
1678
1686 # This algorithm involves walking down the rev graph, starting at the
1679 # This algorithm involves walking down the rev graph, starting at the
1687 # heads. Since the revs are topologically sorted according to linkrev,
1680 # heads. Since the revs are topologically sorted according to linkrev,
1688 # once all head linkrevs are below the minlink, we know there are
1681 # once all head linkrevs are below the minlink, we know there are
1689 # no more revs that could have a linkrev greater than minlink.
1682 # no more revs that could have a linkrev greater than minlink.
1690 # So we can stop walking.
1683 # So we can stop walking.
1691 while futurelargelinkrevs:
1684 while futurelargelinkrevs:
1692 strippoint -= 1
1685 strippoint -= 1
1693 linkrev = heads.pop(strippoint)
1686 linkrev = heads.pop(strippoint)
1694
1687
1695 if linkrev < minlink:
1688 if linkrev < minlink:
1696 brokenrevs.add(strippoint)
1689 brokenrevs.add(strippoint)
1697 else:
1690 else:
1698 futurelargelinkrevs.remove(linkrev)
1691 futurelargelinkrevs.remove(linkrev)
1699
1692
1700 for p in self.parentrevs(strippoint):
1693 for p in self.parentrevs(strippoint):
1701 if p != nullrev:
1694 if p != nullrev:
1702 plinkrev = self.linkrev(p)
1695 plinkrev = self.linkrev(p)
1703 heads[p] = plinkrev
1696 heads[p] = plinkrev
1704 if plinkrev >= minlink:
1697 if plinkrev >= minlink:
1705 futurelargelinkrevs.add(plinkrev)
1698 futurelargelinkrevs.add(plinkrev)
1706
1699
1707 return strippoint, brokenrevs
1700 return strippoint, brokenrevs
1708
1701
1709 def strip(self, minlink, transaction):
1702 def strip(self, minlink, transaction):
1710 """truncate the revlog on the first revision with a linkrev >= minlink
1703 """truncate the revlog on the first revision with a linkrev >= minlink
1711
1704
1712 This function is called when we're stripping revision minlink and
1705 This function is called when we're stripping revision minlink and
1713 its descendants from the repository.
1706 its descendants from the repository.
1714
1707
1715 We have to remove all revisions with linkrev >= minlink, because
1708 We have to remove all revisions with linkrev >= minlink, because
1716 the equivalent changelog revisions will be renumbered after the
1709 the equivalent changelog revisions will be renumbered after the
1717 strip.
1710 strip.
1718
1711
1719 So we truncate the revlog on the first of these revisions, and
1712 So we truncate the revlog on the first of these revisions, and
1720 trust that the caller has saved the revisions that shouldn't be
1713 trust that the caller has saved the revisions that shouldn't be
1721 removed and that it'll re-add them after this truncation.
1714 removed and that it'll re-add them after this truncation.
1722 """
1715 """
1723 if len(self) == 0:
1716 if len(self) == 0:
1724 return
1717 return
1725
1718
1726 rev, _ = self.getstrippoint(minlink)
1719 rev, _ = self.getstrippoint(minlink)
1727 if rev == len(self):
1720 if rev == len(self):
1728 return
1721 return
1729
1722
1730 # first truncate the files on disk
1723 # first truncate the files on disk
1731 end = self.start(rev)
1724 end = self.start(rev)
1732 if not self._inline:
1725 if not self._inline:
1733 transaction.add(self.datafile, end)
1726 transaction.add(self.datafile, end)
1734 end = rev * self._io.size
1727 end = rev * self._io.size
1735 else:
1728 else:
1736 end += rev * self._io.size
1729 end += rev * self._io.size
1737
1730
1738 transaction.add(self.indexfile, end)
1731 transaction.add(self.indexfile, end)
1739
1732
1740 # then reset internal state in memory to forget those revisions
1733 # then reset internal state in memory to forget those revisions
1741 self._cache = None
1734 self._cache = None
1742 self._chaininfocache = {}
1735 self._chaininfocache = {}
1743 self._chunkclear()
1736 self._chunkclear()
1744 for x in xrange(rev, len(self)):
1737 for x in xrange(rev, len(self)):
1745 del self.nodemap[self.node(x)]
1738 del self.nodemap[self.node(x)]
1746
1739
1747 del self.index[rev:-1]
1740 del self.index[rev:-1]
1748
1741
1749 def checksize(self):
1742 def checksize(self):
1750 expected = 0
1743 expected = 0
1751 if len(self):
1744 if len(self):
1752 expected = max(0, self.end(len(self) - 1))
1745 expected = max(0, self.end(len(self) - 1))
1753
1746
1754 try:
1747 try:
1755 f = self.opener(self.datafile)
1748 f = self.opener(self.datafile)
1756 f.seek(0, 2)
1749 f.seek(0, 2)
1757 actual = f.tell()
1750 actual = f.tell()
1758 f.close()
1751 f.close()
1759 dd = actual - expected
1752 dd = actual - expected
1760 except IOError as inst:
1753 except IOError as inst:
1761 if inst.errno != errno.ENOENT:
1754 if inst.errno != errno.ENOENT:
1762 raise
1755 raise
1763 dd = 0
1756 dd = 0
1764
1757
1765 try:
1758 try:
1766 f = self.opener(self.indexfile)
1759 f = self.opener(self.indexfile)
1767 f.seek(0, 2)
1760 f.seek(0, 2)
1768 actual = f.tell()
1761 actual = f.tell()
1769 f.close()
1762 f.close()
1770 s = self._io.size
1763 s = self._io.size
1771 i = max(0, actual // s)
1764 i = max(0, actual // s)
1772 di = actual - (i * s)
1765 di = actual - (i * s)
1773 if self._inline:
1766 if self._inline:
1774 databytes = 0
1767 databytes = 0
1775 for r in self:
1768 for r in self:
1776 databytes += max(0, self.length(r))
1769 databytes += max(0, self.length(r))
1777 dd = 0
1770 dd = 0
1778 di = actual - len(self) * s - databytes
1771 di = actual - len(self) * s - databytes
1779 except IOError as inst:
1772 except IOError as inst:
1780 if inst.errno != errno.ENOENT:
1773 if inst.errno != errno.ENOENT:
1781 raise
1774 raise
1782 di = 0
1775 di = 0
1783
1776
1784 return (dd, di)
1777 return (dd, di)
1785
1778
1786 def files(self):
1779 def files(self):
1787 res = [self.indexfile]
1780 res = [self.indexfile]
1788 if not self._inline:
1781 if not self._inline:
1789 res.append(self.datafile)
1782 res.append(self.datafile)
1790 return res
1783 return res
General Comments 0
You need to be logged in to leave comments. Login now