##// END OF EJS Templates
revlog: avoid string slice when decompressing u* chunks...
Gregory Szorc -
r27475:a2e2a8fa default
parent child Browse files
Show More
@@ -1,1786 +1,1786 b''
1 # revlog.py - storage back-end for mercurial
1 # revlog.py - storage back-end for mercurial
2 #
2 #
3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 #
4 #
5 # This software may be used and distributed according to the terms of the
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
6 # GNU General Public License version 2 or any later version.
7
7
8 """Storage back-end for Mercurial.
8 """Storage back-end for Mercurial.
9
9
10 This provides efficient delta storage with O(1) retrieve and append
10 This provides efficient delta storage with O(1) retrieve and append
11 and O(changes) merge between branches.
11 and O(changes) merge between branches.
12 """
12 """
13
13
14 from __future__ import absolute_import
14 from __future__ import absolute_import
15
15
16 import collections
16 import collections
17 import errno
17 import errno
18 import os
18 import os
19 import struct
19 import struct
20 import zlib
20 import zlib
21
21
22 # import stuff from node for others to import from revlog
22 # import stuff from node for others to import from revlog
23 from .node import (
23 from .node import (
24 bin,
24 bin,
25 hex,
25 hex,
26 nullid,
26 nullid,
27 nullrev,
27 nullrev,
28 )
28 )
29 from .i18n import _
29 from .i18n import _
30 from . import (
30 from . import (
31 ancestor,
31 ancestor,
32 error,
32 error,
33 mdiff,
33 mdiff,
34 parsers,
34 parsers,
35 templatefilters,
35 templatefilters,
36 util,
36 util,
37 )
37 )
38
38
39 _pack = struct.pack
39 _pack = struct.pack
40 _unpack = struct.unpack
40 _unpack = struct.unpack
41 _compress = zlib.compress
41 _compress = zlib.compress
42 _decompress = zlib.decompress
42 _decompress = zlib.decompress
43 _sha = util.sha1
43 _sha = util.sha1
44
44
45 # revlog header flags
45 # revlog header flags
46 REVLOGV0 = 0
46 REVLOGV0 = 0
47 REVLOGNG = 1
47 REVLOGNG = 1
48 REVLOGNGINLINEDATA = (1 << 16)
48 REVLOGNGINLINEDATA = (1 << 16)
49 REVLOGGENERALDELTA = (1 << 17)
49 REVLOGGENERALDELTA = (1 << 17)
50 REVLOG_DEFAULT_FLAGS = REVLOGNGINLINEDATA
50 REVLOG_DEFAULT_FLAGS = REVLOGNGINLINEDATA
51 REVLOG_DEFAULT_FORMAT = REVLOGNG
51 REVLOG_DEFAULT_FORMAT = REVLOGNG
52 REVLOG_DEFAULT_VERSION = REVLOG_DEFAULT_FORMAT | REVLOG_DEFAULT_FLAGS
52 REVLOG_DEFAULT_VERSION = REVLOG_DEFAULT_FORMAT | REVLOG_DEFAULT_FLAGS
53 REVLOGNG_FLAGS = REVLOGNGINLINEDATA | REVLOGGENERALDELTA
53 REVLOGNG_FLAGS = REVLOGNGINLINEDATA | REVLOGGENERALDELTA
54
54
55 # revlog index flags
55 # revlog index flags
56 REVIDX_ISCENSORED = (1 << 15) # revision has censor metadata, must be verified
56 REVIDX_ISCENSORED = (1 << 15) # revision has censor metadata, must be verified
57 REVIDX_DEFAULT_FLAGS = 0
57 REVIDX_DEFAULT_FLAGS = 0
58 REVIDX_KNOWN_FLAGS = REVIDX_ISCENSORED
58 REVIDX_KNOWN_FLAGS = REVIDX_ISCENSORED
59
59
60 # max size of revlog with inline data
60 # max size of revlog with inline data
61 _maxinline = 131072
61 _maxinline = 131072
62 _chunksize = 1048576
62 _chunksize = 1048576
63
63
64 RevlogError = error.RevlogError
64 RevlogError = error.RevlogError
65 LookupError = error.LookupError
65 LookupError = error.LookupError
66 CensoredNodeError = error.CensoredNodeError
66 CensoredNodeError = error.CensoredNodeError
67
67
68 def getoffset(q):
68 def getoffset(q):
69 return int(q >> 16)
69 return int(q >> 16)
70
70
71 def gettype(q):
71 def gettype(q):
72 return int(q & 0xFFFF)
72 return int(q & 0xFFFF)
73
73
74 def offset_type(offset, type):
74 def offset_type(offset, type):
75 return long(long(offset) << 16 | type)
75 return long(long(offset) << 16 | type)
76
76
77 _nullhash = _sha(nullid)
77 _nullhash = _sha(nullid)
78
78
79 def hash(text, p1, p2):
79 def hash(text, p1, p2):
80 """generate a hash from the given text and its parent hashes
80 """generate a hash from the given text and its parent hashes
81
81
82 This hash combines both the current file contents and its history
82 This hash combines both the current file contents and its history
83 in a manner that makes it easy to distinguish nodes with the same
83 in a manner that makes it easy to distinguish nodes with the same
84 content in the revision graph.
84 content in the revision graph.
85 """
85 """
86 # As of now, if one of the parent node is null, p2 is null
86 # As of now, if one of the parent node is null, p2 is null
87 if p2 == nullid:
87 if p2 == nullid:
88 # deep copy of a hash is faster than creating one
88 # deep copy of a hash is faster than creating one
89 s = _nullhash.copy()
89 s = _nullhash.copy()
90 s.update(p1)
90 s.update(p1)
91 else:
91 else:
92 # none of the parent nodes are nullid
92 # none of the parent nodes are nullid
93 l = [p1, p2]
93 l = [p1, p2]
94 l.sort()
94 l.sort()
95 s = _sha(l[0])
95 s = _sha(l[0])
96 s.update(l[1])
96 s.update(l[1])
97 s.update(text)
97 s.update(text)
98 return s.digest()
98 return s.digest()
99
99
100 def decompress(bin):
100 def decompress(bin):
101 """ decompress the given input """
101 """ decompress the given input """
102 if not bin:
102 if not bin:
103 return bin
103 return bin
104 t = bin[0]
104 t = bin[0]
105 if t == '\0':
105 if t == '\0':
106 return bin
106 return bin
107 if t == 'x':
107 if t == 'x':
108 try:
108 try:
109 return _decompress(bin)
109 return _decompress(bin)
110 except zlib.error as e:
110 except zlib.error as e:
111 raise RevlogError(_("revlog decompress error: %s") % str(e))
111 raise RevlogError(_("revlog decompress error: %s") % str(e))
112 if t == 'u':
112 if t == 'u':
113 return bin[1:]
113 return util.buffer(bin, 1)
114 raise RevlogError(_("unknown compression type %r") % t)
114 raise RevlogError(_("unknown compression type %r") % t)
115
115
116 # index v0:
116 # index v0:
117 # 4 bytes: offset
117 # 4 bytes: offset
118 # 4 bytes: compressed length
118 # 4 bytes: compressed length
119 # 4 bytes: base rev
119 # 4 bytes: base rev
120 # 4 bytes: link rev
120 # 4 bytes: link rev
121 # 20 bytes: parent 1 nodeid
121 # 20 bytes: parent 1 nodeid
122 # 20 bytes: parent 2 nodeid
122 # 20 bytes: parent 2 nodeid
123 # 20 bytes: nodeid
123 # 20 bytes: nodeid
124 indexformatv0 = ">4l20s20s20s"
124 indexformatv0 = ">4l20s20s20s"
125
125
126 class revlogoldio(object):
126 class revlogoldio(object):
127 def __init__(self):
127 def __init__(self):
128 self.size = struct.calcsize(indexformatv0)
128 self.size = struct.calcsize(indexformatv0)
129
129
130 def parseindex(self, data, inline):
130 def parseindex(self, data, inline):
131 s = self.size
131 s = self.size
132 index = []
132 index = []
133 nodemap = {nullid: nullrev}
133 nodemap = {nullid: nullrev}
134 n = off = 0
134 n = off = 0
135 l = len(data)
135 l = len(data)
136 while off + s <= l:
136 while off + s <= l:
137 cur = data[off:off + s]
137 cur = data[off:off + s]
138 off += s
138 off += s
139 e = _unpack(indexformatv0, cur)
139 e = _unpack(indexformatv0, cur)
140 # transform to revlogv1 format
140 # transform to revlogv1 format
141 e2 = (offset_type(e[0], 0), e[1], -1, e[2], e[3],
141 e2 = (offset_type(e[0], 0), e[1], -1, e[2], e[3],
142 nodemap.get(e[4], nullrev), nodemap.get(e[5], nullrev), e[6])
142 nodemap.get(e[4], nullrev), nodemap.get(e[5], nullrev), e[6])
143 index.append(e2)
143 index.append(e2)
144 nodemap[e[6]] = n
144 nodemap[e[6]] = n
145 n += 1
145 n += 1
146
146
147 # add the magic null revision at -1
147 # add the magic null revision at -1
148 index.append((0, 0, 0, -1, -1, -1, -1, nullid))
148 index.append((0, 0, 0, -1, -1, -1, -1, nullid))
149
149
150 return index, nodemap, None
150 return index, nodemap, None
151
151
152 def packentry(self, entry, node, version, rev):
152 def packentry(self, entry, node, version, rev):
153 if gettype(entry[0]):
153 if gettype(entry[0]):
154 raise RevlogError(_("index entry flags need RevlogNG"))
154 raise RevlogError(_("index entry flags need RevlogNG"))
155 e2 = (getoffset(entry[0]), entry[1], entry[3], entry[4],
155 e2 = (getoffset(entry[0]), entry[1], entry[3], entry[4],
156 node(entry[5]), node(entry[6]), entry[7])
156 node(entry[5]), node(entry[6]), entry[7])
157 return _pack(indexformatv0, *e2)
157 return _pack(indexformatv0, *e2)
158
158
159 # index ng:
159 # index ng:
160 # 6 bytes: offset
160 # 6 bytes: offset
161 # 2 bytes: flags
161 # 2 bytes: flags
162 # 4 bytes: compressed length
162 # 4 bytes: compressed length
163 # 4 bytes: uncompressed length
163 # 4 bytes: uncompressed length
164 # 4 bytes: base rev
164 # 4 bytes: base rev
165 # 4 bytes: link rev
165 # 4 bytes: link rev
166 # 4 bytes: parent 1 rev
166 # 4 bytes: parent 1 rev
167 # 4 bytes: parent 2 rev
167 # 4 bytes: parent 2 rev
168 # 32 bytes: nodeid
168 # 32 bytes: nodeid
169 indexformatng = ">Qiiiiii20s12x"
169 indexformatng = ">Qiiiiii20s12x"
170 versionformat = ">I"
170 versionformat = ">I"
171
171
172 # corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
172 # corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
173 # signed integer)
173 # signed integer)
174 _maxentrysize = 0x7fffffff
174 _maxentrysize = 0x7fffffff
175
175
176 class revlogio(object):
176 class revlogio(object):
177 def __init__(self):
177 def __init__(self):
178 self.size = struct.calcsize(indexformatng)
178 self.size = struct.calcsize(indexformatng)
179
179
180 def parseindex(self, data, inline):
180 def parseindex(self, data, inline):
181 # call the C implementation to parse the index data
181 # call the C implementation to parse the index data
182 index, cache = parsers.parse_index2(data, inline)
182 index, cache = parsers.parse_index2(data, inline)
183 return index, getattr(index, 'nodemap', None), cache
183 return index, getattr(index, 'nodemap', None), cache
184
184
185 def packentry(self, entry, node, version, rev):
185 def packentry(self, entry, node, version, rev):
186 p = _pack(indexformatng, *entry)
186 p = _pack(indexformatng, *entry)
187 if rev == 0:
187 if rev == 0:
188 p = _pack(versionformat, version) + p[4:]
188 p = _pack(versionformat, version) + p[4:]
189 return p
189 return p
190
190
191 class revlog(object):
191 class revlog(object):
192 """
192 """
193 the underlying revision storage object
193 the underlying revision storage object
194
194
195 A revlog consists of two parts, an index and the revision data.
195 A revlog consists of two parts, an index and the revision data.
196
196
197 The index is a file with a fixed record size containing
197 The index is a file with a fixed record size containing
198 information on each revision, including its nodeid (hash), the
198 information on each revision, including its nodeid (hash), the
199 nodeids of its parents, the position and offset of its data within
199 nodeids of its parents, the position and offset of its data within
200 the data file, and the revision it's based on. Finally, each entry
200 the data file, and the revision it's based on. Finally, each entry
201 contains a linkrev entry that can serve as a pointer to external
201 contains a linkrev entry that can serve as a pointer to external
202 data.
202 data.
203
203
204 The revision data itself is a linear collection of data chunks.
204 The revision data itself is a linear collection of data chunks.
205 Each chunk represents a revision and is usually represented as a
205 Each chunk represents a revision and is usually represented as a
206 delta against the previous chunk. To bound lookup time, runs of
206 delta against the previous chunk. To bound lookup time, runs of
207 deltas are limited to about 2 times the length of the original
207 deltas are limited to about 2 times the length of the original
208 version data. This makes retrieval of a version proportional to
208 version data. This makes retrieval of a version proportional to
209 its size, or O(1) relative to the number of revisions.
209 its size, or O(1) relative to the number of revisions.
210
210
211 Both pieces of the revlog are written to in an append-only
211 Both pieces of the revlog are written to in an append-only
212 fashion, which means we never need to rewrite a file to insert or
212 fashion, which means we never need to rewrite a file to insert or
213 remove data, and can use some simple techniques to avoid the need
213 remove data, and can use some simple techniques to avoid the need
214 for locking while reading.
214 for locking while reading.
215 """
215 """
216 def __init__(self, opener, indexfile):
216 def __init__(self, opener, indexfile):
217 """
217 """
218 create a revlog object
218 create a revlog object
219
219
220 opener is a function that abstracts the file opening operation
220 opener is a function that abstracts the file opening operation
221 and can be used to implement COW semantics or the like.
221 and can be used to implement COW semantics or the like.
222 """
222 """
223 self.indexfile = indexfile
223 self.indexfile = indexfile
224 self.datafile = indexfile[:-2] + ".d"
224 self.datafile = indexfile[:-2] + ".d"
225 self.opener = opener
225 self.opener = opener
226 # 3-tuple of (node, rev, text) for a raw revision.
226 # 3-tuple of (node, rev, text) for a raw revision.
227 self._cache = None
227 self._cache = None
228 # 2-tuple of (rev, baserev) defining the base revision the delta chain
228 # 2-tuple of (rev, baserev) defining the base revision the delta chain
229 # begins at for a revision.
229 # begins at for a revision.
230 self._basecache = None
230 self._basecache = None
231 # 2-tuple of (offset, data) of raw data from the revlog at an offset.
231 # 2-tuple of (offset, data) of raw data from the revlog at an offset.
232 self._chunkcache = (0, '')
232 self._chunkcache = (0, '')
233 # How much data to read and cache into the raw revlog data cache.
233 # How much data to read and cache into the raw revlog data cache.
234 self._chunkcachesize = 65536
234 self._chunkcachesize = 65536
235 self._maxchainlen = None
235 self._maxchainlen = None
236 self._aggressivemergedeltas = False
236 self._aggressivemergedeltas = False
237 self.index = []
237 self.index = []
238 # Mapping of partial identifiers to full nodes.
238 # Mapping of partial identifiers to full nodes.
239 self._pcache = {}
239 self._pcache = {}
240 # Mapping of revision integer to full node.
240 # Mapping of revision integer to full node.
241 self._nodecache = {nullid: nullrev}
241 self._nodecache = {nullid: nullrev}
242 self._nodepos = None
242 self._nodepos = None
243
243
244 v = REVLOG_DEFAULT_VERSION
244 v = REVLOG_DEFAULT_VERSION
245 opts = getattr(opener, 'options', None)
245 opts = getattr(opener, 'options', None)
246 if opts is not None:
246 if opts is not None:
247 if 'revlogv1' in opts:
247 if 'revlogv1' in opts:
248 if 'generaldelta' in opts:
248 if 'generaldelta' in opts:
249 v |= REVLOGGENERALDELTA
249 v |= REVLOGGENERALDELTA
250 else:
250 else:
251 v = 0
251 v = 0
252 if 'chunkcachesize' in opts:
252 if 'chunkcachesize' in opts:
253 self._chunkcachesize = opts['chunkcachesize']
253 self._chunkcachesize = opts['chunkcachesize']
254 if 'maxchainlen' in opts:
254 if 'maxchainlen' in opts:
255 self._maxchainlen = opts['maxchainlen']
255 self._maxchainlen = opts['maxchainlen']
256 if 'aggressivemergedeltas' in opts:
256 if 'aggressivemergedeltas' in opts:
257 self._aggressivemergedeltas = opts['aggressivemergedeltas']
257 self._aggressivemergedeltas = opts['aggressivemergedeltas']
258 self._lazydeltabase = bool(opts.get('lazydeltabase', False))
258 self._lazydeltabase = bool(opts.get('lazydeltabase', False))
259
259
260 if self._chunkcachesize <= 0:
260 if self._chunkcachesize <= 0:
261 raise RevlogError(_('revlog chunk cache size %r is not greater '
261 raise RevlogError(_('revlog chunk cache size %r is not greater '
262 'than 0') % self._chunkcachesize)
262 'than 0') % self._chunkcachesize)
263 elif self._chunkcachesize & (self._chunkcachesize - 1):
263 elif self._chunkcachesize & (self._chunkcachesize - 1):
264 raise RevlogError(_('revlog chunk cache size %r is not a power '
264 raise RevlogError(_('revlog chunk cache size %r is not a power '
265 'of 2') % self._chunkcachesize)
265 'of 2') % self._chunkcachesize)
266
266
267 indexdata = ''
267 indexdata = ''
268 self._initempty = True
268 self._initempty = True
269 try:
269 try:
270 f = self.opener(self.indexfile)
270 f = self.opener(self.indexfile)
271 indexdata = f.read()
271 indexdata = f.read()
272 f.close()
272 f.close()
273 if len(indexdata) > 0:
273 if len(indexdata) > 0:
274 v = struct.unpack(versionformat, indexdata[:4])[0]
274 v = struct.unpack(versionformat, indexdata[:4])[0]
275 self._initempty = False
275 self._initempty = False
276 except IOError as inst:
276 except IOError as inst:
277 if inst.errno != errno.ENOENT:
277 if inst.errno != errno.ENOENT:
278 raise
278 raise
279
279
280 self.version = v
280 self.version = v
281 self._inline = v & REVLOGNGINLINEDATA
281 self._inline = v & REVLOGNGINLINEDATA
282 self._generaldelta = v & REVLOGGENERALDELTA
282 self._generaldelta = v & REVLOGGENERALDELTA
283 flags = v & ~0xFFFF
283 flags = v & ~0xFFFF
284 fmt = v & 0xFFFF
284 fmt = v & 0xFFFF
285 if fmt == REVLOGV0 and flags:
285 if fmt == REVLOGV0 and flags:
286 raise RevlogError(_("index %s unknown flags %#04x for format v0")
286 raise RevlogError(_("index %s unknown flags %#04x for format v0")
287 % (self.indexfile, flags >> 16))
287 % (self.indexfile, flags >> 16))
288 elif fmt == REVLOGNG and flags & ~REVLOGNG_FLAGS:
288 elif fmt == REVLOGNG and flags & ~REVLOGNG_FLAGS:
289 raise RevlogError(_("index %s unknown flags %#04x for revlogng")
289 raise RevlogError(_("index %s unknown flags %#04x for revlogng")
290 % (self.indexfile, flags >> 16))
290 % (self.indexfile, flags >> 16))
291 elif fmt > REVLOGNG:
291 elif fmt > REVLOGNG:
292 raise RevlogError(_("index %s unknown format %d")
292 raise RevlogError(_("index %s unknown format %d")
293 % (self.indexfile, fmt))
293 % (self.indexfile, fmt))
294
294
295 self._io = revlogio()
295 self._io = revlogio()
296 if self.version == REVLOGV0:
296 if self.version == REVLOGV0:
297 self._io = revlogoldio()
297 self._io = revlogoldio()
298 try:
298 try:
299 d = self._io.parseindex(indexdata, self._inline)
299 d = self._io.parseindex(indexdata, self._inline)
300 except (ValueError, IndexError):
300 except (ValueError, IndexError):
301 raise RevlogError(_("index %s is corrupted") % (self.indexfile))
301 raise RevlogError(_("index %s is corrupted") % (self.indexfile))
302 self.index, nodemap, self._chunkcache = d
302 self.index, nodemap, self._chunkcache = d
303 if nodemap is not None:
303 if nodemap is not None:
304 self.nodemap = self._nodecache = nodemap
304 self.nodemap = self._nodecache = nodemap
305 if not self._chunkcache:
305 if not self._chunkcache:
306 self._chunkclear()
306 self._chunkclear()
307 # revnum -> (chain-length, sum-delta-length)
307 # revnum -> (chain-length, sum-delta-length)
308 self._chaininfocache = {}
308 self._chaininfocache = {}
309
309
310 def tip(self):
310 def tip(self):
311 return self.node(len(self.index) - 2)
311 return self.node(len(self.index) - 2)
312 def __contains__(self, rev):
312 def __contains__(self, rev):
313 return 0 <= rev < len(self)
313 return 0 <= rev < len(self)
314 def __len__(self):
314 def __len__(self):
315 return len(self.index) - 1
315 return len(self.index) - 1
316 def __iter__(self):
316 def __iter__(self):
317 return iter(xrange(len(self)))
317 return iter(xrange(len(self)))
318 def revs(self, start=0, stop=None):
318 def revs(self, start=0, stop=None):
319 """iterate over all rev in this revlog (from start to stop)"""
319 """iterate over all rev in this revlog (from start to stop)"""
320 step = 1
320 step = 1
321 if stop is not None:
321 if stop is not None:
322 if start > stop:
322 if start > stop:
323 step = -1
323 step = -1
324 stop += step
324 stop += step
325 else:
325 else:
326 stop = len(self)
326 stop = len(self)
327 return xrange(start, stop, step)
327 return xrange(start, stop, step)
328
328
329 @util.propertycache
329 @util.propertycache
330 def nodemap(self):
330 def nodemap(self):
331 self.rev(self.node(0))
331 self.rev(self.node(0))
332 return self._nodecache
332 return self._nodecache
333
333
334 def hasnode(self, node):
334 def hasnode(self, node):
335 try:
335 try:
336 self.rev(node)
336 self.rev(node)
337 return True
337 return True
338 except KeyError:
338 except KeyError:
339 return False
339 return False
340
340
341 def clearcaches(self):
341 def clearcaches(self):
342 self._cache = None
342 self._cache = None
343 self._basecache = None
343 self._basecache = None
344 self._chunkcache = (0, '')
344 self._chunkcache = (0, '')
345 self._pcache = {}
345 self._pcache = {}
346
346
347 try:
347 try:
348 self._nodecache.clearcaches()
348 self._nodecache.clearcaches()
349 except AttributeError:
349 except AttributeError:
350 self._nodecache = {nullid: nullrev}
350 self._nodecache = {nullid: nullrev}
351 self._nodepos = None
351 self._nodepos = None
352
352
353 def rev(self, node):
353 def rev(self, node):
354 try:
354 try:
355 return self._nodecache[node]
355 return self._nodecache[node]
356 except TypeError:
356 except TypeError:
357 raise
357 raise
358 except RevlogError:
358 except RevlogError:
359 # parsers.c radix tree lookup failed
359 # parsers.c radix tree lookup failed
360 raise LookupError(node, self.indexfile, _('no node'))
360 raise LookupError(node, self.indexfile, _('no node'))
361 except KeyError:
361 except KeyError:
362 # pure python cache lookup failed
362 # pure python cache lookup failed
363 n = self._nodecache
363 n = self._nodecache
364 i = self.index
364 i = self.index
365 p = self._nodepos
365 p = self._nodepos
366 if p is None:
366 if p is None:
367 p = len(i) - 2
367 p = len(i) - 2
368 for r in xrange(p, -1, -1):
368 for r in xrange(p, -1, -1):
369 v = i[r][7]
369 v = i[r][7]
370 n[v] = r
370 n[v] = r
371 if v == node:
371 if v == node:
372 self._nodepos = r - 1
372 self._nodepos = r - 1
373 return r
373 return r
374 raise LookupError(node, self.indexfile, _('no node'))
374 raise LookupError(node, self.indexfile, _('no node'))
375
375
376 def node(self, rev):
376 def node(self, rev):
377 return self.index[rev][7]
377 return self.index[rev][7]
378 def linkrev(self, rev):
378 def linkrev(self, rev):
379 return self.index[rev][4]
379 return self.index[rev][4]
380 def parents(self, node):
380 def parents(self, node):
381 i = self.index
381 i = self.index
382 d = i[self.rev(node)]
382 d = i[self.rev(node)]
383 return i[d[5]][7], i[d[6]][7] # map revisions to nodes inline
383 return i[d[5]][7], i[d[6]][7] # map revisions to nodes inline
384 def parentrevs(self, rev):
384 def parentrevs(self, rev):
385 return self.index[rev][5:7]
385 return self.index[rev][5:7]
386 def start(self, rev):
386 def start(self, rev):
387 return int(self.index[rev][0] >> 16)
387 return int(self.index[rev][0] >> 16)
388 def end(self, rev):
388 def end(self, rev):
389 return self.start(rev) + self.length(rev)
389 return self.start(rev) + self.length(rev)
390 def length(self, rev):
390 def length(self, rev):
391 return self.index[rev][1]
391 return self.index[rev][1]
392 def chainbase(self, rev):
392 def chainbase(self, rev):
393 index = self.index
393 index = self.index
394 base = index[rev][3]
394 base = index[rev][3]
395 while base != rev:
395 while base != rev:
396 rev = base
396 rev = base
397 base = index[rev][3]
397 base = index[rev][3]
398 return base
398 return base
399 def chainlen(self, rev):
399 def chainlen(self, rev):
400 return self._chaininfo(rev)[0]
400 return self._chaininfo(rev)[0]
401
401
402 def _chaininfo(self, rev):
402 def _chaininfo(self, rev):
403 chaininfocache = self._chaininfocache
403 chaininfocache = self._chaininfocache
404 if rev in chaininfocache:
404 if rev in chaininfocache:
405 return chaininfocache[rev]
405 return chaininfocache[rev]
406 index = self.index
406 index = self.index
407 generaldelta = self._generaldelta
407 generaldelta = self._generaldelta
408 iterrev = rev
408 iterrev = rev
409 e = index[iterrev]
409 e = index[iterrev]
410 clen = 0
410 clen = 0
411 compresseddeltalen = 0
411 compresseddeltalen = 0
412 while iterrev != e[3]:
412 while iterrev != e[3]:
413 clen += 1
413 clen += 1
414 compresseddeltalen += e[1]
414 compresseddeltalen += e[1]
415 if generaldelta:
415 if generaldelta:
416 iterrev = e[3]
416 iterrev = e[3]
417 else:
417 else:
418 iterrev -= 1
418 iterrev -= 1
419 if iterrev in chaininfocache:
419 if iterrev in chaininfocache:
420 t = chaininfocache[iterrev]
420 t = chaininfocache[iterrev]
421 clen += t[0]
421 clen += t[0]
422 compresseddeltalen += t[1]
422 compresseddeltalen += t[1]
423 break
423 break
424 e = index[iterrev]
424 e = index[iterrev]
425 else:
425 else:
426 # Add text length of base since decompressing that also takes
426 # Add text length of base since decompressing that also takes
427 # work. For cache hits the length is already included.
427 # work. For cache hits the length is already included.
428 compresseddeltalen += e[1]
428 compresseddeltalen += e[1]
429 r = (clen, compresseddeltalen)
429 r = (clen, compresseddeltalen)
430 chaininfocache[rev] = r
430 chaininfocache[rev] = r
431 return r
431 return r
432
432
433 def _deltachain(self, rev, stoprev=None):
433 def _deltachain(self, rev, stoprev=None):
434 """Obtain the delta chain for a revision.
434 """Obtain the delta chain for a revision.
435
435
436 ``stoprev`` specifies a revision to stop at. If not specified, we
436 ``stoprev`` specifies a revision to stop at. If not specified, we
437 stop at the base of the chain.
437 stop at the base of the chain.
438
438
439 Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
439 Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
440 revs in ascending order and ``stopped`` is a bool indicating whether
440 revs in ascending order and ``stopped`` is a bool indicating whether
441 ``stoprev`` was hit.
441 ``stoprev`` was hit.
442 """
442 """
443 chain = []
443 chain = []
444
444
445 # Alias to prevent attribute lookup in tight loop.
445 # Alias to prevent attribute lookup in tight loop.
446 index = self.index
446 index = self.index
447 generaldelta = self._generaldelta
447 generaldelta = self._generaldelta
448
448
449 iterrev = rev
449 iterrev = rev
450 e = index[iterrev]
450 e = index[iterrev]
451 while iterrev != e[3] and iterrev != stoprev:
451 while iterrev != e[3] and iterrev != stoprev:
452 chain.append(iterrev)
452 chain.append(iterrev)
453 if generaldelta:
453 if generaldelta:
454 iterrev = e[3]
454 iterrev = e[3]
455 else:
455 else:
456 iterrev -= 1
456 iterrev -= 1
457 e = index[iterrev]
457 e = index[iterrev]
458
458
459 if iterrev == stoprev:
459 if iterrev == stoprev:
460 stopped = True
460 stopped = True
461 else:
461 else:
462 chain.append(iterrev)
462 chain.append(iterrev)
463 stopped = False
463 stopped = False
464
464
465 chain.reverse()
465 chain.reverse()
466 return chain, stopped
466 return chain, stopped
467
467
468 def flags(self, rev):
468 def flags(self, rev):
469 return self.index[rev][0] & 0xFFFF
469 return self.index[rev][0] & 0xFFFF
470 def rawsize(self, rev):
470 def rawsize(self, rev):
471 """return the length of the uncompressed text for a given revision"""
471 """return the length of the uncompressed text for a given revision"""
472 l = self.index[rev][2]
472 l = self.index[rev][2]
473 if l >= 0:
473 if l >= 0:
474 return l
474 return l
475
475
476 t = self.revision(self.node(rev))
476 t = self.revision(self.node(rev))
477 return len(t)
477 return len(t)
478 size = rawsize
478 size = rawsize
479
479
480 def ancestors(self, revs, stoprev=0, inclusive=False):
480 def ancestors(self, revs, stoprev=0, inclusive=False):
481 """Generate the ancestors of 'revs' in reverse topological order.
481 """Generate the ancestors of 'revs' in reverse topological order.
482 Does not generate revs lower than stoprev.
482 Does not generate revs lower than stoprev.
483
483
484 See the documentation for ancestor.lazyancestors for more details."""
484 See the documentation for ancestor.lazyancestors for more details."""
485
485
486 return ancestor.lazyancestors(self.parentrevs, revs, stoprev=stoprev,
486 return ancestor.lazyancestors(self.parentrevs, revs, stoprev=stoprev,
487 inclusive=inclusive)
487 inclusive=inclusive)
488
488
489 def descendants(self, revs):
489 def descendants(self, revs):
490 """Generate the descendants of 'revs' in revision order.
490 """Generate the descendants of 'revs' in revision order.
491
491
492 Yield a sequence of revision numbers starting with a child of
492 Yield a sequence of revision numbers starting with a child of
493 some rev in revs, i.e., each revision is *not* considered a
493 some rev in revs, i.e., each revision is *not* considered a
494 descendant of itself. Results are ordered by revision number (a
494 descendant of itself. Results are ordered by revision number (a
495 topological sort)."""
495 topological sort)."""
496 first = min(revs)
496 first = min(revs)
497 if first == nullrev:
497 if first == nullrev:
498 for i in self:
498 for i in self:
499 yield i
499 yield i
500 return
500 return
501
501
502 seen = set(revs)
502 seen = set(revs)
503 for i in self.revs(start=first + 1):
503 for i in self.revs(start=first + 1):
504 for x in self.parentrevs(i):
504 for x in self.parentrevs(i):
505 if x != nullrev and x in seen:
505 if x != nullrev and x in seen:
506 seen.add(i)
506 seen.add(i)
507 yield i
507 yield i
508 break
508 break
509
509
510 def findcommonmissing(self, common=None, heads=None):
510 def findcommonmissing(self, common=None, heads=None):
511 """Return a tuple of the ancestors of common and the ancestors of heads
511 """Return a tuple of the ancestors of common and the ancestors of heads
512 that are not ancestors of common. In revset terminology, we return the
512 that are not ancestors of common. In revset terminology, we return the
513 tuple:
513 tuple:
514
514
515 ::common, (::heads) - (::common)
515 ::common, (::heads) - (::common)
516
516
517 The list is sorted by revision number, meaning it is
517 The list is sorted by revision number, meaning it is
518 topologically sorted.
518 topologically sorted.
519
519
520 'heads' and 'common' are both lists of node IDs. If heads is
520 'heads' and 'common' are both lists of node IDs. If heads is
521 not supplied, uses all of the revlog's heads. If common is not
521 not supplied, uses all of the revlog's heads. If common is not
522 supplied, uses nullid."""
522 supplied, uses nullid."""
523 if common is None:
523 if common is None:
524 common = [nullid]
524 common = [nullid]
525 if heads is None:
525 if heads is None:
526 heads = self.heads()
526 heads = self.heads()
527
527
528 common = [self.rev(n) for n in common]
528 common = [self.rev(n) for n in common]
529 heads = [self.rev(n) for n in heads]
529 heads = [self.rev(n) for n in heads]
530
530
531 # we want the ancestors, but inclusive
531 # we want the ancestors, but inclusive
532 class lazyset(object):
532 class lazyset(object):
533 def __init__(self, lazyvalues):
533 def __init__(self, lazyvalues):
534 self.addedvalues = set()
534 self.addedvalues = set()
535 self.lazyvalues = lazyvalues
535 self.lazyvalues = lazyvalues
536
536
537 def __contains__(self, value):
537 def __contains__(self, value):
538 return value in self.addedvalues or value in self.lazyvalues
538 return value in self.addedvalues or value in self.lazyvalues
539
539
540 def __iter__(self):
540 def __iter__(self):
541 added = self.addedvalues
541 added = self.addedvalues
542 for r in added:
542 for r in added:
543 yield r
543 yield r
544 for r in self.lazyvalues:
544 for r in self.lazyvalues:
545 if not r in added:
545 if not r in added:
546 yield r
546 yield r
547
547
548 def add(self, value):
548 def add(self, value):
549 self.addedvalues.add(value)
549 self.addedvalues.add(value)
550
550
551 def update(self, values):
551 def update(self, values):
552 self.addedvalues.update(values)
552 self.addedvalues.update(values)
553
553
554 has = lazyset(self.ancestors(common))
554 has = lazyset(self.ancestors(common))
555 has.add(nullrev)
555 has.add(nullrev)
556 has.update(common)
556 has.update(common)
557
557
558 # take all ancestors from heads that aren't in has
558 # take all ancestors from heads that aren't in has
559 missing = set()
559 missing = set()
560 visit = collections.deque(r for r in heads if r not in has)
560 visit = collections.deque(r for r in heads if r not in has)
561 while visit:
561 while visit:
562 r = visit.popleft()
562 r = visit.popleft()
563 if r in missing:
563 if r in missing:
564 continue
564 continue
565 else:
565 else:
566 missing.add(r)
566 missing.add(r)
567 for p in self.parentrevs(r):
567 for p in self.parentrevs(r):
568 if p not in has:
568 if p not in has:
569 visit.append(p)
569 visit.append(p)
570 missing = list(missing)
570 missing = list(missing)
571 missing.sort()
571 missing.sort()
572 return has, [self.node(r) for r in missing]
572 return has, [self.node(r) for r in missing]
573
573
574 def incrementalmissingrevs(self, common=None):
574 def incrementalmissingrevs(self, common=None):
575 """Return an object that can be used to incrementally compute the
575 """Return an object that can be used to incrementally compute the
576 revision numbers of the ancestors of arbitrary sets that are not
576 revision numbers of the ancestors of arbitrary sets that are not
577 ancestors of common. This is an ancestor.incrementalmissingancestors
577 ancestors of common. This is an ancestor.incrementalmissingancestors
578 object.
578 object.
579
579
580 'common' is a list of revision numbers. If common is not supplied, uses
580 'common' is a list of revision numbers. If common is not supplied, uses
581 nullrev.
581 nullrev.
582 """
582 """
583 if common is None:
583 if common is None:
584 common = [nullrev]
584 common = [nullrev]
585
585
586 return ancestor.incrementalmissingancestors(self.parentrevs, common)
586 return ancestor.incrementalmissingancestors(self.parentrevs, common)
587
587
588 def findmissingrevs(self, common=None, heads=None):
588 def findmissingrevs(self, common=None, heads=None):
589 """Return the revision numbers of the ancestors of heads that
589 """Return the revision numbers of the ancestors of heads that
590 are not ancestors of common.
590 are not ancestors of common.
591
591
592 More specifically, return a list of revision numbers corresponding to
592 More specifically, return a list of revision numbers corresponding to
593 nodes N such that every N satisfies the following constraints:
593 nodes N such that every N satisfies the following constraints:
594
594
595 1. N is an ancestor of some node in 'heads'
595 1. N is an ancestor of some node in 'heads'
596 2. N is not an ancestor of any node in 'common'
596 2. N is not an ancestor of any node in 'common'
597
597
598 The list is sorted by revision number, meaning it is
598 The list is sorted by revision number, meaning it is
599 topologically sorted.
599 topologically sorted.
600
600
601 'heads' and 'common' are both lists of revision numbers. If heads is
601 'heads' and 'common' are both lists of revision numbers. If heads is
602 not supplied, uses all of the revlog's heads. If common is not
602 not supplied, uses all of the revlog's heads. If common is not
603 supplied, uses nullid."""
603 supplied, uses nullid."""
604 if common is None:
604 if common is None:
605 common = [nullrev]
605 common = [nullrev]
606 if heads is None:
606 if heads is None:
607 heads = self.headrevs()
607 heads = self.headrevs()
608
608
609 inc = self.incrementalmissingrevs(common=common)
609 inc = self.incrementalmissingrevs(common=common)
610 return inc.missingancestors(heads)
610 return inc.missingancestors(heads)
611
611
612 def findmissing(self, common=None, heads=None):
612 def findmissing(self, common=None, heads=None):
613 """Return the ancestors of heads that are not ancestors of common.
613 """Return the ancestors of heads that are not ancestors of common.
614
614
615 More specifically, return a list of nodes N such that every N
615 More specifically, return a list of nodes N such that every N
616 satisfies the following constraints:
616 satisfies the following constraints:
617
617
618 1. N is an ancestor of some node in 'heads'
618 1. N is an ancestor of some node in 'heads'
619 2. N is not an ancestor of any node in 'common'
619 2. N is not an ancestor of any node in 'common'
620
620
621 The list is sorted by revision number, meaning it is
621 The list is sorted by revision number, meaning it is
622 topologically sorted.
622 topologically sorted.
623
623
624 'heads' and 'common' are both lists of node IDs. If heads is
624 'heads' and 'common' are both lists of node IDs. If heads is
625 not supplied, uses all of the revlog's heads. If common is not
625 not supplied, uses all of the revlog's heads. If common is not
626 supplied, uses nullid."""
626 supplied, uses nullid."""
627 if common is None:
627 if common is None:
628 common = [nullid]
628 common = [nullid]
629 if heads is None:
629 if heads is None:
630 heads = self.heads()
630 heads = self.heads()
631
631
632 common = [self.rev(n) for n in common]
632 common = [self.rev(n) for n in common]
633 heads = [self.rev(n) for n in heads]
633 heads = [self.rev(n) for n in heads]
634
634
635 inc = self.incrementalmissingrevs(common=common)
635 inc = self.incrementalmissingrevs(common=common)
636 return [self.node(r) for r in inc.missingancestors(heads)]
636 return [self.node(r) for r in inc.missingancestors(heads)]
637
637
638 def nodesbetween(self, roots=None, heads=None):
638 def nodesbetween(self, roots=None, heads=None):
639 """Return a topological path from 'roots' to 'heads'.
639 """Return a topological path from 'roots' to 'heads'.
640
640
641 Return a tuple (nodes, outroots, outheads) where 'nodes' is a
641 Return a tuple (nodes, outroots, outheads) where 'nodes' is a
642 topologically sorted list of all nodes N that satisfy both of
642 topologically sorted list of all nodes N that satisfy both of
643 these constraints:
643 these constraints:
644
644
645 1. N is a descendant of some node in 'roots'
645 1. N is a descendant of some node in 'roots'
646 2. N is an ancestor of some node in 'heads'
646 2. N is an ancestor of some node in 'heads'
647
647
648 Every node is considered to be both a descendant and an ancestor
648 Every node is considered to be both a descendant and an ancestor
649 of itself, so every reachable node in 'roots' and 'heads' will be
649 of itself, so every reachable node in 'roots' and 'heads' will be
650 included in 'nodes'.
650 included in 'nodes'.
651
651
652 'outroots' is the list of reachable nodes in 'roots', i.e., the
652 'outroots' is the list of reachable nodes in 'roots', i.e., the
653 subset of 'roots' that is returned in 'nodes'. Likewise,
653 subset of 'roots' that is returned in 'nodes'. Likewise,
654 'outheads' is the subset of 'heads' that is also in 'nodes'.
654 'outheads' is the subset of 'heads' that is also in 'nodes'.
655
655
656 'roots' and 'heads' are both lists of node IDs. If 'roots' is
656 'roots' and 'heads' are both lists of node IDs. If 'roots' is
657 unspecified, uses nullid as the only root. If 'heads' is
657 unspecified, uses nullid as the only root. If 'heads' is
658 unspecified, uses list of all of the revlog's heads."""
658 unspecified, uses list of all of the revlog's heads."""
659 nonodes = ([], [], [])
659 nonodes = ([], [], [])
660 if roots is not None:
660 if roots is not None:
661 roots = list(roots)
661 roots = list(roots)
662 if not roots:
662 if not roots:
663 return nonodes
663 return nonodes
664 lowestrev = min([self.rev(n) for n in roots])
664 lowestrev = min([self.rev(n) for n in roots])
665 else:
665 else:
666 roots = [nullid] # Everybody's a descendant of nullid
666 roots = [nullid] # Everybody's a descendant of nullid
667 lowestrev = nullrev
667 lowestrev = nullrev
668 if (lowestrev == nullrev) and (heads is None):
668 if (lowestrev == nullrev) and (heads is None):
669 # We want _all_ the nodes!
669 # We want _all_ the nodes!
670 return ([self.node(r) for r in self], [nullid], list(self.heads()))
670 return ([self.node(r) for r in self], [nullid], list(self.heads()))
671 if heads is None:
671 if heads is None:
672 # All nodes are ancestors, so the latest ancestor is the last
672 # All nodes are ancestors, so the latest ancestor is the last
673 # node.
673 # node.
674 highestrev = len(self) - 1
674 highestrev = len(self) - 1
675 # Set ancestors to None to signal that every node is an ancestor.
675 # Set ancestors to None to signal that every node is an ancestor.
676 ancestors = None
676 ancestors = None
677 # Set heads to an empty dictionary for later discovery of heads
677 # Set heads to an empty dictionary for later discovery of heads
678 heads = {}
678 heads = {}
679 else:
679 else:
680 heads = list(heads)
680 heads = list(heads)
681 if not heads:
681 if not heads:
682 return nonodes
682 return nonodes
683 ancestors = set()
683 ancestors = set()
684 # Turn heads into a dictionary so we can remove 'fake' heads.
684 # Turn heads into a dictionary so we can remove 'fake' heads.
685 # Also, later we will be using it to filter out the heads we can't
685 # Also, later we will be using it to filter out the heads we can't
686 # find from roots.
686 # find from roots.
687 heads = dict.fromkeys(heads, False)
687 heads = dict.fromkeys(heads, False)
688 # Start at the top and keep marking parents until we're done.
688 # Start at the top and keep marking parents until we're done.
689 nodestotag = set(heads)
689 nodestotag = set(heads)
690 # Remember where the top was so we can use it as a limit later.
690 # Remember where the top was so we can use it as a limit later.
691 highestrev = max([self.rev(n) for n in nodestotag])
691 highestrev = max([self.rev(n) for n in nodestotag])
692 while nodestotag:
692 while nodestotag:
693 # grab a node to tag
693 # grab a node to tag
694 n = nodestotag.pop()
694 n = nodestotag.pop()
695 # Never tag nullid
695 # Never tag nullid
696 if n == nullid:
696 if n == nullid:
697 continue
697 continue
698 # A node's revision number represents its place in a
698 # A node's revision number represents its place in a
699 # topologically sorted list of nodes.
699 # topologically sorted list of nodes.
700 r = self.rev(n)
700 r = self.rev(n)
701 if r >= lowestrev:
701 if r >= lowestrev:
702 if n not in ancestors:
702 if n not in ancestors:
703 # If we are possibly a descendant of one of the roots
703 # If we are possibly a descendant of one of the roots
704 # and we haven't already been marked as an ancestor
704 # and we haven't already been marked as an ancestor
705 ancestors.add(n) # Mark as ancestor
705 ancestors.add(n) # Mark as ancestor
706 # Add non-nullid parents to list of nodes to tag.
706 # Add non-nullid parents to list of nodes to tag.
707 nodestotag.update([p for p in self.parents(n) if
707 nodestotag.update([p for p in self.parents(n) if
708 p != nullid])
708 p != nullid])
709 elif n in heads: # We've seen it before, is it a fake head?
709 elif n in heads: # We've seen it before, is it a fake head?
710 # So it is, real heads should not be the ancestors of
710 # So it is, real heads should not be the ancestors of
711 # any other heads.
711 # any other heads.
712 heads.pop(n)
712 heads.pop(n)
713 if not ancestors:
713 if not ancestors:
714 return nonodes
714 return nonodes
715 # Now that we have our set of ancestors, we want to remove any
715 # Now that we have our set of ancestors, we want to remove any
716 # roots that are not ancestors.
716 # roots that are not ancestors.
717
717
718 # If one of the roots was nullid, everything is included anyway.
718 # If one of the roots was nullid, everything is included anyway.
719 if lowestrev > nullrev:
719 if lowestrev > nullrev:
720 # But, since we weren't, let's recompute the lowest rev to not
720 # But, since we weren't, let's recompute the lowest rev to not
721 # include roots that aren't ancestors.
721 # include roots that aren't ancestors.
722
722
723 # Filter out roots that aren't ancestors of heads
723 # Filter out roots that aren't ancestors of heads
724 roots = [n for n in roots if n in ancestors]
724 roots = [n for n in roots if n in ancestors]
725 # Recompute the lowest revision
725 # Recompute the lowest revision
726 if roots:
726 if roots:
727 lowestrev = min([self.rev(n) for n in roots])
727 lowestrev = min([self.rev(n) for n in roots])
728 else:
728 else:
729 # No more roots? Return empty list
729 # No more roots? Return empty list
730 return nonodes
730 return nonodes
731 else:
731 else:
732 # We are descending from nullid, and don't need to care about
732 # We are descending from nullid, and don't need to care about
733 # any other roots.
733 # any other roots.
734 lowestrev = nullrev
734 lowestrev = nullrev
735 roots = [nullid]
735 roots = [nullid]
736 # Transform our roots list into a set.
736 # Transform our roots list into a set.
737 descendants = set(roots)
737 descendants = set(roots)
738 # Also, keep the original roots so we can filter out roots that aren't
738 # Also, keep the original roots so we can filter out roots that aren't
739 # 'real' roots (i.e. are descended from other roots).
739 # 'real' roots (i.e. are descended from other roots).
740 roots = descendants.copy()
740 roots = descendants.copy()
741 # Our topologically sorted list of output nodes.
741 # Our topologically sorted list of output nodes.
742 orderedout = []
742 orderedout = []
743 # Don't start at nullid since we don't want nullid in our output list,
743 # Don't start at nullid since we don't want nullid in our output list,
744 # and if nullid shows up in descendants, empty parents will look like
744 # and if nullid shows up in descendants, empty parents will look like
745 # they're descendants.
745 # they're descendants.
746 for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
746 for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
747 n = self.node(r)
747 n = self.node(r)
748 isdescendant = False
748 isdescendant = False
749 if lowestrev == nullrev: # Everybody is a descendant of nullid
749 if lowestrev == nullrev: # Everybody is a descendant of nullid
750 isdescendant = True
750 isdescendant = True
751 elif n in descendants:
751 elif n in descendants:
752 # n is already a descendant
752 # n is already a descendant
753 isdescendant = True
753 isdescendant = True
754 # This check only needs to be done here because all the roots
754 # This check only needs to be done here because all the roots
755 # will start being marked is descendants before the loop.
755 # will start being marked is descendants before the loop.
756 if n in roots:
756 if n in roots:
757 # If n was a root, check if it's a 'real' root.
757 # If n was a root, check if it's a 'real' root.
758 p = tuple(self.parents(n))
758 p = tuple(self.parents(n))
759 # If any of its parents are descendants, it's not a root.
759 # If any of its parents are descendants, it's not a root.
760 if (p[0] in descendants) or (p[1] in descendants):
760 if (p[0] in descendants) or (p[1] in descendants):
761 roots.remove(n)
761 roots.remove(n)
762 else:
762 else:
763 p = tuple(self.parents(n))
763 p = tuple(self.parents(n))
764 # A node is a descendant if either of its parents are
764 # A node is a descendant if either of its parents are
765 # descendants. (We seeded the dependents list with the roots
765 # descendants. (We seeded the dependents list with the roots
766 # up there, remember?)
766 # up there, remember?)
767 if (p[0] in descendants) or (p[1] in descendants):
767 if (p[0] in descendants) or (p[1] in descendants):
768 descendants.add(n)
768 descendants.add(n)
769 isdescendant = True
769 isdescendant = True
770 if isdescendant and ((ancestors is None) or (n in ancestors)):
770 if isdescendant and ((ancestors is None) or (n in ancestors)):
771 # Only include nodes that are both descendants and ancestors.
771 # Only include nodes that are both descendants and ancestors.
772 orderedout.append(n)
772 orderedout.append(n)
773 if (ancestors is not None) and (n in heads):
773 if (ancestors is not None) and (n in heads):
774 # We're trying to figure out which heads are reachable
774 # We're trying to figure out which heads are reachable
775 # from roots.
775 # from roots.
776 # Mark this head as having been reached
776 # Mark this head as having been reached
777 heads[n] = True
777 heads[n] = True
778 elif ancestors is None:
778 elif ancestors is None:
779 # Otherwise, we're trying to discover the heads.
779 # Otherwise, we're trying to discover the heads.
780 # Assume this is a head because if it isn't, the next step
780 # Assume this is a head because if it isn't, the next step
781 # will eventually remove it.
781 # will eventually remove it.
782 heads[n] = True
782 heads[n] = True
783 # But, obviously its parents aren't.
783 # But, obviously its parents aren't.
784 for p in self.parents(n):
784 for p in self.parents(n):
785 heads.pop(p, None)
785 heads.pop(p, None)
786 heads = [n for n, flag in heads.iteritems() if flag]
786 heads = [n for n, flag in heads.iteritems() if flag]
787 roots = list(roots)
787 roots = list(roots)
788 assert orderedout
788 assert orderedout
789 assert roots
789 assert roots
790 assert heads
790 assert heads
791 return (orderedout, roots, heads)
791 return (orderedout, roots, heads)
792
792
793 def headrevs(self):
793 def headrevs(self):
794 try:
794 try:
795 return self.index.headrevs()
795 return self.index.headrevs()
796 except AttributeError:
796 except AttributeError:
797 return self._headrevs()
797 return self._headrevs()
798
798
799 def computephases(self, roots):
799 def computephases(self, roots):
800 return self.index.computephasesmapsets(roots)
800 return self.index.computephasesmapsets(roots)
801
801
802 def _headrevs(self):
802 def _headrevs(self):
803 count = len(self)
803 count = len(self)
804 if not count:
804 if not count:
805 return [nullrev]
805 return [nullrev]
806 # we won't iter over filtered rev so nobody is a head at start
806 # we won't iter over filtered rev so nobody is a head at start
807 ishead = [0] * (count + 1)
807 ishead = [0] * (count + 1)
808 index = self.index
808 index = self.index
809 for r in self:
809 for r in self:
810 ishead[r] = 1 # I may be an head
810 ishead[r] = 1 # I may be an head
811 e = index[r]
811 e = index[r]
812 ishead[e[5]] = ishead[e[6]] = 0 # my parent are not
812 ishead[e[5]] = ishead[e[6]] = 0 # my parent are not
813 return [r for r, val in enumerate(ishead) if val]
813 return [r for r, val in enumerate(ishead) if val]
814
814
815 def heads(self, start=None, stop=None):
815 def heads(self, start=None, stop=None):
816 """return the list of all nodes that have no children
816 """return the list of all nodes that have no children
817
817
818 if start is specified, only heads that are descendants of
818 if start is specified, only heads that are descendants of
819 start will be returned
819 start will be returned
820 if stop is specified, it will consider all the revs from stop
820 if stop is specified, it will consider all the revs from stop
821 as if they had no children
821 as if they had no children
822 """
822 """
823 if start is None and stop is None:
823 if start is None and stop is None:
824 if not len(self):
824 if not len(self):
825 return [nullid]
825 return [nullid]
826 return [self.node(r) for r in self.headrevs()]
826 return [self.node(r) for r in self.headrevs()]
827
827
828 if start is None:
828 if start is None:
829 start = nullid
829 start = nullid
830 if stop is None:
830 if stop is None:
831 stop = []
831 stop = []
832 stoprevs = set([self.rev(n) for n in stop])
832 stoprevs = set([self.rev(n) for n in stop])
833 startrev = self.rev(start)
833 startrev = self.rev(start)
834 reachable = set((startrev,))
834 reachable = set((startrev,))
835 heads = set((startrev,))
835 heads = set((startrev,))
836
836
837 parentrevs = self.parentrevs
837 parentrevs = self.parentrevs
838 for r in self.revs(start=startrev + 1):
838 for r in self.revs(start=startrev + 1):
839 for p in parentrevs(r):
839 for p in parentrevs(r):
840 if p in reachable:
840 if p in reachable:
841 if r not in stoprevs:
841 if r not in stoprevs:
842 reachable.add(r)
842 reachable.add(r)
843 heads.add(r)
843 heads.add(r)
844 if p in heads and p not in stoprevs:
844 if p in heads and p not in stoprevs:
845 heads.remove(p)
845 heads.remove(p)
846
846
847 return [self.node(r) for r in heads]
847 return [self.node(r) for r in heads]
848
848
849 def children(self, node):
849 def children(self, node):
850 """find the children of a given node"""
850 """find the children of a given node"""
851 c = []
851 c = []
852 p = self.rev(node)
852 p = self.rev(node)
853 for r in self.revs(start=p + 1):
853 for r in self.revs(start=p + 1):
854 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
854 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
855 if prevs:
855 if prevs:
856 for pr in prevs:
856 for pr in prevs:
857 if pr == p:
857 if pr == p:
858 c.append(self.node(r))
858 c.append(self.node(r))
859 elif p == nullrev:
859 elif p == nullrev:
860 c.append(self.node(r))
860 c.append(self.node(r))
861 return c
861 return c
862
862
863 def descendant(self, start, end):
863 def descendant(self, start, end):
864 if start == nullrev:
864 if start == nullrev:
865 return True
865 return True
866 for i in self.descendants([start]):
866 for i in self.descendants([start]):
867 if i == end:
867 if i == end:
868 return True
868 return True
869 elif i > end:
869 elif i > end:
870 break
870 break
871 return False
871 return False
872
872
873 def commonancestorsheads(self, a, b):
873 def commonancestorsheads(self, a, b):
874 """calculate all the heads of the common ancestors of nodes a and b"""
874 """calculate all the heads of the common ancestors of nodes a and b"""
875 a, b = self.rev(a), self.rev(b)
875 a, b = self.rev(a), self.rev(b)
876 try:
876 try:
877 ancs = self.index.commonancestorsheads(a, b)
877 ancs = self.index.commonancestorsheads(a, b)
878 except (AttributeError, OverflowError): # C implementation failed
878 except (AttributeError, OverflowError): # C implementation failed
879 ancs = ancestor.commonancestorsheads(self.parentrevs, a, b)
879 ancs = ancestor.commonancestorsheads(self.parentrevs, a, b)
880 return map(self.node, ancs)
880 return map(self.node, ancs)
881
881
882 def isancestor(self, a, b):
882 def isancestor(self, a, b):
883 """return True if node a is an ancestor of node b
883 """return True if node a is an ancestor of node b
884
884
885 The implementation of this is trivial but the use of
885 The implementation of this is trivial but the use of
886 commonancestorsheads is not."""
886 commonancestorsheads is not."""
887 return a in self.commonancestorsheads(a, b)
887 return a in self.commonancestorsheads(a, b)
888
888
889 def ancestor(self, a, b):
889 def ancestor(self, a, b):
890 """calculate the "best" common ancestor of nodes a and b"""
890 """calculate the "best" common ancestor of nodes a and b"""
891
891
892 a, b = self.rev(a), self.rev(b)
892 a, b = self.rev(a), self.rev(b)
893 try:
893 try:
894 ancs = self.index.ancestors(a, b)
894 ancs = self.index.ancestors(a, b)
895 except (AttributeError, OverflowError):
895 except (AttributeError, OverflowError):
896 ancs = ancestor.ancestors(self.parentrevs, a, b)
896 ancs = ancestor.ancestors(self.parentrevs, a, b)
897 if ancs:
897 if ancs:
898 # choose a consistent winner when there's a tie
898 # choose a consistent winner when there's a tie
899 return min(map(self.node, ancs))
899 return min(map(self.node, ancs))
900 return nullid
900 return nullid
901
901
902 def _match(self, id):
902 def _match(self, id):
903 if isinstance(id, int):
903 if isinstance(id, int):
904 # rev
904 # rev
905 return self.node(id)
905 return self.node(id)
906 if len(id) == 20:
906 if len(id) == 20:
907 # possibly a binary node
907 # possibly a binary node
908 # odds of a binary node being all hex in ASCII are 1 in 10**25
908 # odds of a binary node being all hex in ASCII are 1 in 10**25
909 try:
909 try:
910 node = id
910 node = id
911 self.rev(node) # quick search the index
911 self.rev(node) # quick search the index
912 return node
912 return node
913 except LookupError:
913 except LookupError:
914 pass # may be partial hex id
914 pass # may be partial hex id
915 try:
915 try:
916 # str(rev)
916 # str(rev)
917 rev = int(id)
917 rev = int(id)
918 if str(rev) != id:
918 if str(rev) != id:
919 raise ValueError
919 raise ValueError
920 if rev < 0:
920 if rev < 0:
921 rev = len(self) + rev
921 rev = len(self) + rev
922 if rev < 0 or rev >= len(self):
922 if rev < 0 or rev >= len(self):
923 raise ValueError
923 raise ValueError
924 return self.node(rev)
924 return self.node(rev)
925 except (ValueError, OverflowError):
925 except (ValueError, OverflowError):
926 pass
926 pass
927 if len(id) == 40:
927 if len(id) == 40:
928 try:
928 try:
929 # a full hex nodeid?
929 # a full hex nodeid?
930 node = bin(id)
930 node = bin(id)
931 self.rev(node)
931 self.rev(node)
932 return node
932 return node
933 except (TypeError, LookupError):
933 except (TypeError, LookupError):
934 pass
934 pass
935
935
936 def _partialmatch(self, id):
936 def _partialmatch(self, id):
937 try:
937 try:
938 n = self.index.partialmatch(id)
938 n = self.index.partialmatch(id)
939 if n and self.hasnode(n):
939 if n and self.hasnode(n):
940 return n
940 return n
941 return None
941 return None
942 except RevlogError:
942 except RevlogError:
943 # parsers.c radix tree lookup gave multiple matches
943 # parsers.c radix tree lookup gave multiple matches
944 # fall through to slow path that filters hidden revisions
944 # fall through to slow path that filters hidden revisions
945 pass
945 pass
946 except (AttributeError, ValueError):
946 except (AttributeError, ValueError):
947 # we are pure python, or key was too short to search radix tree
947 # we are pure python, or key was too short to search radix tree
948 pass
948 pass
949
949
950 if id in self._pcache:
950 if id in self._pcache:
951 return self._pcache[id]
951 return self._pcache[id]
952
952
953 if len(id) < 40:
953 if len(id) < 40:
954 try:
954 try:
955 # hex(node)[:...]
955 # hex(node)[:...]
956 l = len(id) // 2 # grab an even number of digits
956 l = len(id) // 2 # grab an even number of digits
957 prefix = bin(id[:l * 2])
957 prefix = bin(id[:l * 2])
958 nl = [e[7] for e in self.index if e[7].startswith(prefix)]
958 nl = [e[7] for e in self.index if e[7].startswith(prefix)]
959 nl = [n for n in nl if hex(n).startswith(id) and
959 nl = [n for n in nl if hex(n).startswith(id) and
960 self.hasnode(n)]
960 self.hasnode(n)]
961 if len(nl) > 0:
961 if len(nl) > 0:
962 if len(nl) == 1:
962 if len(nl) == 1:
963 self._pcache[id] = nl[0]
963 self._pcache[id] = nl[0]
964 return nl[0]
964 return nl[0]
965 raise LookupError(id, self.indexfile,
965 raise LookupError(id, self.indexfile,
966 _('ambiguous identifier'))
966 _('ambiguous identifier'))
967 return None
967 return None
968 except TypeError:
968 except TypeError:
969 pass
969 pass
970
970
971 def lookup(self, id):
971 def lookup(self, id):
972 """locate a node based on:
972 """locate a node based on:
973 - revision number or str(revision number)
973 - revision number or str(revision number)
974 - nodeid or subset of hex nodeid
974 - nodeid or subset of hex nodeid
975 """
975 """
976 n = self._match(id)
976 n = self._match(id)
977 if n is not None:
977 if n is not None:
978 return n
978 return n
979 n = self._partialmatch(id)
979 n = self._partialmatch(id)
980 if n:
980 if n:
981 return n
981 return n
982
982
983 raise LookupError(id, self.indexfile, _('no match found'))
983 raise LookupError(id, self.indexfile, _('no match found'))
984
984
985 def cmp(self, node, text):
985 def cmp(self, node, text):
986 """compare text with a given file revision
986 """compare text with a given file revision
987
987
988 returns True if text is different than what is stored.
988 returns True if text is different than what is stored.
989 """
989 """
990 p1, p2 = self.parents(node)
990 p1, p2 = self.parents(node)
991 return hash(text, p1, p2) != node
991 return hash(text, p1, p2) != node
992
992
993 def _addchunk(self, offset, data):
993 def _addchunk(self, offset, data):
994 """Add a segment to the revlog cache.
994 """Add a segment to the revlog cache.
995
995
996 Accepts an absolute offset and the data that is at that location.
996 Accepts an absolute offset and the data that is at that location.
997 """
997 """
998 o, d = self._chunkcache
998 o, d = self._chunkcache
999 # try to add to existing cache
999 # try to add to existing cache
1000 if o + len(d) == offset and len(d) + len(data) < _chunksize:
1000 if o + len(d) == offset and len(d) + len(data) < _chunksize:
1001 self._chunkcache = o, d + data
1001 self._chunkcache = o, d + data
1002 else:
1002 else:
1003 self._chunkcache = offset, data
1003 self._chunkcache = offset, data
1004
1004
1005 def _loadchunk(self, offset, length, df=None):
1005 def _loadchunk(self, offset, length, df=None):
1006 """Load a segment of raw data from the revlog.
1006 """Load a segment of raw data from the revlog.
1007
1007
1008 Accepts an absolute offset, length to read, and an optional existing
1008 Accepts an absolute offset, length to read, and an optional existing
1009 file handle to read from.
1009 file handle to read from.
1010
1010
1011 If an existing file handle is passed, it will be seeked and the
1011 If an existing file handle is passed, it will be seeked and the
1012 original seek position will NOT be restored.
1012 original seek position will NOT be restored.
1013
1013
1014 Returns a str or buffer of raw byte data.
1014 Returns a str or buffer of raw byte data.
1015 """
1015 """
1016 if df is not None:
1016 if df is not None:
1017 closehandle = False
1017 closehandle = False
1018 else:
1018 else:
1019 if self._inline:
1019 if self._inline:
1020 df = self.opener(self.indexfile)
1020 df = self.opener(self.indexfile)
1021 else:
1021 else:
1022 df = self.opener(self.datafile)
1022 df = self.opener(self.datafile)
1023 closehandle = True
1023 closehandle = True
1024
1024
1025 # Cache data both forward and backward around the requested
1025 # Cache data both forward and backward around the requested
1026 # data, in a fixed size window. This helps speed up operations
1026 # data, in a fixed size window. This helps speed up operations
1027 # involving reading the revlog backwards.
1027 # involving reading the revlog backwards.
1028 cachesize = self._chunkcachesize
1028 cachesize = self._chunkcachesize
1029 realoffset = offset & ~(cachesize - 1)
1029 realoffset = offset & ~(cachesize - 1)
1030 reallength = (((offset + length + cachesize) & ~(cachesize - 1))
1030 reallength = (((offset + length + cachesize) & ~(cachesize - 1))
1031 - realoffset)
1031 - realoffset)
1032 df.seek(realoffset)
1032 df.seek(realoffset)
1033 d = df.read(reallength)
1033 d = df.read(reallength)
1034 if closehandle:
1034 if closehandle:
1035 df.close()
1035 df.close()
1036 self._addchunk(realoffset, d)
1036 self._addchunk(realoffset, d)
1037 if offset != realoffset or reallength != length:
1037 if offset != realoffset or reallength != length:
1038 return util.buffer(d, offset - realoffset, length)
1038 return util.buffer(d, offset - realoffset, length)
1039 return d
1039 return d
1040
1040
1041 def _getchunk(self, offset, length, df=None):
1041 def _getchunk(self, offset, length, df=None):
1042 """Obtain a segment of raw data from the revlog.
1042 """Obtain a segment of raw data from the revlog.
1043
1043
1044 Accepts an absolute offset, length of bytes to obtain, and an
1044 Accepts an absolute offset, length of bytes to obtain, and an
1045 optional file handle to the already-opened revlog. If the file
1045 optional file handle to the already-opened revlog. If the file
1046 handle is used, it's original seek position will not be preserved.
1046 handle is used, it's original seek position will not be preserved.
1047
1047
1048 Requests for data may be returned from a cache.
1048 Requests for data may be returned from a cache.
1049
1049
1050 Returns a str or a buffer instance of raw byte data.
1050 Returns a str or a buffer instance of raw byte data.
1051 """
1051 """
1052 o, d = self._chunkcache
1052 o, d = self._chunkcache
1053 l = len(d)
1053 l = len(d)
1054
1054
1055 # is it in the cache?
1055 # is it in the cache?
1056 cachestart = offset - o
1056 cachestart = offset - o
1057 cacheend = cachestart + length
1057 cacheend = cachestart + length
1058 if cachestart >= 0 and cacheend <= l:
1058 if cachestart >= 0 and cacheend <= l:
1059 if cachestart == 0 and cacheend == l:
1059 if cachestart == 0 and cacheend == l:
1060 return d # avoid a copy
1060 return d # avoid a copy
1061 return util.buffer(d, cachestart, cacheend - cachestart)
1061 return util.buffer(d, cachestart, cacheend - cachestart)
1062
1062
1063 return self._loadchunk(offset, length, df=df)
1063 return self._loadchunk(offset, length, df=df)
1064
1064
1065 def _chunkraw(self, startrev, endrev, df=None):
1065 def _chunkraw(self, startrev, endrev, df=None):
1066 """Obtain a segment of raw data corresponding to a range of revisions.
1066 """Obtain a segment of raw data corresponding to a range of revisions.
1067
1067
1068 Accepts the start and end revisions and an optional already-open
1068 Accepts the start and end revisions and an optional already-open
1069 file handle to be used for reading. If the file handle is read, its
1069 file handle to be used for reading. If the file handle is read, its
1070 seek position will not be preserved.
1070 seek position will not be preserved.
1071
1071
1072 Requests for data may be satisfied by a cache.
1072 Requests for data may be satisfied by a cache.
1073
1073
1074 Returns a str or a buffer instance of raw byte data. Callers will
1074 Returns a str or a buffer instance of raw byte data. Callers will
1075 need to call ``self.start(rev)`` and ``self.length()`` to determine
1075 need to call ``self.start(rev)`` and ``self.length()`` to determine
1076 where each revision's data begins and ends.
1076 where each revision's data begins and ends.
1077 """
1077 """
1078 start = self.start(startrev)
1078 start = self.start(startrev)
1079 end = self.end(endrev)
1079 end = self.end(endrev)
1080 if self._inline:
1080 if self._inline:
1081 start += (startrev + 1) * self._io.size
1081 start += (startrev + 1) * self._io.size
1082 end += (endrev + 1) * self._io.size
1082 end += (endrev + 1) * self._io.size
1083 length = end - start
1083 length = end - start
1084 return self._getchunk(start, length, df=df)
1084 return self._getchunk(start, length, df=df)
1085
1085
1086 def _chunk(self, rev, df=None):
1086 def _chunk(self, rev, df=None):
1087 """Obtain a single decompressed chunk for a revision.
1087 """Obtain a single decompressed chunk for a revision.
1088
1088
1089 Accepts an integer revision and an optional already-open file handle
1089 Accepts an integer revision and an optional already-open file handle
1090 to be used for reading. If used, the seek position of the file will not
1090 to be used for reading. If used, the seek position of the file will not
1091 be preserved.
1091 be preserved.
1092
1092
1093 Returns a str holding uncompressed data for the requested revision.
1093 Returns a str holding uncompressed data for the requested revision.
1094 """
1094 """
1095 return decompress(self._chunkraw(rev, rev, df=df))
1095 return decompress(self._chunkraw(rev, rev, df=df))
1096
1096
1097 def _chunks(self, revs, df=None):
1097 def _chunks(self, revs, df=None):
1098 """Obtain decompressed chunks for the specified revisions.
1098 """Obtain decompressed chunks for the specified revisions.
1099
1099
1100 Accepts an iterable of numeric revisions that are assumed to be in
1100 Accepts an iterable of numeric revisions that are assumed to be in
1101 ascending order. Also accepts an optional already-open file handle
1101 ascending order. Also accepts an optional already-open file handle
1102 to be used for reading. If used, the seek position of the file will
1102 to be used for reading. If used, the seek position of the file will
1103 not be preserved.
1103 not be preserved.
1104
1104
1105 This function is similar to calling ``self._chunk()`` multiple times,
1105 This function is similar to calling ``self._chunk()`` multiple times,
1106 but is faster.
1106 but is faster.
1107
1107
1108 Returns a list with decompressed data for each requested revision.
1108 Returns a list with decompressed data for each requested revision.
1109 """
1109 """
1110 if not revs:
1110 if not revs:
1111 return []
1111 return []
1112 start = self.start
1112 start = self.start
1113 length = self.length
1113 length = self.length
1114 inline = self._inline
1114 inline = self._inline
1115 iosize = self._io.size
1115 iosize = self._io.size
1116 buffer = util.buffer
1116 buffer = util.buffer
1117
1117
1118 l = []
1118 l = []
1119 ladd = l.append
1119 ladd = l.append
1120
1120
1121 # preload the cache
1121 # preload the cache
1122 try:
1122 try:
1123 while True:
1123 while True:
1124 # ensure that the cache doesn't change out from under us
1124 # ensure that the cache doesn't change out from under us
1125 _cache = self._chunkcache
1125 _cache = self._chunkcache
1126 self._chunkraw(revs[0], revs[-1], df=df)
1126 self._chunkraw(revs[0], revs[-1], df=df)
1127 if _cache == self._chunkcache:
1127 if _cache == self._chunkcache:
1128 break
1128 break
1129 offset, data = _cache
1129 offset, data = _cache
1130 except OverflowError:
1130 except OverflowError:
1131 # issue4215 - we can't cache a run of chunks greater than
1131 # issue4215 - we can't cache a run of chunks greater than
1132 # 2G on Windows
1132 # 2G on Windows
1133 return [self._chunk(rev, df=df) for rev in revs]
1133 return [self._chunk(rev, df=df) for rev in revs]
1134
1134
1135 for rev in revs:
1135 for rev in revs:
1136 chunkstart = start(rev)
1136 chunkstart = start(rev)
1137 if inline:
1137 if inline:
1138 chunkstart += (rev + 1) * iosize
1138 chunkstart += (rev + 1) * iosize
1139 chunklength = length(rev)
1139 chunklength = length(rev)
1140 ladd(decompress(buffer(data, chunkstart - offset, chunklength)))
1140 ladd(decompress(buffer(data, chunkstart - offset, chunklength)))
1141
1141
1142 return l
1142 return l
1143
1143
1144 def _chunkclear(self):
1144 def _chunkclear(self):
1145 """Clear the raw chunk cache."""
1145 """Clear the raw chunk cache."""
1146 self._chunkcache = (0, '')
1146 self._chunkcache = (0, '')
1147
1147
1148 def deltaparent(self, rev):
1148 def deltaparent(self, rev):
1149 """return deltaparent of the given revision"""
1149 """return deltaparent of the given revision"""
1150 base = self.index[rev][3]
1150 base = self.index[rev][3]
1151 if base == rev:
1151 if base == rev:
1152 return nullrev
1152 return nullrev
1153 elif self._generaldelta:
1153 elif self._generaldelta:
1154 return base
1154 return base
1155 else:
1155 else:
1156 return rev - 1
1156 return rev - 1
1157
1157
1158 def revdiff(self, rev1, rev2):
1158 def revdiff(self, rev1, rev2):
1159 """return or calculate a delta between two revisions"""
1159 """return or calculate a delta between two revisions"""
1160 if rev1 != nullrev and self.deltaparent(rev2) == rev1:
1160 if rev1 != nullrev and self.deltaparent(rev2) == rev1:
1161 return str(self._chunk(rev2))
1161 return str(self._chunk(rev2))
1162
1162
1163 return mdiff.textdiff(self.revision(rev1),
1163 return mdiff.textdiff(self.revision(rev1),
1164 self.revision(rev2))
1164 self.revision(rev2))
1165
1165
1166 def revision(self, nodeorrev, _df=None):
1166 def revision(self, nodeorrev, _df=None):
1167 """return an uncompressed revision of a given node or revision
1167 """return an uncompressed revision of a given node or revision
1168 number.
1168 number.
1169
1169
1170 _df is an existing file handle to read from. It is meant to only be
1170 _df is an existing file handle to read from. It is meant to only be
1171 used internally.
1171 used internally.
1172 """
1172 """
1173 if isinstance(nodeorrev, int):
1173 if isinstance(nodeorrev, int):
1174 rev = nodeorrev
1174 rev = nodeorrev
1175 node = self.node(rev)
1175 node = self.node(rev)
1176 else:
1176 else:
1177 node = nodeorrev
1177 node = nodeorrev
1178 rev = None
1178 rev = None
1179
1179
1180 cachedrev = None
1180 cachedrev = None
1181 if node == nullid:
1181 if node == nullid:
1182 return ""
1182 return ""
1183 if self._cache:
1183 if self._cache:
1184 if self._cache[0] == node:
1184 if self._cache[0] == node:
1185 return self._cache[2]
1185 return self._cache[2]
1186 cachedrev = self._cache[1]
1186 cachedrev = self._cache[1]
1187
1187
1188 # look up what we need to read
1188 # look up what we need to read
1189 text = None
1189 text = None
1190 if rev is None:
1190 if rev is None:
1191 rev = self.rev(node)
1191 rev = self.rev(node)
1192
1192
1193 # check rev flags
1193 # check rev flags
1194 if self.flags(rev) & ~REVIDX_KNOWN_FLAGS:
1194 if self.flags(rev) & ~REVIDX_KNOWN_FLAGS:
1195 raise RevlogError(_('incompatible revision flag %x') %
1195 raise RevlogError(_('incompatible revision flag %x') %
1196 (self.flags(rev) & ~REVIDX_KNOWN_FLAGS))
1196 (self.flags(rev) & ~REVIDX_KNOWN_FLAGS))
1197
1197
1198 chain, stopped = self._deltachain(rev, stoprev=cachedrev)
1198 chain, stopped = self._deltachain(rev, stoprev=cachedrev)
1199 if stopped:
1199 if stopped:
1200 text = self._cache[2]
1200 text = self._cache[2]
1201
1201
1202 # drop cache to save memory
1202 # drop cache to save memory
1203 self._cache = None
1203 self._cache = None
1204
1204
1205 bins = self._chunks(chain, df=_df)
1205 bins = self._chunks(chain, df=_df)
1206 if text is None:
1206 if text is None:
1207 text = str(bins[0])
1207 text = str(bins[0])
1208 bins = bins[1:]
1208 bins = bins[1:]
1209
1209
1210 text = mdiff.patches(text, bins)
1210 text = mdiff.patches(text, bins)
1211
1211
1212 text = self._checkhash(text, node, rev)
1212 text = self._checkhash(text, node, rev)
1213
1213
1214 self._cache = (node, rev, text)
1214 self._cache = (node, rev, text)
1215 return text
1215 return text
1216
1216
1217 def hash(self, text, p1, p2):
1217 def hash(self, text, p1, p2):
1218 """Compute a node hash.
1218 """Compute a node hash.
1219
1219
1220 Available as a function so that subclasses can replace the hash
1220 Available as a function so that subclasses can replace the hash
1221 as needed.
1221 as needed.
1222 """
1222 """
1223 return hash(text, p1, p2)
1223 return hash(text, p1, p2)
1224
1224
1225 def _checkhash(self, text, node, rev):
1225 def _checkhash(self, text, node, rev):
1226 p1, p2 = self.parents(node)
1226 p1, p2 = self.parents(node)
1227 self.checkhash(text, p1, p2, node, rev)
1227 self.checkhash(text, p1, p2, node, rev)
1228 return text
1228 return text
1229
1229
1230 def checkhash(self, text, p1, p2, node, rev=None):
1230 def checkhash(self, text, p1, p2, node, rev=None):
1231 if node != self.hash(text, p1, p2):
1231 if node != self.hash(text, p1, p2):
1232 revornode = rev
1232 revornode = rev
1233 if revornode is None:
1233 if revornode is None:
1234 revornode = templatefilters.short(hex(node))
1234 revornode = templatefilters.short(hex(node))
1235 raise RevlogError(_("integrity check failed on %s:%s")
1235 raise RevlogError(_("integrity check failed on %s:%s")
1236 % (self.indexfile, revornode))
1236 % (self.indexfile, revornode))
1237
1237
1238 def checkinlinesize(self, tr, fp=None):
1238 def checkinlinesize(self, tr, fp=None):
1239 """Check if the revlog is too big for inline and convert if so.
1239 """Check if the revlog is too big for inline and convert if so.
1240
1240
1241 This should be called after revisions are added to the revlog. If the
1241 This should be called after revisions are added to the revlog. If the
1242 revlog has grown too large to be an inline revlog, it will convert it
1242 revlog has grown too large to be an inline revlog, it will convert it
1243 to use multiple index and data files.
1243 to use multiple index and data files.
1244 """
1244 """
1245 if not self._inline or (self.start(-2) + self.length(-2)) < _maxinline:
1245 if not self._inline or (self.start(-2) + self.length(-2)) < _maxinline:
1246 return
1246 return
1247
1247
1248 trinfo = tr.find(self.indexfile)
1248 trinfo = tr.find(self.indexfile)
1249 if trinfo is None:
1249 if trinfo is None:
1250 raise RevlogError(_("%s not found in the transaction")
1250 raise RevlogError(_("%s not found in the transaction")
1251 % self.indexfile)
1251 % self.indexfile)
1252
1252
1253 trindex = trinfo[2]
1253 trindex = trinfo[2]
1254 if trindex is not None:
1254 if trindex is not None:
1255 dataoff = self.start(trindex)
1255 dataoff = self.start(trindex)
1256 else:
1256 else:
1257 # revlog was stripped at start of transaction, use all leftover data
1257 # revlog was stripped at start of transaction, use all leftover data
1258 trindex = len(self) - 1
1258 trindex = len(self) - 1
1259 dataoff = self.end(-2)
1259 dataoff = self.end(-2)
1260
1260
1261 tr.add(self.datafile, dataoff)
1261 tr.add(self.datafile, dataoff)
1262
1262
1263 if fp:
1263 if fp:
1264 fp.flush()
1264 fp.flush()
1265 fp.close()
1265 fp.close()
1266
1266
1267 df = self.opener(self.datafile, 'w')
1267 df = self.opener(self.datafile, 'w')
1268 try:
1268 try:
1269 for r in self:
1269 for r in self:
1270 df.write(self._chunkraw(r, r))
1270 df.write(self._chunkraw(r, r))
1271 finally:
1271 finally:
1272 df.close()
1272 df.close()
1273
1273
1274 fp = self.opener(self.indexfile, 'w', atomictemp=True)
1274 fp = self.opener(self.indexfile, 'w', atomictemp=True)
1275 self.version &= ~(REVLOGNGINLINEDATA)
1275 self.version &= ~(REVLOGNGINLINEDATA)
1276 self._inline = False
1276 self._inline = False
1277 for i in self:
1277 for i in self:
1278 e = self._io.packentry(self.index[i], self.node, self.version, i)
1278 e = self._io.packentry(self.index[i], self.node, self.version, i)
1279 fp.write(e)
1279 fp.write(e)
1280
1280
1281 # if we don't call close, the temp file will never replace the
1281 # if we don't call close, the temp file will never replace the
1282 # real index
1282 # real index
1283 fp.close()
1283 fp.close()
1284
1284
1285 tr.replace(self.indexfile, trindex * self._io.size)
1285 tr.replace(self.indexfile, trindex * self._io.size)
1286 self._chunkclear()
1286 self._chunkclear()
1287
1287
1288 def addrevision(self, text, transaction, link, p1, p2, cachedelta=None,
1288 def addrevision(self, text, transaction, link, p1, p2, cachedelta=None,
1289 node=None):
1289 node=None):
1290 """add a revision to the log
1290 """add a revision to the log
1291
1291
1292 text - the revision data to add
1292 text - the revision data to add
1293 transaction - the transaction object used for rollback
1293 transaction - the transaction object used for rollback
1294 link - the linkrev data to add
1294 link - the linkrev data to add
1295 p1, p2 - the parent nodeids of the revision
1295 p1, p2 - the parent nodeids of the revision
1296 cachedelta - an optional precomputed delta
1296 cachedelta - an optional precomputed delta
1297 node - nodeid of revision; typically node is not specified, and it is
1297 node - nodeid of revision; typically node is not specified, and it is
1298 computed by default as hash(text, p1, p2), however subclasses might
1298 computed by default as hash(text, p1, p2), however subclasses might
1299 use different hashing method (and override checkhash() in such case)
1299 use different hashing method (and override checkhash() in such case)
1300 """
1300 """
1301 if link == nullrev:
1301 if link == nullrev:
1302 raise RevlogError(_("attempted to add linkrev -1 to %s")
1302 raise RevlogError(_("attempted to add linkrev -1 to %s")
1303 % self.indexfile)
1303 % self.indexfile)
1304
1304
1305 if len(text) > _maxentrysize:
1305 if len(text) > _maxentrysize:
1306 raise RevlogError(
1306 raise RevlogError(
1307 _("%s: size of %d bytes exceeds maximum revlog storage of 2GiB")
1307 _("%s: size of %d bytes exceeds maximum revlog storage of 2GiB")
1308 % (self.indexfile, len(text)))
1308 % (self.indexfile, len(text)))
1309
1309
1310 node = node or self.hash(text, p1, p2)
1310 node = node or self.hash(text, p1, p2)
1311 if node in self.nodemap:
1311 if node in self.nodemap:
1312 return node
1312 return node
1313
1313
1314 dfh = None
1314 dfh = None
1315 if not self._inline:
1315 if not self._inline:
1316 dfh = self.opener(self.datafile, "a+")
1316 dfh = self.opener(self.datafile, "a+")
1317 ifh = self.opener(self.indexfile, "a+")
1317 ifh = self.opener(self.indexfile, "a+")
1318 try:
1318 try:
1319 return self._addrevision(node, text, transaction, link, p1, p2,
1319 return self._addrevision(node, text, transaction, link, p1, p2,
1320 REVIDX_DEFAULT_FLAGS, cachedelta, ifh, dfh)
1320 REVIDX_DEFAULT_FLAGS, cachedelta, ifh, dfh)
1321 finally:
1321 finally:
1322 if dfh:
1322 if dfh:
1323 dfh.close()
1323 dfh.close()
1324 ifh.close()
1324 ifh.close()
1325
1325
1326 def compress(self, text):
1326 def compress(self, text):
1327 """ generate a possibly-compressed representation of text """
1327 """ generate a possibly-compressed representation of text """
1328 if not text:
1328 if not text:
1329 return ("", text)
1329 return ("", text)
1330 l = len(text)
1330 l = len(text)
1331 bin = None
1331 bin = None
1332 if l < 44:
1332 if l < 44:
1333 pass
1333 pass
1334 elif l > 1000000:
1334 elif l > 1000000:
1335 # zlib makes an internal copy, thus doubling memory usage for
1335 # zlib makes an internal copy, thus doubling memory usage for
1336 # large files, so lets do this in pieces
1336 # large files, so lets do this in pieces
1337 z = zlib.compressobj()
1337 z = zlib.compressobj()
1338 p = []
1338 p = []
1339 pos = 0
1339 pos = 0
1340 while pos < l:
1340 while pos < l:
1341 pos2 = pos + 2**20
1341 pos2 = pos + 2**20
1342 p.append(z.compress(text[pos:pos2]))
1342 p.append(z.compress(text[pos:pos2]))
1343 pos = pos2
1343 pos = pos2
1344 p.append(z.flush())
1344 p.append(z.flush())
1345 if sum(map(len, p)) < l:
1345 if sum(map(len, p)) < l:
1346 bin = "".join(p)
1346 bin = "".join(p)
1347 else:
1347 else:
1348 bin = _compress(text)
1348 bin = _compress(text)
1349 if bin is None or len(bin) > l:
1349 if bin is None or len(bin) > l:
1350 if text[0] == '\0':
1350 if text[0] == '\0':
1351 return ("", text)
1351 return ("", text)
1352 return ('u', text)
1352 return ('u', text)
1353 return ("", bin)
1353 return ("", bin)
1354
1354
1355 def _isgooddelta(self, d, textlen):
1355 def _isgooddelta(self, d, textlen):
1356 """Returns True if the given delta is good. Good means that it is within
1356 """Returns True if the given delta is good. Good means that it is within
1357 the disk span, disk size, and chain length bounds that we know to be
1357 the disk span, disk size, and chain length bounds that we know to be
1358 performant."""
1358 performant."""
1359 if d is None:
1359 if d is None:
1360 return False
1360 return False
1361
1361
1362 # - 'dist' is the distance from the base revision -- bounding it limits
1362 # - 'dist' is the distance from the base revision -- bounding it limits
1363 # the amount of I/O we need to do.
1363 # the amount of I/O we need to do.
1364 # - 'compresseddeltalen' is the sum of the total size of deltas we need
1364 # - 'compresseddeltalen' is the sum of the total size of deltas we need
1365 # to apply -- bounding it limits the amount of CPU we consume.
1365 # to apply -- bounding it limits the amount of CPU we consume.
1366 dist, l, data, base, chainbase, chainlen, compresseddeltalen = d
1366 dist, l, data, base, chainbase, chainlen, compresseddeltalen = d
1367 if (dist > textlen * 4 or l > textlen or
1367 if (dist > textlen * 4 or l > textlen or
1368 compresseddeltalen > textlen * 2 or
1368 compresseddeltalen > textlen * 2 or
1369 (self._maxchainlen and chainlen > self._maxchainlen)):
1369 (self._maxchainlen and chainlen > self._maxchainlen)):
1370 return False
1370 return False
1371
1371
1372 return True
1372 return True
1373
1373
1374 def _addrevision(self, node, text, transaction, link, p1, p2, flags,
1374 def _addrevision(self, node, text, transaction, link, p1, p2, flags,
1375 cachedelta, ifh, dfh, alwayscache=False):
1375 cachedelta, ifh, dfh, alwayscache=False):
1376 """internal function to add revisions to the log
1376 """internal function to add revisions to the log
1377
1377
1378 see addrevision for argument descriptions.
1378 see addrevision for argument descriptions.
1379 invariants:
1379 invariants:
1380 - text is optional (can be None); if not set, cachedelta must be set.
1380 - text is optional (can be None); if not set, cachedelta must be set.
1381 if both are set, they must correspond to each other.
1381 if both are set, they must correspond to each other.
1382 """
1382 """
1383 btext = [text]
1383 btext = [text]
1384 def buildtext():
1384 def buildtext():
1385 if btext[0] is not None:
1385 if btext[0] is not None:
1386 return btext[0]
1386 return btext[0]
1387 baserev = cachedelta[0]
1387 baserev = cachedelta[0]
1388 delta = cachedelta[1]
1388 delta = cachedelta[1]
1389 # special case deltas which replace entire base; no need to decode
1389 # special case deltas which replace entire base; no need to decode
1390 # base revision. this neatly avoids censored bases, which throw when
1390 # base revision. this neatly avoids censored bases, which throw when
1391 # they're decoded.
1391 # they're decoded.
1392 hlen = struct.calcsize(">lll")
1392 hlen = struct.calcsize(">lll")
1393 if delta[:hlen] == mdiff.replacediffheader(self.rawsize(baserev),
1393 if delta[:hlen] == mdiff.replacediffheader(self.rawsize(baserev),
1394 len(delta) - hlen):
1394 len(delta) - hlen):
1395 btext[0] = delta[hlen:]
1395 btext[0] = delta[hlen:]
1396 else:
1396 else:
1397 if self._inline:
1397 if self._inline:
1398 fh = ifh
1398 fh = ifh
1399 else:
1399 else:
1400 fh = dfh
1400 fh = dfh
1401 basetext = self.revision(self.node(baserev), _df=fh)
1401 basetext = self.revision(self.node(baserev), _df=fh)
1402 btext[0] = mdiff.patch(basetext, delta)
1402 btext[0] = mdiff.patch(basetext, delta)
1403 try:
1403 try:
1404 self.checkhash(btext[0], p1, p2, node)
1404 self.checkhash(btext[0], p1, p2, node)
1405 if flags & REVIDX_ISCENSORED:
1405 if flags & REVIDX_ISCENSORED:
1406 raise RevlogError(_('node %s is not censored') % node)
1406 raise RevlogError(_('node %s is not censored') % node)
1407 except CensoredNodeError:
1407 except CensoredNodeError:
1408 # must pass the censored index flag to add censored revisions
1408 # must pass the censored index flag to add censored revisions
1409 if not flags & REVIDX_ISCENSORED:
1409 if not flags & REVIDX_ISCENSORED:
1410 raise
1410 raise
1411 return btext[0]
1411 return btext[0]
1412
1412
1413 def builddelta(rev):
1413 def builddelta(rev):
1414 # can we use the cached delta?
1414 # can we use the cached delta?
1415 if cachedelta and cachedelta[0] == rev:
1415 if cachedelta and cachedelta[0] == rev:
1416 delta = cachedelta[1]
1416 delta = cachedelta[1]
1417 else:
1417 else:
1418 t = buildtext()
1418 t = buildtext()
1419 if self.iscensored(rev):
1419 if self.iscensored(rev):
1420 # deltas based on a censored revision must replace the
1420 # deltas based on a censored revision must replace the
1421 # full content in one patch, so delta works everywhere
1421 # full content in one patch, so delta works everywhere
1422 header = mdiff.replacediffheader(self.rawsize(rev), len(t))
1422 header = mdiff.replacediffheader(self.rawsize(rev), len(t))
1423 delta = header + t
1423 delta = header + t
1424 else:
1424 else:
1425 if self._inline:
1425 if self._inline:
1426 fh = ifh
1426 fh = ifh
1427 else:
1427 else:
1428 fh = dfh
1428 fh = dfh
1429 ptext = self.revision(self.node(rev), _df=fh)
1429 ptext = self.revision(self.node(rev), _df=fh)
1430 delta = mdiff.textdiff(ptext, t)
1430 delta = mdiff.textdiff(ptext, t)
1431 data = self.compress(delta)
1431 data = self.compress(delta)
1432 l = len(data[1]) + len(data[0])
1432 l = len(data[1]) + len(data[0])
1433 if basecache[0] == rev:
1433 if basecache[0] == rev:
1434 chainbase = basecache[1]
1434 chainbase = basecache[1]
1435 else:
1435 else:
1436 chainbase = self.chainbase(rev)
1436 chainbase = self.chainbase(rev)
1437 dist = l + offset - self.start(chainbase)
1437 dist = l + offset - self.start(chainbase)
1438 if self._generaldelta:
1438 if self._generaldelta:
1439 base = rev
1439 base = rev
1440 else:
1440 else:
1441 base = chainbase
1441 base = chainbase
1442 chainlen, compresseddeltalen = self._chaininfo(rev)
1442 chainlen, compresseddeltalen = self._chaininfo(rev)
1443 chainlen += 1
1443 chainlen += 1
1444 compresseddeltalen += l
1444 compresseddeltalen += l
1445 return dist, l, data, base, chainbase, chainlen, compresseddeltalen
1445 return dist, l, data, base, chainbase, chainlen, compresseddeltalen
1446
1446
1447 curr = len(self)
1447 curr = len(self)
1448 prev = curr - 1
1448 prev = curr - 1
1449 base = chainbase = curr
1449 base = chainbase = curr
1450 offset = self.end(prev)
1450 offset = self.end(prev)
1451 delta = None
1451 delta = None
1452 if self._basecache is None:
1452 if self._basecache is None:
1453 self._basecache = (prev, self.chainbase(prev))
1453 self._basecache = (prev, self.chainbase(prev))
1454 basecache = self._basecache
1454 basecache = self._basecache
1455 p1r, p2r = self.rev(p1), self.rev(p2)
1455 p1r, p2r = self.rev(p1), self.rev(p2)
1456
1456
1457 # full versions are inserted when the needed deltas
1457 # full versions are inserted when the needed deltas
1458 # become comparable to the uncompressed text
1458 # become comparable to the uncompressed text
1459 if text is None:
1459 if text is None:
1460 textlen = mdiff.patchedsize(self.rawsize(cachedelta[0]),
1460 textlen = mdiff.patchedsize(self.rawsize(cachedelta[0]),
1461 cachedelta[1])
1461 cachedelta[1])
1462 else:
1462 else:
1463 textlen = len(text)
1463 textlen = len(text)
1464
1464
1465 # should we try to build a delta?
1465 # should we try to build a delta?
1466 if prev != nullrev:
1466 if prev != nullrev:
1467 tested = set()
1467 tested = set()
1468 if cachedelta and self._generaldelta and self._lazydeltabase:
1468 if cachedelta and self._generaldelta and self._lazydeltabase:
1469 # Assume what we received from the server is a good choice
1469 # Assume what we received from the server is a good choice
1470 # build delta will reuse the cache
1470 # build delta will reuse the cache
1471 candidatedelta = builddelta(cachedelta[0])
1471 candidatedelta = builddelta(cachedelta[0])
1472 tested.add(cachedelta[0])
1472 tested.add(cachedelta[0])
1473 if self._isgooddelta(candidatedelta, textlen):
1473 if self._isgooddelta(candidatedelta, textlen):
1474 delta = candidatedelta
1474 delta = candidatedelta
1475 if delta is None and self._generaldelta:
1475 if delta is None and self._generaldelta:
1476 # exclude already lazy tested base if any
1476 # exclude already lazy tested base if any
1477 parents = [p for p in (p1r, p2r)
1477 parents = [p for p in (p1r, p2r)
1478 if p != nullrev and p not in tested]
1478 if p != nullrev and p not in tested]
1479 if parents and not self._aggressivemergedeltas:
1479 if parents and not self._aggressivemergedeltas:
1480 # Pick whichever parent is closer to us (to minimize the
1480 # Pick whichever parent is closer to us (to minimize the
1481 # chance of having to build a fulltext).
1481 # chance of having to build a fulltext).
1482 parents = [max(parents)]
1482 parents = [max(parents)]
1483 tested.update(parents)
1483 tested.update(parents)
1484 pdeltas = []
1484 pdeltas = []
1485 for p in parents:
1485 for p in parents:
1486 pd = builddelta(p)
1486 pd = builddelta(p)
1487 if self._isgooddelta(pd, textlen):
1487 if self._isgooddelta(pd, textlen):
1488 pdeltas.append(pd)
1488 pdeltas.append(pd)
1489 if pdeltas:
1489 if pdeltas:
1490 delta = min(pdeltas, key=lambda x: x[1])
1490 delta = min(pdeltas, key=lambda x: x[1])
1491 if delta is None and prev not in tested:
1491 if delta is None and prev not in tested:
1492 # other approach failed try against prev to hopefully save us a
1492 # other approach failed try against prev to hopefully save us a
1493 # fulltext.
1493 # fulltext.
1494 candidatedelta = builddelta(prev)
1494 candidatedelta = builddelta(prev)
1495 if self._isgooddelta(candidatedelta, textlen):
1495 if self._isgooddelta(candidatedelta, textlen):
1496 delta = candidatedelta
1496 delta = candidatedelta
1497 if delta is not None:
1497 if delta is not None:
1498 dist, l, data, base, chainbase, chainlen, compresseddeltalen = delta
1498 dist, l, data, base, chainbase, chainlen, compresseddeltalen = delta
1499 else:
1499 else:
1500 text = buildtext()
1500 text = buildtext()
1501 data = self.compress(text)
1501 data = self.compress(text)
1502 l = len(data[1]) + len(data[0])
1502 l = len(data[1]) + len(data[0])
1503 base = chainbase = curr
1503 base = chainbase = curr
1504
1504
1505 e = (offset_type(offset, flags), l, textlen,
1505 e = (offset_type(offset, flags), l, textlen,
1506 base, link, p1r, p2r, node)
1506 base, link, p1r, p2r, node)
1507 self.index.insert(-1, e)
1507 self.index.insert(-1, e)
1508 self.nodemap[node] = curr
1508 self.nodemap[node] = curr
1509
1509
1510 entry = self._io.packentry(e, self.node, self.version, curr)
1510 entry = self._io.packentry(e, self.node, self.version, curr)
1511 self._writeentry(transaction, ifh, dfh, entry, data, link, offset)
1511 self._writeentry(transaction, ifh, dfh, entry, data, link, offset)
1512
1512
1513 if alwayscache and text is None:
1513 if alwayscache and text is None:
1514 text = buildtext()
1514 text = buildtext()
1515
1515
1516 if type(text) == str: # only accept immutable objects
1516 if type(text) == str: # only accept immutable objects
1517 self._cache = (node, curr, text)
1517 self._cache = (node, curr, text)
1518 self._basecache = (curr, chainbase)
1518 self._basecache = (curr, chainbase)
1519 return node
1519 return node
1520
1520
1521 def _writeentry(self, transaction, ifh, dfh, entry, data, link, offset):
1521 def _writeentry(self, transaction, ifh, dfh, entry, data, link, offset):
1522 # Files opened in a+ mode have inconsistent behavior on various
1522 # Files opened in a+ mode have inconsistent behavior on various
1523 # platforms. Windows requires that a file positioning call be made
1523 # platforms. Windows requires that a file positioning call be made
1524 # when the file handle transitions between reads and writes. See
1524 # when the file handle transitions between reads and writes. See
1525 # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
1525 # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
1526 # platforms, Python or the platform itself can be buggy. Some versions
1526 # platforms, Python or the platform itself can be buggy. Some versions
1527 # of Solaris have been observed to not append at the end of the file
1527 # of Solaris have been observed to not append at the end of the file
1528 # if the file was seeked to before the end. See issue4943 for more.
1528 # if the file was seeked to before the end. See issue4943 for more.
1529 #
1529 #
1530 # We work around this issue by inserting a seek() before writing.
1530 # We work around this issue by inserting a seek() before writing.
1531 # Note: This is likely not necessary on Python 3.
1531 # Note: This is likely not necessary on Python 3.
1532 ifh.seek(0, os.SEEK_END)
1532 ifh.seek(0, os.SEEK_END)
1533 if dfh:
1533 if dfh:
1534 dfh.seek(0, os.SEEK_END)
1534 dfh.seek(0, os.SEEK_END)
1535
1535
1536 curr = len(self) - 1
1536 curr = len(self) - 1
1537 if not self._inline:
1537 if not self._inline:
1538 transaction.add(self.datafile, offset)
1538 transaction.add(self.datafile, offset)
1539 transaction.add(self.indexfile, curr * len(entry))
1539 transaction.add(self.indexfile, curr * len(entry))
1540 if data[0]:
1540 if data[0]:
1541 dfh.write(data[0])
1541 dfh.write(data[0])
1542 dfh.write(data[1])
1542 dfh.write(data[1])
1543 ifh.write(entry)
1543 ifh.write(entry)
1544 else:
1544 else:
1545 offset += curr * self._io.size
1545 offset += curr * self._io.size
1546 transaction.add(self.indexfile, offset, curr)
1546 transaction.add(self.indexfile, offset, curr)
1547 ifh.write(entry)
1547 ifh.write(entry)
1548 ifh.write(data[0])
1548 ifh.write(data[0])
1549 ifh.write(data[1])
1549 ifh.write(data[1])
1550 self.checkinlinesize(transaction, ifh)
1550 self.checkinlinesize(transaction, ifh)
1551
1551
1552 def addgroup(self, cg, linkmapper, transaction, addrevisioncb=None):
1552 def addgroup(self, cg, linkmapper, transaction, addrevisioncb=None):
1553 """
1553 """
1554 add a delta group
1554 add a delta group
1555
1555
1556 given a set of deltas, add them to the revision log. the
1556 given a set of deltas, add them to the revision log. the
1557 first delta is against its parent, which should be in our
1557 first delta is against its parent, which should be in our
1558 log, the rest are against the previous delta.
1558 log, the rest are against the previous delta.
1559
1559
1560 If ``addrevisioncb`` is defined, it will be called with arguments of
1560 If ``addrevisioncb`` is defined, it will be called with arguments of
1561 this revlog and the node that was added.
1561 this revlog and the node that was added.
1562 """
1562 """
1563
1563
1564 # track the base of the current delta log
1564 # track the base of the current delta log
1565 content = []
1565 content = []
1566 node = None
1566 node = None
1567
1567
1568 r = len(self)
1568 r = len(self)
1569 end = 0
1569 end = 0
1570 if r:
1570 if r:
1571 end = self.end(r - 1)
1571 end = self.end(r - 1)
1572 ifh = self.opener(self.indexfile, "a+")
1572 ifh = self.opener(self.indexfile, "a+")
1573 isize = r * self._io.size
1573 isize = r * self._io.size
1574 if self._inline:
1574 if self._inline:
1575 transaction.add(self.indexfile, end + isize, r)
1575 transaction.add(self.indexfile, end + isize, r)
1576 dfh = None
1576 dfh = None
1577 else:
1577 else:
1578 transaction.add(self.indexfile, isize, r)
1578 transaction.add(self.indexfile, isize, r)
1579 transaction.add(self.datafile, end)
1579 transaction.add(self.datafile, end)
1580 dfh = self.opener(self.datafile, "a+")
1580 dfh = self.opener(self.datafile, "a+")
1581 def flush():
1581 def flush():
1582 if dfh:
1582 if dfh:
1583 dfh.flush()
1583 dfh.flush()
1584 ifh.flush()
1584 ifh.flush()
1585 try:
1585 try:
1586 # loop through our set of deltas
1586 # loop through our set of deltas
1587 chain = None
1587 chain = None
1588 while True:
1588 while True:
1589 chunkdata = cg.deltachunk(chain)
1589 chunkdata = cg.deltachunk(chain)
1590 if not chunkdata:
1590 if not chunkdata:
1591 break
1591 break
1592 node = chunkdata['node']
1592 node = chunkdata['node']
1593 p1 = chunkdata['p1']
1593 p1 = chunkdata['p1']
1594 p2 = chunkdata['p2']
1594 p2 = chunkdata['p2']
1595 cs = chunkdata['cs']
1595 cs = chunkdata['cs']
1596 deltabase = chunkdata['deltabase']
1596 deltabase = chunkdata['deltabase']
1597 delta = chunkdata['delta']
1597 delta = chunkdata['delta']
1598 flags = chunkdata['flags'] or REVIDX_DEFAULT_FLAGS
1598 flags = chunkdata['flags'] or REVIDX_DEFAULT_FLAGS
1599
1599
1600 content.append(node)
1600 content.append(node)
1601
1601
1602 link = linkmapper(cs)
1602 link = linkmapper(cs)
1603 if node in self.nodemap:
1603 if node in self.nodemap:
1604 # this can happen if two branches make the same change
1604 # this can happen if two branches make the same change
1605 chain = node
1605 chain = node
1606 continue
1606 continue
1607
1607
1608 for p in (p1, p2):
1608 for p in (p1, p2):
1609 if p not in self.nodemap:
1609 if p not in self.nodemap:
1610 raise LookupError(p, self.indexfile,
1610 raise LookupError(p, self.indexfile,
1611 _('unknown parent'))
1611 _('unknown parent'))
1612
1612
1613 if deltabase not in self.nodemap:
1613 if deltabase not in self.nodemap:
1614 raise LookupError(deltabase, self.indexfile,
1614 raise LookupError(deltabase, self.indexfile,
1615 _('unknown delta base'))
1615 _('unknown delta base'))
1616
1616
1617 baserev = self.rev(deltabase)
1617 baserev = self.rev(deltabase)
1618
1618
1619 if baserev != nullrev and self.iscensored(baserev):
1619 if baserev != nullrev and self.iscensored(baserev):
1620 # if base is censored, delta must be full replacement in a
1620 # if base is censored, delta must be full replacement in a
1621 # single patch operation
1621 # single patch operation
1622 hlen = struct.calcsize(">lll")
1622 hlen = struct.calcsize(">lll")
1623 oldlen = self.rawsize(baserev)
1623 oldlen = self.rawsize(baserev)
1624 newlen = len(delta) - hlen
1624 newlen = len(delta) - hlen
1625 if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
1625 if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
1626 raise error.CensoredBaseError(self.indexfile,
1626 raise error.CensoredBaseError(self.indexfile,
1627 self.node(baserev))
1627 self.node(baserev))
1628
1628
1629 if not flags and self._peek_iscensored(baserev, delta, flush):
1629 if not flags and self._peek_iscensored(baserev, delta, flush):
1630 flags |= REVIDX_ISCENSORED
1630 flags |= REVIDX_ISCENSORED
1631
1631
1632 # We assume consumers of addrevisioncb will want to retrieve
1632 # We assume consumers of addrevisioncb will want to retrieve
1633 # the added revision, which will require a call to
1633 # the added revision, which will require a call to
1634 # revision(). revision() will fast path if there is a cache
1634 # revision(). revision() will fast path if there is a cache
1635 # hit. So, we tell _addrevision() to always cache in this case.
1635 # hit. So, we tell _addrevision() to always cache in this case.
1636 chain = self._addrevision(node, None, transaction, link,
1636 chain = self._addrevision(node, None, transaction, link,
1637 p1, p2, flags, (baserev, delta),
1637 p1, p2, flags, (baserev, delta),
1638 ifh, dfh,
1638 ifh, dfh,
1639 alwayscache=bool(addrevisioncb))
1639 alwayscache=bool(addrevisioncb))
1640
1640
1641 if addrevisioncb:
1641 if addrevisioncb:
1642 addrevisioncb(self, chain)
1642 addrevisioncb(self, chain)
1643
1643
1644 if not dfh and not self._inline:
1644 if not dfh and not self._inline:
1645 # addrevision switched from inline to conventional
1645 # addrevision switched from inline to conventional
1646 # reopen the index
1646 # reopen the index
1647 ifh.close()
1647 ifh.close()
1648 dfh = self.opener(self.datafile, "a+")
1648 dfh = self.opener(self.datafile, "a+")
1649 ifh = self.opener(self.indexfile, "a+")
1649 ifh = self.opener(self.indexfile, "a+")
1650 finally:
1650 finally:
1651 if dfh:
1651 if dfh:
1652 dfh.close()
1652 dfh.close()
1653 ifh.close()
1653 ifh.close()
1654
1654
1655 return content
1655 return content
1656
1656
1657 def iscensored(self, rev):
1657 def iscensored(self, rev):
1658 """Check if a file revision is censored."""
1658 """Check if a file revision is censored."""
1659 return False
1659 return False
1660
1660
1661 def _peek_iscensored(self, baserev, delta, flush):
1661 def _peek_iscensored(self, baserev, delta, flush):
1662 """Quickly check if a delta produces a censored revision."""
1662 """Quickly check if a delta produces a censored revision."""
1663 return False
1663 return False
1664
1664
1665 def getstrippoint(self, minlink):
1665 def getstrippoint(self, minlink):
1666 """find the minimum rev that must be stripped to strip the linkrev
1666 """find the minimum rev that must be stripped to strip the linkrev
1667
1667
1668 Returns a tuple containing the minimum rev and a set of all revs that
1668 Returns a tuple containing the minimum rev and a set of all revs that
1669 have linkrevs that will be broken by this strip.
1669 have linkrevs that will be broken by this strip.
1670 """
1670 """
1671 brokenrevs = set()
1671 brokenrevs = set()
1672 strippoint = len(self)
1672 strippoint = len(self)
1673
1673
1674 heads = {}
1674 heads = {}
1675 futurelargelinkrevs = set()
1675 futurelargelinkrevs = set()
1676 for head in self.headrevs():
1676 for head in self.headrevs():
1677 headlinkrev = self.linkrev(head)
1677 headlinkrev = self.linkrev(head)
1678 heads[head] = headlinkrev
1678 heads[head] = headlinkrev
1679 if headlinkrev >= minlink:
1679 if headlinkrev >= minlink:
1680 futurelargelinkrevs.add(headlinkrev)
1680 futurelargelinkrevs.add(headlinkrev)
1681
1681
1682 # This algorithm involves walking down the rev graph, starting at the
1682 # This algorithm involves walking down the rev graph, starting at the
1683 # heads. Since the revs are topologically sorted according to linkrev,
1683 # heads. Since the revs are topologically sorted according to linkrev,
1684 # once all head linkrevs are below the minlink, we know there are
1684 # once all head linkrevs are below the minlink, we know there are
1685 # no more revs that could have a linkrev greater than minlink.
1685 # no more revs that could have a linkrev greater than minlink.
1686 # So we can stop walking.
1686 # So we can stop walking.
1687 while futurelargelinkrevs:
1687 while futurelargelinkrevs:
1688 strippoint -= 1
1688 strippoint -= 1
1689 linkrev = heads.pop(strippoint)
1689 linkrev = heads.pop(strippoint)
1690
1690
1691 if linkrev < minlink:
1691 if linkrev < minlink:
1692 brokenrevs.add(strippoint)
1692 brokenrevs.add(strippoint)
1693 else:
1693 else:
1694 futurelargelinkrevs.remove(linkrev)
1694 futurelargelinkrevs.remove(linkrev)
1695
1695
1696 for p in self.parentrevs(strippoint):
1696 for p in self.parentrevs(strippoint):
1697 if p != nullrev:
1697 if p != nullrev:
1698 plinkrev = self.linkrev(p)
1698 plinkrev = self.linkrev(p)
1699 heads[p] = plinkrev
1699 heads[p] = plinkrev
1700 if plinkrev >= minlink:
1700 if plinkrev >= minlink:
1701 futurelargelinkrevs.add(plinkrev)
1701 futurelargelinkrevs.add(plinkrev)
1702
1702
1703 return strippoint, brokenrevs
1703 return strippoint, brokenrevs
1704
1704
1705 def strip(self, minlink, transaction):
1705 def strip(self, minlink, transaction):
1706 """truncate the revlog on the first revision with a linkrev >= minlink
1706 """truncate the revlog on the first revision with a linkrev >= minlink
1707
1707
1708 This function is called when we're stripping revision minlink and
1708 This function is called when we're stripping revision minlink and
1709 its descendants from the repository.
1709 its descendants from the repository.
1710
1710
1711 We have to remove all revisions with linkrev >= minlink, because
1711 We have to remove all revisions with linkrev >= minlink, because
1712 the equivalent changelog revisions will be renumbered after the
1712 the equivalent changelog revisions will be renumbered after the
1713 strip.
1713 strip.
1714
1714
1715 So we truncate the revlog on the first of these revisions, and
1715 So we truncate the revlog on the first of these revisions, and
1716 trust that the caller has saved the revisions that shouldn't be
1716 trust that the caller has saved the revisions that shouldn't be
1717 removed and that it'll re-add them after this truncation.
1717 removed and that it'll re-add them after this truncation.
1718 """
1718 """
1719 if len(self) == 0:
1719 if len(self) == 0:
1720 return
1720 return
1721
1721
1722 rev, _ = self.getstrippoint(minlink)
1722 rev, _ = self.getstrippoint(minlink)
1723 if rev == len(self):
1723 if rev == len(self):
1724 return
1724 return
1725
1725
1726 # first truncate the files on disk
1726 # first truncate the files on disk
1727 end = self.start(rev)
1727 end = self.start(rev)
1728 if not self._inline:
1728 if not self._inline:
1729 transaction.add(self.datafile, end)
1729 transaction.add(self.datafile, end)
1730 end = rev * self._io.size
1730 end = rev * self._io.size
1731 else:
1731 else:
1732 end += rev * self._io.size
1732 end += rev * self._io.size
1733
1733
1734 transaction.add(self.indexfile, end)
1734 transaction.add(self.indexfile, end)
1735
1735
1736 # then reset internal state in memory to forget those revisions
1736 # then reset internal state in memory to forget those revisions
1737 self._cache = None
1737 self._cache = None
1738 self._chaininfocache = {}
1738 self._chaininfocache = {}
1739 self._chunkclear()
1739 self._chunkclear()
1740 for x in xrange(rev, len(self)):
1740 for x in xrange(rev, len(self)):
1741 del self.nodemap[self.node(x)]
1741 del self.nodemap[self.node(x)]
1742
1742
1743 del self.index[rev:-1]
1743 del self.index[rev:-1]
1744
1744
1745 def checksize(self):
1745 def checksize(self):
1746 expected = 0
1746 expected = 0
1747 if len(self):
1747 if len(self):
1748 expected = max(0, self.end(len(self) - 1))
1748 expected = max(0, self.end(len(self) - 1))
1749
1749
1750 try:
1750 try:
1751 f = self.opener(self.datafile)
1751 f = self.opener(self.datafile)
1752 f.seek(0, 2)
1752 f.seek(0, 2)
1753 actual = f.tell()
1753 actual = f.tell()
1754 f.close()
1754 f.close()
1755 dd = actual - expected
1755 dd = actual - expected
1756 except IOError as inst:
1756 except IOError as inst:
1757 if inst.errno != errno.ENOENT:
1757 if inst.errno != errno.ENOENT:
1758 raise
1758 raise
1759 dd = 0
1759 dd = 0
1760
1760
1761 try:
1761 try:
1762 f = self.opener(self.indexfile)
1762 f = self.opener(self.indexfile)
1763 f.seek(0, 2)
1763 f.seek(0, 2)
1764 actual = f.tell()
1764 actual = f.tell()
1765 f.close()
1765 f.close()
1766 s = self._io.size
1766 s = self._io.size
1767 i = max(0, actual // s)
1767 i = max(0, actual // s)
1768 di = actual - (i * s)
1768 di = actual - (i * s)
1769 if self._inline:
1769 if self._inline:
1770 databytes = 0
1770 databytes = 0
1771 for r in self:
1771 for r in self:
1772 databytes += max(0, self.length(r))
1772 databytes += max(0, self.length(r))
1773 dd = 0
1773 dd = 0
1774 di = actual - len(self) * s - databytes
1774 di = actual - len(self) * s - databytes
1775 except IOError as inst:
1775 except IOError as inst:
1776 if inst.errno != errno.ENOENT:
1776 if inst.errno != errno.ENOENT:
1777 raise
1777 raise
1778 di = 0
1778 di = 0
1779
1779
1780 return (dd, di)
1780 return (dd, di)
1781
1781
1782 def files(self):
1782 def files(self):
1783 res = [self.indexfile]
1783 res = [self.indexfile]
1784 if not self._inline:
1784 if not self._inline:
1785 res.append(self.datafile)
1785 res.append(self.datafile)
1786 return res
1786 return res
General Comments 0
You need to be logged in to leave comments. Login now