revlog: inline start() and end() for perf reasons...
Gregory Szorc
r30288:ceddc3d9 default
@@ -1,1811 +1,1817 @@
# revlog.py - storage back-end for mercurial
#
# Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.

"""Storage back-end for Mercurial.

This provides efficient delta storage with O(1) retrieve and append
and O(changes) merge between branches.
"""

from __future__ import absolute_import

import collections
import errno
import hashlib
import os
import struct
import zlib

# import stuff from node for others to import from revlog
from .node import (
    bin,
    hex,
    nullid,
    nullrev,
)
from .i18n import _
from . import (
    ancestor,
    error,
    mdiff,
    parsers,
    templatefilters,
    util,
)

_pack = struct.pack
_unpack = struct.unpack
_compress = zlib.compress
_decompress = zlib.decompress

# revlog header flags
REVLOGV0 = 0
REVLOGNG = 1
REVLOGNGINLINEDATA = (1 << 16)
REVLOGGENERALDELTA = (1 << 17)
REVLOG_DEFAULT_FLAGS = REVLOGNGINLINEDATA
REVLOG_DEFAULT_FORMAT = REVLOGNG
REVLOG_DEFAULT_VERSION = REVLOG_DEFAULT_FORMAT | REVLOG_DEFAULT_FLAGS
REVLOGNG_FLAGS = REVLOGNGINLINEDATA | REVLOGGENERALDELTA

# revlog index flags
REVIDX_ISCENSORED = (1 << 15) # revision has censor metadata, must be verified
REVIDX_DEFAULT_FLAGS = 0
REVIDX_KNOWN_FLAGS = REVIDX_ISCENSORED

# max size of revlog with inline data
_maxinline = 131072
_chunksize = 1048576

RevlogError = error.RevlogError
LookupError = error.LookupError
CensoredNodeError = error.CensoredNodeError

def getoffset(q):
    return int(q >> 16)

def gettype(q):
    return int(q & 0xFFFF)

def offset_type(offset, type):
    return long(long(offset) << 16 | type)

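# Editorial sketch (not part of upstream revlog.py; _demo_offset_type is a
# hypothetical helper, meant to be run by hand): the first field of an index
# entry packs a 48-bit data-file offset and 16 bits of flags into a single
# integer, which the three helpers above pick apart.
def _demo_offset_type():
    packed = offset_type(1024, REVIDX_ISCENSORED)
    # the low 16 bits carry the flags, the high bits carry the offset
    assert getoffset(packed) == 1024
    assert gettype(packed) == REVIDX_ISCENSORED
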
_nullhash = hashlib.sha1(nullid)

def hash(text, p1, p2):
    """generate a hash from the given text and its parent hashes

    This hash combines both the current file contents and its history
    in a manner that makes it easy to distinguish nodes with the same
    content in the revision graph.
    """
    # As of now, if one of the parent nodes is null, it is always p2
    if p2 == nullid:
        # deep copy of a hash is faster than creating one
        s = _nullhash.copy()
        s.update(p1)
    else:
        # none of the parent nodes are nullid
        l = [p1, p2]
        l.sort()
        s = hashlib.sha1(l[0])
        s.update(l[1])
    s.update(text)
    return s.digest()

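# Editorial sketch (not part of upstream revlog.py; _demo_hash is a
# hypothetical helper, meant to be run by hand): hash() implements the
# standard Mercurial node computation, sha1(min(p1, p2) + max(p1, p2) +
# text), with a fast path reusing the precomputed _nullhash when p2 is null.
def _demo_hash():
    n1 = hash("some text", nullid, nullid)  # a root revision
    assert len(n1) == 20  # raw sha1 digest, not hex
    # identical text with a different parent hashes differently
    n2 = hash("some text", n1, nullid)
    assert n2 != n1
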
def decompress(bin):
    """ decompress the given input """
    if not bin:
        return bin
    t = bin[0]
    if t == '\0':
        return bin
    if t == 'x':
        try:
            return _decompress(bin)
        except zlib.error as e:
            raise RevlogError(_("revlog decompress error: %s") % str(e))
    if t == 'u':
        return util.buffer(bin, 1)
    raise RevlogError(_("unknown compression type %r") % t)

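# Editorial sketch (not part of upstream revlog.py; _demo_decompress is a
# hypothetical helper, meant to be run by hand): every stored chunk starts
# with a one-byte marker: a leading '\0' means the chunk is returned as-is,
# 'x' marks a zlib stream (its first header byte), and 'u' marks data kept
# uncompressed because compression did not pay off.
def _demo_decompress():
    assert decompress('') == ''
    assert _compress('data' * 64)[0] == 'x'
    assert decompress(_compress('data' * 64)) == 'data' * 64
    # 'u'-prefixed data comes back as a zero-copy buffer over bin[1:]
    assert str(decompress('uplain text')) == 'plain text'
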
# index v0:
#  4 bytes: offset
#  4 bytes: compressed length
#  4 bytes: base rev
#  4 bytes: link rev
# 20 bytes: parent 1 nodeid
# 20 bytes: parent 2 nodeid
# 20 bytes: nodeid
indexformatv0 = ">4l20s20s20s"

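# Editorial sketch (not part of upstream revlog.py; _demo_indexv0 is a
# hypothetical helper, meant to be run by hand): a v0 entry is a fixed
# 76-byte record matching the layout documented above.
def _demo_indexv0():
    entry = _pack(indexformatv0, 0, 42, 0, 7,
                  '\0' * 20, '\0' * 20, 'N' * 20)
    assert struct.calcsize(indexformatv0) == len(entry) == 76
    offset, length, base, link, p1, p2, node = _unpack(indexformatv0, entry)
    assert (offset, length, link, node) == (0, 42, 7, 'N' * 20)
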
class revlogoldio(object):
    def __init__(self):
        self.size = struct.calcsize(indexformatv0)

    def parseindex(self, data, inline):
        s = self.size
        index = []
        nodemap = {nullid: nullrev}
        n = off = 0
        l = len(data)
        while off + s <= l:
            cur = data[off:off + s]
            off += s
            e = _unpack(indexformatv0, cur)
            # transform to revlogv1 format
            e2 = (offset_type(e[0], 0), e[1], -1, e[2], e[3],
                  nodemap.get(e[4], nullrev), nodemap.get(e[5], nullrev), e[6])
            index.append(e2)
            nodemap[e[6]] = n
            n += 1

        # add the magic null revision at -1
        index.append((0, 0, 0, -1, -1, -1, -1, nullid))

        return index, nodemap, None

    def packentry(self, entry, node, version, rev):
        if gettype(entry[0]):
            raise RevlogError(_("index entry flags need RevlogNG"))
        e2 = (getoffset(entry[0]), entry[1], entry[3], entry[4],
              node(entry[5]), node(entry[6]), entry[7])
        return _pack(indexformatv0, *e2)

# index ng:
#  6 bytes: offset
#  2 bytes: flags
#  4 bytes: compressed length
#  4 bytes: uncompressed length
#  4 bytes: base rev
#  4 bytes: link rev
#  4 bytes: parent 1 rev
#  4 bytes: parent 2 rev
# 32 bytes: nodeid
indexformatng = ">Qiiiiii20s12x"
versionformat = ">I"

# corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
# signed integer)
_maxentrysize = 0x7fffffff

class revlogio(object):
    def __init__(self):
        self.size = struct.calcsize(indexformatng)

    def parseindex(self, data, inline):
        # call the C implementation to parse the index data
        index, cache = parsers.parse_index2(data, inline)
        return index, getattr(index, 'nodemap', None), cache

    def packentry(self, entry, node, version, rev):
        p = _pack(indexformatng, *entry)
        if rev == 0:
            p = _pack(versionformat, version) + p[4:]
        return p

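# Editorial sketch (not part of upstream revlog.py; _demo_indexng is a
# hypothetical helper, meant to be run by hand): a RevlogNG entry is 64
# bytes. For rev 0 packentry() overwrites the leading 4 bytes of the
# (necessarily zero) offset field with the header version word, which is
# how the index stores its version without a separate header.
def _demo_indexng():
    io = revlogio()
    entry = (offset_type(0, 0), 30, 60, 0, 0, -1, -1, '\xaa' * 20)
    p = io.packentry(entry, None, REVLOG_DEFAULT_VERSION, 0)
    assert len(p) == io.size == 64
    assert _unpack(versionformat, p[:4])[0] == REVLOG_DEFAULT_VERSION
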
class revlog(object):
    """
    the underlying revision storage object

    A revlog consists of two parts, an index and the revision data.

    The index is a file with a fixed record size containing
    information on each revision, including its nodeid (hash), the
    nodeids of its parents, the position and offset of its data within
    the data file, and the revision it's based on. Finally, each entry
    contains a linkrev entry that can serve as a pointer to external
    data.

    The revision data itself is a linear collection of data chunks.
    Each chunk represents a revision and is usually represented as a
    delta against the previous chunk. To bound lookup time, runs of
    deltas are limited to about 2 times the length of the original
    version data. This makes retrieval of a version proportional to
    its size, or O(1) relative to the number of revisions.

    Both pieces of the revlog are written to in an append-only
    fashion, which means we never need to rewrite a file to insert or
    remove data, and can use some simple techniques to avoid the need
    for locking while reading.

    If checkambig, indexfile is opened with checkambig=True at
    writing, to avoid file stat ambiguity.
    """
    def __init__(self, opener, indexfile, checkambig=False):
        """
        create a revlog object

        opener is a function that abstracts the file opening operation
        and can be used to implement COW semantics or the like.
        """
        self.indexfile = indexfile
        self.datafile = indexfile[:-2] + ".d"
        self.opener = opener
        # When True, indexfile is opened with checkambig=True at writing, to
        # avoid file stat ambiguity.
        self._checkambig = checkambig
        # 3-tuple of (node, rev, text) for a raw revision.
        self._cache = None
        # Maps rev to chain base rev.
        self._chainbasecache = util.lrucachedict(100)
        # 2-tuple of (offset, data) of raw data from the revlog at an offset.
        self._chunkcache = (0, '')
        # How much data to read and cache into the raw revlog data cache.
        self._chunkcachesize = 65536
        self._maxchainlen = None
        self._aggressivemergedeltas = False
        self.index = []
        # Mapping of partial identifiers to full nodes.
        self._pcache = {}
        # Mapping of revision integer to full node.
        self._nodecache = {nullid: nullrev}
        self._nodepos = None

        v = REVLOG_DEFAULT_VERSION
        opts = getattr(opener, 'options', None)
        if opts is not None:
            if 'revlogv1' in opts:
                if 'generaldelta' in opts:
                    v |= REVLOGGENERALDELTA
            else:
                v = 0
            if 'chunkcachesize' in opts:
                self._chunkcachesize = opts['chunkcachesize']
            if 'maxchainlen' in opts:
                self._maxchainlen = opts['maxchainlen']
            if 'aggressivemergedeltas' in opts:
                self._aggressivemergedeltas = opts['aggressivemergedeltas']
            self._lazydeltabase = bool(opts.get('lazydeltabase', False))

        if self._chunkcachesize <= 0:
            raise RevlogError(_('revlog chunk cache size %r is not greater '
                                'than 0') % self._chunkcachesize)
        elif self._chunkcachesize & (self._chunkcachesize - 1):
            raise RevlogError(_('revlog chunk cache size %r is not a power '
                                'of 2') % self._chunkcachesize)
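        # (x & (x - 1)) clears the lowest set bit, so it is zero exactly when
        # x is a power of two; e.g. 65536 & 65535 == 0 but 65000 & 64999 != 0.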

        indexdata = ''
        self._initempty = True
        try:
            f = self.opener(self.indexfile)
            indexdata = f.read()
            f.close()
            if len(indexdata) > 0:
                v = struct.unpack(versionformat, indexdata[:4])[0]
                self._initempty = False
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise

        self.version = v
        self._inline = v & REVLOGNGINLINEDATA
        self._generaldelta = v & REVLOGGENERALDELTA
        flags = v & ~0xFFFF
        fmt = v & 0xFFFF
        if fmt == REVLOGV0 and flags:
            raise RevlogError(_("index %s unknown flags %#04x for format v0")
                              % (self.indexfile, flags >> 16))
        elif fmt == REVLOGNG and flags & ~REVLOGNG_FLAGS:
            raise RevlogError(_("index %s unknown flags %#04x for revlogng")
                              % (self.indexfile, flags >> 16))
        elif fmt > REVLOGNG:
            raise RevlogError(_("index %s unknown format %d")
                              % (self.indexfile, fmt))

        self.storedeltachains = True

        self._io = revlogio()
        if self.version == REVLOGV0:
            self._io = revlogoldio()
        try:
            d = self._io.parseindex(indexdata, self._inline)
        except (ValueError, IndexError):
            raise RevlogError(_("index %s is corrupted") % (self.indexfile))
        self.index, nodemap, self._chunkcache = d
        if nodemap is not None:
            self.nodemap = self._nodecache = nodemap
        if not self._chunkcache:
            self._chunkclear()
        # revnum -> (chain-length, sum-delta-length)
        self._chaininfocache = {}

    def tip(self):
        return self.node(len(self.index) - 2)
    def __contains__(self, rev):
        return 0 <= rev < len(self)
    def __len__(self):
        return len(self.index) - 1
    def __iter__(self):
        return iter(xrange(len(self)))
    def revs(self, start=0, stop=None):
        """iterate over all rev in this revlog (from start to stop)"""
        step = 1
        if stop is not None:
            if start > stop:
                step = -1
            stop += step
        else:
            stop = len(self)
        return xrange(start, stop, step)
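    # Note on revs(): the stop bound is inclusive and start > stop walks
    # backwards, so revs(start=5, stop=2) yields 5, 4, 3, 2 while
    # revs(start=0, stop=2) yields 0, 1, 2.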

    @util.propertycache
    def nodemap(self):
        self.rev(self.node(0))
        return self._nodecache

    def hasnode(self, node):
        try:
            self.rev(node)
            return True
        except KeyError:
            return False

    def clearcaches(self):
        self._cache = None
        self._chainbasecache.clear()
        self._chunkcache = (0, '')
        self._pcache = {}

        try:
            self._nodecache.clearcaches()
        except AttributeError:
            self._nodecache = {nullid: nullrev}
            self._nodepos = None

    def rev(self, node):
        try:
            return self._nodecache[node]
        except TypeError:
            raise
        except RevlogError:
            # parsers.c radix tree lookup failed
            raise LookupError(node, self.indexfile, _('no node'))
        except KeyError:
            # pure python cache lookup failed
            n = self._nodecache
            i = self.index
            p = self._nodepos
            if p is None:
                p = len(i) - 2
            for r in xrange(p, -1, -1):
                v = i[r][7]
                n[v] = r
                if v == node:
                    self._nodepos = r - 1
                    return r
            raise LookupError(node, self.indexfile, _('no node'))

    # Accessors for index entries.

    # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
    # are flags.
    def start(self, rev):
        return int(self.index[rev][0] >> 16)

    def flags(self, rev):
        return self.index[rev][0] & 0xFFFF

    def length(self, rev):
        return self.index[rev][1]

    def rawsize(self, rev):
        """return the length of the uncompressed text for a given revision"""
        l = self.index[rev][2]
        if l >= 0:
            return l

        t = self.revision(self.node(rev))
        return len(t)
    size = rawsize

    def chainbase(self, rev):
        base = self._chainbasecache.get(rev)
        if base is not None:
            return base

        index = self.index
        base = index[rev][3]
        while base != rev:
            rev = base
            base = index[rev][3]

        self._chainbasecache[rev] = base
        return base

    def linkrev(self, rev):
        return self.index[rev][4]

    def parentrevs(self, rev):
        return self.index[rev][5:7]

    def node(self, rev):
        return self.index[rev][7]

    # Derived from index values.

    def end(self, rev):
        return self.start(rev) + self.length(rev)
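    # Note: start() and end() are intentionally trivial accessors. Per the
    # commit message above, hot code paths in this module inline them,
    # computing int(index[rev][0] >> 16) and adding the chunk length
    # directly rather than paying a method call per revision.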

    def parents(self, node):
        i = self.index
        d = i[self.rev(node)]
        return i[d[5]][7], i[d[6]][7] # map revisions to nodes inline

    def chainlen(self, rev):
        return self._chaininfo(rev)[0]

    def _chaininfo(self, rev):
        chaininfocache = self._chaininfocache
        if rev in chaininfocache:
            return chaininfocache[rev]
        index = self.index
        generaldelta = self._generaldelta
        iterrev = rev
        e = index[iterrev]
        clen = 0
        compresseddeltalen = 0
        while iterrev != e[3]:
            clen += 1
            compresseddeltalen += e[1]
            if generaldelta:
                iterrev = e[3]
            else:
                iterrev -= 1
            if iterrev in chaininfocache:
                t = chaininfocache[iterrev]
                clen += t[0]
                compresseddeltalen += t[1]
                break
            e = index[iterrev]
        else:
            # Add text length of base since decompressing that also takes
            # work. For cache hits the length is already included.
            compresseddeltalen += e[1]
        r = (clen, compresseddeltalen)
        chaininfocache[rev] = r
        return r

    def _deltachain(self, rev, stoprev=None):
        """Obtain the delta chain for a revision.

        ``stoprev`` specifies a revision to stop at. If not specified, we
        stop at the base of the chain.

        Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
        revs in ascending order and ``stopped`` is a bool indicating whether
        ``stoprev`` was hit.
        """
        chain = []

        # Alias to prevent attribute lookup in tight loop.
        index = self.index
        generaldelta = self._generaldelta

        iterrev = rev
        e = index[iterrev]
        while iterrev != e[3] and iterrev != stoprev:
            chain.append(iterrev)
            if generaldelta:
                iterrev = e[3]
            else:
                iterrev -= 1
            e = index[iterrev]

        if iterrev == stoprev:
            stopped = True
        else:
            chain.append(iterrev)
            stopped = False

        chain.reverse()
        return chain, stopped
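    # Editorial sketch of how the result is consumed, for a revlog rl:
    #
    #     chain, stopped = rl._deltachain(rev)
    #
    # When stopped is False, chain[0] is the full-text base and the deltas
    # of chain[1:], applied in order, rebuild the text of rev.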

    def ancestors(self, revs, stoprev=0, inclusive=False):
        """Generate the ancestors of 'revs' in reverse topological order.
        Does not generate revs lower than stoprev.

        See the documentation for ancestor.lazyancestors for more details."""

        return ancestor.lazyancestors(self.parentrevs, revs, stoprev=stoprev,
                                      inclusive=inclusive)

    def descendants(self, revs):
        """Generate the descendants of 'revs' in revision order.

        Yield a sequence of revision numbers starting with a child of
        some rev in revs, i.e., each revision is *not* considered a
        descendant of itself. Results are ordered by revision number (a
        topological sort)."""
        first = min(revs)
        if first == nullrev:
            for i in self:
                yield i
            return

        seen = set(revs)
        for i in self.revs(start=first + 1):
            for x in self.parentrevs(i):
                if x != nullrev and x in seen:
                    seen.add(i)
                    yield i
                    break

    def findcommonmissing(self, common=None, heads=None):
        """Return a tuple of the ancestors of common and the ancestors of heads
        that are not ancestors of common. In revset terminology, we return the
        tuple:

        ::common, (::heads) - (::common)

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of node IDs. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullid."""
        if common is None:
            common = [nullid]
        if heads is None:
            heads = self.heads()

        common = [self.rev(n) for n in common]
        heads = [self.rev(n) for n in heads]

        # we want the ancestors, but inclusive
        class lazyset(object):
            def __init__(self, lazyvalues):
                self.addedvalues = set()
                self.lazyvalues = lazyvalues

            def __contains__(self, value):
                return value in self.addedvalues or value in self.lazyvalues

            def __iter__(self):
                added = self.addedvalues
                for r in added:
                    yield r
                for r in self.lazyvalues:
                    if r not in added:
                        yield r

            def add(self, value):
                self.addedvalues.add(value)

            def update(self, values):
                self.addedvalues.update(values)

        has = lazyset(self.ancestors(common))
        has.add(nullrev)
        has.update(common)

        # take all ancestors from heads that aren't in has
        missing = set()
        visit = collections.deque(r for r in heads if r not in has)
        while visit:
            r = visit.popleft()
            if r in missing:
                continue
            else:
                missing.add(r)
                for p in self.parentrevs(r):
                    if p not in has:
                        visit.append(p)
        missing = list(missing)
        missing.sort()
        return has, [self.node(r) for r in missing]

    def incrementalmissingrevs(self, common=None):
        """Return an object that can be used to incrementally compute the
        revision numbers of the ancestors of arbitrary sets that are not
        ancestors of common. This is an ancestor.incrementalmissingancestors
        object.

        'common' is a list of revision numbers. If common is not supplied, uses
        nullrev.
        """
        if common is None:
            common = [nullrev]

        return ancestor.incrementalmissingancestors(self.parentrevs, common)

    def findmissingrevs(self, common=None, heads=None):
        """Return the revision numbers of the ancestors of heads that
        are not ancestors of common.

        More specifically, return a list of revision numbers corresponding to
        nodes N such that every N satisfies the following constraints:

          1. N is an ancestor of some node in 'heads'
          2. N is not an ancestor of any node in 'common'

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of revision numbers. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullid."""
        if common is None:
            common = [nullrev]
        if heads is None:
            heads = self.headrevs()

        inc = self.incrementalmissingrevs(common=common)
        return inc.missingancestors(heads)

    def findmissing(self, common=None, heads=None):
        """Return the ancestors of heads that are not ancestors of common.

        More specifically, return a list of nodes N such that every N
        satisfies the following constraints:

          1. N is an ancestor of some node in 'heads'
          2. N is not an ancestor of any node in 'common'

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of node IDs. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullid."""
        if common is None:
            common = [nullid]
        if heads is None:
            heads = self.heads()

        common = [self.rev(n) for n in common]
        heads = [self.rev(n) for n in heads]

        inc = self.incrementalmissingrevs(common=common)
        return [self.node(r) for r in inc.missingancestors(heads)]

    def nodesbetween(self, roots=None, heads=None):
        """Return a topological path from 'roots' to 'heads'.

        Return a tuple (nodes, outroots, outheads) where 'nodes' is a
        topologically sorted list of all nodes N that satisfy both of
        these constraints:

          1. N is a descendant of some node in 'roots'
          2. N is an ancestor of some node in 'heads'

        Every node is considered to be both a descendant and an ancestor
        of itself, so every reachable node in 'roots' and 'heads' will be
        included in 'nodes'.

        'outroots' is the list of reachable nodes in 'roots', i.e., the
        subset of 'roots' that is returned in 'nodes'. Likewise,
        'outheads' is the subset of 'heads' that is also in 'nodes'.

        'roots' and 'heads' are both lists of node IDs. If 'roots' is
        unspecified, uses nullid as the only root. If 'heads' is
        unspecified, uses list of all of the revlog's heads."""
        nonodes = ([], [], [])
        if roots is not None:
            roots = list(roots)
            if not roots:
                return nonodes
            lowestrev = min([self.rev(n) for n in roots])
        else:
            roots = [nullid] # Everybody's a descendant of nullid
            lowestrev = nullrev
        if (lowestrev == nullrev) and (heads is None):
            # We want _all_ the nodes!
            return ([self.node(r) for r in self], [nullid], list(self.heads()))
        if heads is None:
            # All nodes are ancestors, so the latest ancestor is the last
            # node.
            highestrev = len(self) - 1
            # Set ancestors to None to signal that every node is an ancestor.
            ancestors = None
            # Set heads to an empty dictionary for later discovery of heads
            heads = {}
        else:
            heads = list(heads)
            if not heads:
                return nonodes
            ancestors = set()
            # Turn heads into a dictionary so we can remove 'fake' heads.
            # Also, later we will be using it to filter out the heads we can't
            # find from roots.
            heads = dict.fromkeys(heads, False)
            # Start at the top and keep marking parents until we're done.
            nodestotag = set(heads)
            # Remember where the top was so we can use it as a limit later.
            highestrev = max([self.rev(n) for n in nodestotag])
            while nodestotag:
                # grab a node to tag
                n = nodestotag.pop()
                # Never tag nullid
                if n == nullid:
                    continue
                # A node's revision number represents its place in a
                # topologically sorted list of nodes.
                r = self.rev(n)
                if r >= lowestrev:
                    if n not in ancestors:
                        # If we are possibly a descendant of one of the roots
                        # and we haven't already been marked as an ancestor
                        ancestors.add(n) # Mark as ancestor
                        # Add non-nullid parents to list of nodes to tag.
                        nodestotag.update([p for p in self.parents(n) if
                                           p != nullid])
                    elif n in heads: # We've seen it before, is it a fake head?
                        # So it is, real heads should not be the ancestors of
                        # any other heads.
                        heads.pop(n)
            if not ancestors:
                return nonodes
            # Now that we have our set of ancestors, we want to remove any
            # roots that are not ancestors.

            # If one of the roots was nullid, everything is included anyway.
            if lowestrev > nullrev:
                # But, since we weren't, let's recompute the lowest rev to not
                # include roots that aren't ancestors.

                # Filter out roots that aren't ancestors of heads
                roots = [n for n in roots if n in ancestors]
                # Recompute the lowest revision
                if roots:
                    lowestrev = min([self.rev(n) for n in roots])
                else:
                    # No more roots? Return empty list
                    return nonodes
            else:
                # We are descending from nullid, and don't need to care about
                # any other roots.
                lowestrev = nullrev
                roots = [nullid]
        # Transform our roots list into a set.
        descendants = set(roots)
        # Also, keep the original roots so we can filter out roots that aren't
        # 'real' roots (i.e. are descended from other roots).
        roots = descendants.copy()
        # Our topologically sorted list of output nodes.
        orderedout = []
        # Don't start at nullid since we don't want nullid in our output list,
        # and if nullid shows up in descendants, empty parents will look like
        # they're descendants.
        for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
            n = self.node(r)
            isdescendant = False
            if lowestrev == nullrev: # Everybody is a descendant of nullid
                isdescendant = True
            elif n in descendants:
                # n is already a descendant
                isdescendant = True
                # This check only needs to be done here because all the roots
                # will start being marked as descendants before the loop.
                if n in roots:
                    # If n was a root, check if it's a 'real' root.
                    p = tuple(self.parents(n))
                    # If any of its parents are descendants, it's not a root.
                    if (p[0] in descendants) or (p[1] in descendants):
                        roots.remove(n)
            else:
                p = tuple(self.parents(n))
                # A node is a descendant if either of its parents are
                # descendants. (We seeded the dependents list with the roots
                # up there, remember?)
                if (p[0] in descendants) or (p[1] in descendants):
                    descendants.add(n)
                    isdescendant = True
            if isdescendant and ((ancestors is None) or (n in ancestors)):
                # Only include nodes that are both descendants and ancestors.
                orderedout.append(n)
                if (ancestors is not None) and (n in heads):
                    # We're trying to figure out which heads are reachable
                    # from roots.
                    # Mark this head as having been reached
                    heads[n] = True
                elif ancestors is None:
                    # Otherwise, we're trying to discover the heads.
                    # Assume this is a head because if it isn't, the next step
                    # will eventually remove it.
                    heads[n] = True
                    # But, obviously its parents aren't.
                    for p in self.parents(n):
                        heads.pop(p, None)
        heads = [n for n, flag in heads.iteritems() if flag]
        roots = list(roots)
        assert orderedout
        assert roots
        assert heads
        return (orderedout, roots, heads)

    def headrevs(self):
        try:
            return self.index.headrevs()
        except AttributeError:
            return self._headrevs()

    def computephases(self, roots):
        return self.index.computephasesmapsets(roots)

    def _headrevs(self):
        count = len(self)
        if not count:
            return [nullrev]
        # we won't iterate over filtered revs, so nobody is a head at start
        ishead = [0] * (count + 1)
        index = self.index
        for r in self:
            ishead[r] = 1 # I may be a head
            e = index[r]
            ishead[e[5]] = ishead[e[6]] = 0 # my parents are not
        return [r for r, val in enumerate(ishead) if val]
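    # Editorial sketch: on a linear history 0 <- 1 <- 2 each entry clears
    # the ishead flag of its parents, so only [2] survives; adding a child
    # of rev 1 as rev 3 would yield [2, 3].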
842
842
843 def heads(self, start=None, stop=None):
843 def heads(self, start=None, stop=None):
844 """return the list of all nodes that have no children
844 """return the list of all nodes that have no children
845
845
846 if start is specified, only heads that are descendants of
846 if start is specified, only heads that are descendants of
847 start will be returned
847 start will be returned
848 if stop is specified, it will consider all the revs from stop
848 if stop is specified, it will consider all the revs from stop
849 as if they had no children
849 as if they had no children
850 """
850 """
851 if start is None and stop is None:
851 if start is None and stop is None:
852 if not len(self):
852 if not len(self):
853 return [nullid]
853 return [nullid]
854 return [self.node(r) for r in self.headrevs()]
854 return [self.node(r) for r in self.headrevs()]
855
855
856 if start is None:
856 if start is None:
857 start = nullid
857 start = nullid
858 if stop is None:
858 if stop is None:
859 stop = []
859 stop = []
860 stoprevs = set([self.rev(n) for n in stop])
860 stoprevs = set([self.rev(n) for n in stop])
861 startrev = self.rev(start)
861 startrev = self.rev(start)
862 reachable = set((startrev,))
862 reachable = set((startrev,))
863 heads = set((startrev,))
863 heads = set((startrev,))
864
864
865 parentrevs = self.parentrevs
865 parentrevs = self.parentrevs
866 for r in self.revs(start=startrev + 1):
866 for r in self.revs(start=startrev + 1):
867 for p in parentrevs(r):
867 for p in parentrevs(r):
868 if p in reachable:
868 if p in reachable:
869 if r not in stoprevs:
869 if r not in stoprevs:
870 reachable.add(r)
870 reachable.add(r)
871 heads.add(r)
871 heads.add(r)
872 if p in heads and p not in stoprevs:
872 if p in heads and p not in stoprevs:
873 heads.remove(p)
873 heads.remove(p)
874
874
875 return [self.node(r) for r in heads]
875 return [self.node(r) for r in heads]
876
876
877 def children(self, node):
877 def children(self, node):
878 """find the children of a given node"""
878 """find the children of a given node"""
879 c = []
879 c = []
880 p = self.rev(node)
880 p = self.rev(node)
881 for r in self.revs(start=p + 1):
881 for r in self.revs(start=p + 1):
882 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
882 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
883 if prevs:
883 if prevs:
884 for pr in prevs:
884 for pr in prevs:
885 if pr == p:
885 if pr == p:
886 c.append(self.node(r))
886 c.append(self.node(r))
887 elif p == nullrev:
887 elif p == nullrev:
888 c.append(self.node(r))
888 c.append(self.node(r))
889 return c
889 return c
890
890
891 def descendant(self, start, end):
891 def descendant(self, start, end):
892 if start == nullrev:
892 if start == nullrev:
893 return True
893 return True
894 for i in self.descendants([start]):
894 for i in self.descendants([start]):
895 if i == end:
895 if i == end:
896 return True
896 return True
897 elif i > end:
897 elif i > end:
898 break
898 break
899 return False
899 return False
900
900
901 def commonancestorsheads(self, a, b):
901 def commonancestorsheads(self, a, b):
902 """calculate all the heads of the common ancestors of nodes a and b"""
902 """calculate all the heads of the common ancestors of nodes a and b"""
903 a, b = self.rev(a), self.rev(b)
903 a, b = self.rev(a), self.rev(b)
904 try:
904 try:
905 ancs = self.index.commonancestorsheads(a, b)
905 ancs = self.index.commonancestorsheads(a, b)
906 except (AttributeError, OverflowError): # C implementation failed
906 except (AttributeError, OverflowError): # C implementation failed
907 ancs = ancestor.commonancestorsheads(self.parentrevs, a, b)
907 ancs = ancestor.commonancestorsheads(self.parentrevs, a, b)
908 return map(self.node, ancs)
908 return map(self.node, ancs)
909
909
910 def isancestor(self, a, b):
910 def isancestor(self, a, b):
911 """return True if node a is an ancestor of node b
911 """return True if node a is an ancestor of node b
912
912
913 The implementation of this is trivial but the use of
913 The implementation of this is trivial but the use of
914 commonancestorsheads is not."""
914 commonancestorsheads is not."""
915 return a in self.commonancestorsheads(a, b)
915 return a in self.commonancestorsheads(a, b)
916
916
    def ancestor(self, a, b):
        """calculate the "best" common ancestor of nodes a and b"""

        a, b = self.rev(a), self.rev(b)
        try:
            ancs = self.index.ancestors(a, b)
        except (AttributeError, OverflowError):
            ancs = ancestor.ancestors(self.parentrevs, a, b)
        if ancs:
            # choose a consistent winner when there's a tie
            return min(map(self.node, ancs))
        return nullid

    def _match(self, id):
        if isinstance(id, int):
            # rev
            return self.node(id)
        if len(id) == 20:
            # possibly a binary node
            # odds of a binary node being all hex in ASCII are 1 in 10**25
            try:
                node = id
                self.rev(node) # quick search the index
                return node
            except LookupError:
                pass # may be partial hex id
        try:
            # str(rev)
            rev = int(id)
            if str(rev) != id:
                raise ValueError
            if rev < 0:
                rev = len(self) + rev
            if rev < 0 or rev >= len(self):
                raise ValueError
            return self.node(rev)
        except (ValueError, OverflowError):
            pass
        if len(id) == 40:
            try:
                # a full hex nodeid?
                node = bin(id)
                self.rev(node)
                return node
            except (TypeError, LookupError):
                pass

    def _partialmatch(self, id):
        try:
            n = self.index.partialmatch(id)
            if n and self.hasnode(n):
                return n
            return None
        except RevlogError:
            # parsers.c radix tree lookup gave multiple matches
            # fast path: for unfiltered changelog, radix tree is accurate
            if not getattr(self, 'filteredrevs', None):
                raise LookupError(id, self.indexfile,
                                  _('ambiguous identifier'))
            # fall through to slow path that filters hidden revisions
        except (AttributeError, ValueError):
            # we are pure python, or key was too short to search radix tree
            pass

        if id in self._pcache:
            return self._pcache[id]

        if len(id) < 40:
            try:
                # hex(node)[:...]
                l = len(id) // 2  # grab an even number of digits
                prefix = bin(id[:l * 2])
                nl = [e[7] for e in self.index if e[7].startswith(prefix)]
                nl = [n for n in nl if hex(n).startswith(id) and
                      self.hasnode(n)]
                if len(nl) > 0:
                    if len(nl) == 1:
                        self._pcache[id] = nl[0]
                        return nl[0]
                    raise LookupError(id, self.indexfile,
                                      _('ambiguous identifier'))
                return None
            except TypeError:
                pass

    def lookup(self, id):
        """locate a node based on:
            - revision number or str(revision number)
            - nodeid or subset of hex nodeid
        """
        n = self._match(id)
        if n is not None:
            return n
        n = self._partialmatch(id)
        if n:
            return n

        raise LookupError(id, self.indexfile, _('no match found'))

    def cmp(self, node, text):
        """compare text with a given file revision

        returns True if text is different from what is stored.
        """
        p1, p2 = self.parents(node)
        return hash(text, p1, p2) != node

    def _addchunk(self, offset, data):
        """Add a segment to the revlog cache.

        Accepts an absolute offset and the data that is at that location.
        """
        o, d = self._chunkcache
        # try to add to existing cache
        if o + len(d) == offset and len(d) + len(data) < _chunksize:
            self._chunkcache = o, d + data
        else:
            self._chunkcache = offset, data

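    # Illustrative sketch (not part of the original source): contiguous
    # reads extend the cached window in place, anything else replaces it.
    # Assuming the 1 MiB _chunksize limit above is not exceeded:
    #
    #   self._chunkcache = (0, 'abcd')
    #   self._addchunk(4, 'efgh')    # contiguous -> (0, 'abcdefgh')
    #   self._addchunk(100, 'ijkl')  # gap -> cache replaced by (100, 'ijkl')
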
    def _loadchunk(self, offset, length, df=None):
        """Load a segment of raw data from the revlog.

        Accepts an absolute offset, length to read, and an optional existing
        file handle to read from.

        If an existing file handle is passed, it will be seeked and the
        original seek position will NOT be restored.

        Returns a str or buffer of raw byte data.
        """
        if df is not None:
            closehandle = False
        else:
            if self._inline:
                df = self.opener(self.indexfile)
            else:
                df = self.opener(self.datafile)
            closehandle = True

        # Cache data both forward and backward around the requested
        # data, in a fixed size window. This helps speed up operations
        # involving reading the revlog backwards.
        cachesize = self._chunkcachesize
        realoffset = offset & ~(cachesize - 1)
        reallength = (((offset + length + cachesize) & ~(cachesize - 1))
                      - realoffset)
        df.seek(realoffset)
        d = df.read(reallength)
        if closehandle:
            df.close()
        self._addchunk(realoffset, d)
        if offset != realoffset or reallength != length:
            return util.buffer(d, offset - realoffset, length)
        return d

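    # A worked example of the window rounding above (illustrative values;
    # this assumes _chunkcachesize is 65536 -- it must be a power of 2 for
    # the mask arithmetic to hold):
    #
    #   offset, length = 70000, 100
    #   realoffset = 70000 & ~(65536 - 1)                      # -> 65536
    #   reallength = ((70000 + 100 + 65536) & ~65535) - 65536  # -> 65536
    #
    # A single aligned 64 KiB read thus covers the 100 requested bytes plus
    # surrounding data that later calls can serve from the cache.
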
    def _getchunk(self, offset, length, df=None):
        """Obtain a segment of raw data from the revlog.

        Accepts an absolute offset, length of bytes to obtain, and an
        optional file handle to the already-opened revlog. If the file
        handle is used, its original seek position will not be preserved.

        Requests for data may be returned from a cache.

        Returns a str or a buffer instance of raw byte data.
        """
        o, d = self._chunkcache
        l = len(d)

        # is it in the cache?
        cachestart = offset - o
        cacheend = cachestart + length
        if cachestart >= 0 and cacheend <= l:
            if cachestart == 0 and cacheend == l:
                return d # avoid a copy
            return util.buffer(d, cachestart, cacheend - cachestart)

        return self._loadchunk(offset, length, df=df)

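    # Cache-hit arithmetic, sketched with illustrative numbers: given a
    # cached window (o=65536, len(d)=65536), a request for offset=70000,
    # length=100 yields cachestart=4464 and cacheend=4564, both inside the
    # window, so the bytes are served as a zero-copy util.buffer slice.
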
    def _chunkraw(self, startrev, endrev, df=None):
        """Obtain a segment of raw data corresponding to a range of revisions.

        Accepts the start and end revisions and an optional already-open
        file handle to be used for reading. If the file handle is read, its
        seek position will not be preserved.

        Requests for data may be satisfied by a cache.

        Returns a 2-tuple of (offset, data) for the requested range of
        revisions. Offset is the integer offset from the beginning of the
        revlog and data is a str or buffer of the raw byte data.

        Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
        to determine where each revision's data begins and ends.
        """
        # Inlined self.start(startrev) & self.end(endrev) for perf reasons
        # (functions are expensive).
        index = self.index
        istart = index[startrev]
        iend = index[endrev]
        start = int(istart[0] >> 16)
        end = int(iend[0] >> 16) + iend[1]

        if self._inline:
            start += (startrev + 1) * self._io.size
            end += (endrev + 1) * self._io.size
        length = end - start

        return start, self._getchunk(start, length, df=df)

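    # How the inlined arithmetic above decodes an index entry (sketch):
    # entry[0] packs the data-file offset into the high bits and the
    # revision flags into the low 16 bits (see offset_type/getoffset),
    # and entry[1] holds the compressed chunk length, so:
    #
    #   e = self.index[rev]
    #   start = int(e[0] >> 16)  # byte offset of this rev's chunk
    #   end = start + e[1]       # one past its last byte
    #
    # For inline revlogs the index entries are interleaved with the data,
    # hence the extra (rev + 1) * self._io.size adjustment.
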
    def _chunk(self, rev, df=None):
        """Obtain a single decompressed chunk for a revision.

        Accepts an integer revision and an optional already-open file handle
        to be used for reading. If used, the seek position of the file will not
        be preserved.

        Returns a str holding uncompressed data for the requested revision.
        """
        return decompress(self._chunkraw(rev, rev, df=df)[1])

    def _chunks(self, revs, df=None):
        """Obtain decompressed chunks for the specified revisions.

        Accepts an iterable of numeric revisions that are assumed to be in
        ascending order. Also accepts an optional already-open file handle
        to be used for reading. If used, the seek position of the file will
        not be preserved.

        This function is similar to calling ``self._chunk()`` multiple times,
        but is faster.

        Returns a list with decompressed data for each requested revision.
        """
        if not revs:
            return []
        start = self.start
        length = self.length
        inline = self._inline
        iosize = self._io.size
        buffer = util.buffer

        l = []
        ladd = l.append

        try:
            offset, data = self._chunkraw(revs[0], revs[-1], df=df)
        except OverflowError:
            # issue4215 - we can't cache a run of chunks greater than
            # 2G on Windows
            return [self._chunk(rev, df=df) for rev in revs]

        for rev in revs:
            chunkstart = start(rev)
            if inline:
                chunkstart += (rev + 1) * iosize
            chunklength = length(rev)
            ladd(decompress(buffer(data, chunkstart - offset, chunklength)))

        return l

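    # Sketch of why _chunks() beats repeated _chunk() calls: a single
    # contiguous read spans all requested revisions, and each revision is
    # then sliced out of that one buffer with no further I/O:
    #
    #   offset, data = self._chunkraw(revs[0], revs[-1])
    #   # rev's chunk is data[start(rev) - offset :
    #   #                     start(rev) - offset + length(rev)]
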
    def _chunkclear(self):
        """Clear the raw chunk cache."""
        self._chunkcache = (0, '')

    def deltaparent(self, rev):
        """return deltaparent of the given revision"""
        base = self.index[rev][3]
        if base == rev:
            return nullrev
        elif self._generaldelta:
            return base
        else:
            return rev - 1

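    # Reading of the base field used above, sketched: index[rev][3] names
    # the revision this one's delta was stored against. Under generaldelta
    # that can be any earlier revision (typically a parent); without it,
    # deltas always chain against rev - 1 and the field merely records
    # where the current chain's fulltext lives.
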
    def revdiff(self, rev1, rev2):
        """return or calculate a delta between two revisions"""
        if rev1 != nullrev and self.deltaparent(rev2) == rev1:
            return str(self._chunk(rev2))

        return mdiff.textdiff(self.revision(rev1),
                              self.revision(rev2))

    def revision(self, nodeorrev, _df=None):
        """return an uncompressed revision of a given node or revision
        number.

        _df is an existing file handle to read from. It is meant to only be
        used internally.
        """
        if isinstance(nodeorrev, int):
            rev = nodeorrev
            node = self.node(rev)
        else:
            node = nodeorrev
            rev = None

        cachedrev = None
        if node == nullid:
            return ""
        if self._cache:
            if self._cache[0] == node:
                return self._cache[2]
            cachedrev = self._cache[1]

        # look up what we need to read
        text = None
        if rev is None:
            rev = self.rev(node)

        # check rev flags
        if self.flags(rev) & ~REVIDX_KNOWN_FLAGS:
            raise RevlogError(_('incompatible revision flag %x') %
                              (self.flags(rev) & ~REVIDX_KNOWN_FLAGS))

        chain, stopped = self._deltachain(rev, stoprev=cachedrev)
        if stopped:
            text = self._cache[2]

        # drop cache to save memory
        self._cache = None

        bins = self._chunks(chain, df=_df)
        if text is None:
            text = str(bins[0])
            bins = bins[1:]

        text = mdiff.patches(text, bins)

        text = self._checkhash(text, node, rev)

        self._cache = (node, rev, text)
        return text

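    # Reconstruction sketch (illustrative rev numbers): a fulltext is the
    # chain's base text with each later delta patched on in order, e.g.
    #
    #   chain, stopped = self._deltachain(rev)  # e.g. [10, 11, 13]
    #   bins = self._chunks(chain)              # decompressed chunks
    #   text = mdiff.patches(str(bins[0]), bins[1:])
    #
    # When the chain walk stops early at the cached revision, the cached
    # fulltext stands in for bins[0] and only the remaining deltas apply.
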
    def hash(self, text, p1, p2):
        """Compute a node hash.

        Available as a function so that subclasses can replace the hash
        as needed.
        """
        return hash(text, p1, p2)

    def _checkhash(self, text, node, rev):
        p1, p2 = self.parents(node)
        self.checkhash(text, p1, p2, node, rev)
        return text

    def checkhash(self, text, p1, p2, node, rev=None):
        if node != self.hash(text, p1, p2):
            revornode = rev
            if revornode is None:
                revornode = templatefilters.short(hex(node))
            raise RevlogError(_("integrity check failed on %s:%s")
                % (self.indexfile, revornode))

    def checkinlinesize(self, tr, fp=None):
        """Check if the revlog is too big for inline and convert if so.

        This should be called after revisions are added to the revlog. If the
        revlog has grown too large to be an inline revlog, it will convert it
        to use multiple index and data files.
        """
        if not self._inline or (self.start(-2) + self.length(-2)) < _maxinline:
            return

        trinfo = tr.find(self.indexfile)
        if trinfo is None:
            raise RevlogError(_("%s not found in the transaction")
                              % self.indexfile)

        trindex = trinfo[2]
        if trindex is not None:
            dataoff = self.start(trindex)
        else:
            # revlog was stripped at start of transaction, use all leftover data
            trindex = len(self) - 1
            dataoff = self.end(-2)

        tr.add(self.datafile, dataoff)

        if fp:
            fp.flush()
            fp.close()

        df = self.opener(self.datafile, 'w')
        try:
            for r in self:
                df.write(self._chunkraw(r, r)[1])
        finally:
            df.close()

        fp = self.opener(self.indexfile, 'w', atomictemp=True,
                         checkambig=self._checkambig)
        self.version &= ~(REVLOGNGINLINEDATA)
        self._inline = False
        for i in self:
            e = self._io.packentry(self.index[i], self.node, self.version, i)
            fp.write(e)

        # if we don't call close, the temp file will never replace the
        # real index
        fp.close()

        tr.replace(self.indexfile, trindex * self._io.size)
        self._chunkclear()

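    # Conversion sketch: an inline revlog stores each revision's data right
    # after its index entry in the .i file. Once the file passes _maxinline
    # (128 KiB), the loop above copies every raw chunk into a separate .d
    # file and rewrites the index without the REVLOGNGINLINEDATA flag, so
    # subsequent reads consult two files instead of one.
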
    def addrevision(self, text, transaction, link, p1, p2, cachedelta=None,
                    node=None):
        """add a revision to the log

        text - the revision data to add
        transaction - the transaction object used for rollback
        link - the linkrev data to add
        p1, p2 - the parent nodeids of the revision
        cachedelta - an optional precomputed delta
        node - nodeid of revision; typically node is not specified, and it is
            computed by default as hash(text, p1, p2), however subclasses might
            use different hashing method (and override checkhash() in such case)
        """
        if link == nullrev:
            raise RevlogError(_("attempted to add linkrev -1 to %s")
                              % self.indexfile)

        if len(text) > _maxentrysize:
            raise RevlogError(
                _("%s: size of %d bytes exceeds maximum revlog storage of 2GiB")
                % (self.indexfile, len(text)))

        node = node or self.hash(text, p1, p2)
        if node in self.nodemap:
            return node

        dfh = None
        if not self._inline:
            dfh = self.opener(self.datafile, "a+")
        ifh = self.opener(self.indexfile, "a+", checkambig=self._checkambig)
        try:
            return self._addrevision(node, text, transaction, link, p1, p2,
                                     REVIDX_DEFAULT_FLAGS, cachedelta, ifh, dfh)
        finally:
            if dfh:
                dfh.close()
            ifh.close()

    def compress(self, text):
        """ generate a possibly-compressed representation of text """
        if not text:
            return ("", text)
        l = len(text)
        bin = None
        if l < 44:
            pass
        elif l > 1000000:
            # zlib makes an internal copy, thus doubling memory usage for
            # large files, so let's do this in pieces
            z = zlib.compressobj()
            p = []
            pos = 0
            while pos < l:
                pos2 = pos + 2**20
                p.append(z.compress(text[pos:pos2]))
                pos = pos2
            p.append(z.flush())
            if sum(map(len, p)) < l:
                bin = "".join(p)
        else:
            bin = _compress(text)
        if bin is None or len(bin) > l:
            if text[0] == '\0':
                return ("", text)
            return ('u', text)
        return ("", bin)

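    # The one-byte header returned above, summarized (a sketch of the
    # convention; the matching decoder lives in decompress()):
    #   ('', data)  - data is zlib-compressed
    #   ('u', text) - stored uncompressed because compression didn't pay off
    #   ('', text)  - uncompressed but starting with '\0', which cannot be
    #                 confused with a header byte, so no marker is needed
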
    def _isgooddelta(self, d, textlen):
        """Returns True if the given delta is good. Good means that it is within
        the disk span, disk size, and chain length bounds that we know to be
        performant."""
        if d is None:
            return False

        # - 'dist' is the distance from the base revision -- bounding it limits
        #   the amount of I/O we need to do.
        # - 'compresseddeltalen' is the sum of the total size of deltas we need
        #   to apply -- bounding it limits the amount of CPU we consume.
        dist, l, data, base, chainbase, chainlen, compresseddeltalen = d
        if (dist > textlen * 4 or l > textlen or
            compresseddeltalen > textlen * 2 or
            (self._maxchainlen and chainlen > self._maxchainlen)):
            return False

        return True

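    # Worked example with illustrative numbers: for a 1000-byte fulltext,
    # a candidate is rejected if the span read from disk exceeds 4000 bytes
    # (dist > textlen * 4), the delta itself exceeds 1000 bytes
    # (l > textlen), the chain's compressed deltas sum past 2000 bytes
    # (compresseddeltalen > textlen * 2), or the chain is longer than the
    # configured _maxchainlen.
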
    def _addrevision(self, node, text, transaction, link, p1, p2, flags,
                     cachedelta, ifh, dfh, alwayscache=False):
        """internal function to add revisions to the log

        see addrevision for argument descriptions.
        invariants:
        - text is optional (can be None); if not set, cachedelta must be set.
          if both are set, they must correspond to each other.
        """
        btext = [text]
        def buildtext():
            if btext[0] is not None:
                return btext[0]
            baserev = cachedelta[0]
            delta = cachedelta[1]
            # special case deltas which replace entire base; no need to decode
            # base revision. this neatly avoids censored bases, which throw when
            # they're decoded.
            hlen = struct.calcsize(">lll")
            if delta[:hlen] == mdiff.replacediffheader(self.rawsize(baserev),
                                                       len(delta) - hlen):
                btext[0] = delta[hlen:]
            else:
                if self._inline:
                    fh = ifh
                else:
                    fh = dfh
                basetext = self.revision(self.node(baserev), _df=fh)
                btext[0] = mdiff.patch(basetext, delta)
            try:
                self.checkhash(btext[0], p1, p2, node)
                if flags & REVIDX_ISCENSORED:
                    raise RevlogError(_('node %s is not censored') % node)
            except CensoredNodeError:
                # must pass the censored index flag to add censored revisions
                if not flags & REVIDX_ISCENSORED:
                    raise
            return btext[0]

        def builddelta(rev):
            # can we use the cached delta?
            if cachedelta and cachedelta[0] == rev:
                delta = cachedelta[1]
            else:
                t = buildtext()
                if self.iscensored(rev):
                    # deltas based on a censored revision must replace the
                    # full content in one patch, so delta works everywhere
                    header = mdiff.replacediffheader(self.rawsize(rev), len(t))
                    delta = header + t
                else:
                    if self._inline:
                        fh = ifh
                    else:
                        fh = dfh
                    ptext = self.revision(self.node(rev), _df=fh)
                    delta = mdiff.textdiff(ptext, t)
            header, data = self.compress(delta)
            deltalen = len(header) + len(data)
            chainbase = self.chainbase(rev)
            dist = deltalen + offset - self.start(chainbase)
            if self._generaldelta:
                base = rev
            else:
                base = chainbase
            chainlen, compresseddeltalen = self._chaininfo(rev)
            chainlen += 1
            compresseddeltalen += deltalen
            return (dist, deltalen, (header, data), base,
                    chainbase, chainlen, compresseddeltalen)

        curr = len(self)
        prev = curr - 1
        offset = self.end(prev)
        delta = None
        p1r, p2r = self.rev(p1), self.rev(p2)

        # full versions are inserted when the needed deltas
        # become comparable to the uncompressed text
        if text is None:
            textlen = mdiff.patchedsize(self.rawsize(cachedelta[0]),
                                        cachedelta[1])
        else:
            textlen = len(text)

        # should we try to build a delta?
        if prev != nullrev and self.storedeltachains:
            tested = set()
            # This condition is true most of the time when processing
            # changegroup data into a generaldelta repo. The only time it
            # isn't true is if this is the first revision in a delta chain
            # or if ``format.generaldelta=true`` disabled ``lazydeltabase``.
            if cachedelta and self._generaldelta and self._lazydeltabase:
                # Assume what we received from the server is a good choice
                # build delta will reuse the cache
                candidatedelta = builddelta(cachedelta[0])
                tested.add(cachedelta[0])
                if self._isgooddelta(candidatedelta, textlen):
                    delta = candidatedelta
            if delta is None and self._generaldelta:
                # exclude already lazy tested base if any
                parents = [p for p in (p1r, p2r)
                           if p != nullrev and p not in tested]
                if parents and not self._aggressivemergedeltas:
                    # Pick whichever parent is closer to us (to minimize the
                    # chance of having to build a fulltext).
                    parents = [max(parents)]
                tested.update(parents)
                pdeltas = []
                for p in parents:
                    pd = builddelta(p)
                    if self._isgooddelta(pd, textlen):
                        pdeltas.append(pd)
                if pdeltas:
                    delta = min(pdeltas, key=lambda x: x[1])
            if delta is None and prev not in tested:
                # other approaches failed, so try against prev to hopefully
                # save us a fulltext.
                candidatedelta = builddelta(prev)
                if self._isgooddelta(candidatedelta, textlen):
                    delta = candidatedelta
        if delta is not None:
            dist, l, data, base, chainbase, chainlen, compresseddeltalen = delta
        else:
            text = buildtext()
            data = self.compress(text)
            l = len(data[1]) + len(data[0])
            base = chainbase = curr

        e = (offset_type(offset, flags), l, textlen,
             base, link, p1r, p2r, node)
        self.index.insert(-1, e)
        self.nodemap[node] = curr

        entry = self._io.packentry(e, self.node, self.version, curr)
        self._writeentry(transaction, ifh, dfh, entry, data, link, offset)

        if alwayscache and text is None:
            text = buildtext()

        if type(text) == str: # only accept immutable objects
            self._cache = (node, curr, text)
        self._chainbasecache[curr] = chainbase
        return node

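    # Candidate order used above, summarized: 1) the delta base shipped
    # with cachedelta when lazydeltabase applies, 2) one or both parents
    # under generaldelta (the smallest good delta wins), 3) the previous
    # revision, and finally 4) a full compressed snapshot when nothing
    # passes _isgooddelta().
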
    def _writeentry(self, transaction, ifh, dfh, entry, data, link, offset):
        # Files opened in a+ mode have inconsistent behavior on various
        # platforms. Windows requires that a file positioning call be made
        # when the file handle transitions between reads and writes. See
        # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
        # platforms, Python or the platform itself can be buggy. Some versions
        # of Solaris have been observed to not append at the end of the file
        # if the file was seeked to before the end. See issue4943 for more.
        #
        # We work around this issue by inserting a seek() before writing.
        # Note: This is likely not necessary on Python 3.
        ifh.seek(0, os.SEEK_END)
        if dfh:
            dfh.seek(0, os.SEEK_END)

        curr = len(self) - 1
        if not self._inline:
            transaction.add(self.datafile, offset)
            transaction.add(self.indexfile, curr * len(entry))
            if data[0]:
                dfh.write(data[0])
            dfh.write(data[1])
            ifh.write(entry)
        else:
            offset += curr * self._io.size
            transaction.add(self.indexfile, offset, curr)
            ifh.write(entry)
            ifh.write(data[0])
            ifh.write(data[1])
            self.checkinlinesize(transaction, ifh)

    def addgroup(self, cg, linkmapper, transaction, addrevisioncb=None):
        """
        add a delta group

        given a set of deltas, add them to the revision log. the
        first delta is against its parent, which should be in our
        log, the rest are against the previous delta.

        If ``addrevisioncb`` is defined, it will be called with arguments of
        this revlog and the node that was added.
        """

        # track the base of the current delta log
        content = []
        node = None

        r = len(self)
        end = 0
        if r:
            end = self.end(r - 1)
        ifh = self.opener(self.indexfile, "a+", checkambig=self._checkambig)
        isize = r * self._io.size
        if self._inline:
            transaction.add(self.indexfile, end + isize, r)
            dfh = None
        else:
            transaction.add(self.indexfile, isize, r)
            transaction.add(self.datafile, end)
            dfh = self.opener(self.datafile, "a+")
        def flush():
            if dfh:
                dfh.flush()
            ifh.flush()
        try:
            # loop through our set of deltas
            chain = None
            for chunkdata in iter(lambda: cg.deltachunk(chain), {}):
                node = chunkdata['node']
                p1 = chunkdata['p1']
                p2 = chunkdata['p2']
                cs = chunkdata['cs']
                deltabase = chunkdata['deltabase']
                delta = chunkdata['delta']
                flags = chunkdata['flags'] or REVIDX_DEFAULT_FLAGS

                content.append(node)

                link = linkmapper(cs)
                if node in self.nodemap:
                    # this can happen if two branches make the same change
                    chain = node
                    continue

                for p in (p1, p2):
                    if p not in self.nodemap:
                        raise LookupError(p, self.indexfile,
                                          _('unknown parent'))

                if deltabase not in self.nodemap:
                    raise LookupError(deltabase, self.indexfile,
                                      _('unknown delta base'))

                baserev = self.rev(deltabase)

                if baserev != nullrev and self.iscensored(baserev):
                    # if base is censored, delta must be full replacement in a
                    # single patch operation
                    hlen = struct.calcsize(">lll")
                    oldlen = self.rawsize(baserev)
                    newlen = len(delta) - hlen
                    if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
                        raise error.CensoredBaseError(self.indexfile,
                                                      self.node(baserev))

                if not flags and self._peek_iscensored(baserev, delta, flush):
                    flags |= REVIDX_ISCENSORED

                # We assume consumers of addrevisioncb will want to retrieve
                # the added revision, which will require a call to
                # revision(). revision() will fast path if there is a cache
                # hit. So, we tell _addrevision() to always cache in this case.
                chain = self._addrevision(node, None, transaction, link,
                                          p1, p2, flags, (baserev, delta),
                                          ifh, dfh,
                                          alwayscache=bool(addrevisioncb))

                if addrevisioncb:
                    addrevisioncb(self, chain)

                if not dfh and not self._inline:
                    # addrevision switched from inline to conventional
                    # reopen the index
                    ifh.close()
                    dfh = self.opener(self.datafile, "a+")
                    ifh = self.opener(self.indexfile, "a+",
                                      checkambig=self._checkambig)
        finally:
            if dfh:
                dfh.close()
            ifh.close()

        return content

    def iscensored(self, rev):
        """Check if a file revision is censored."""
        return False

    def _peek_iscensored(self, baserev, delta, flush):
        """Quickly check if a delta produces a censored revision."""
        return False

    def getstrippoint(self, minlink):
        """find the minimum rev that must be stripped to strip the linkrev

        Returns a tuple containing the minimum rev and a set of all revs that
        have linkrevs that will be broken by this strip.
        """
        brokenrevs = set()
        strippoint = len(self)

        heads = {}
        futurelargelinkrevs = set()
        for head in self.headrevs():
            headlinkrev = self.linkrev(head)
            heads[head] = headlinkrev
            if headlinkrev >= minlink:
                futurelargelinkrevs.add(headlinkrev)

        # This algorithm involves walking down the rev graph, starting at the
        # heads. Since the revs are topologically sorted according to linkrev,
        # once all head linkrevs are below the minlink, we know there are
        # no more revs that could have a linkrev greater than minlink.
        # So we can stop walking.
        while futurelargelinkrevs:
            strippoint -= 1
            linkrev = heads.pop(strippoint)

            if linkrev < minlink:
                brokenrevs.add(strippoint)
            else:
                futurelargelinkrevs.remove(linkrev)

            for p in self.parentrevs(strippoint):
                if p != nullrev:
                    plinkrev = self.linkrev(p)
                    heads[p] = plinkrev
                    if plinkrev >= minlink:
                        futurelargelinkrevs.add(plinkrev)

        return strippoint, brokenrevs

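    # Walk sketch (illustrative values): with minlink=5 and head linkrevs
    # {8: 4, 9: 7}, only 7 enters futurelargelinkrevs. Revisions are popped
    # from the top down, parents inherit their linkrevs into ``heads``, and
    # the loop halts once every remaining linkrev is below 5, leaving
    # ``strippoint`` at the first revision that must be truncated.
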
    def strip(self, minlink, transaction):
        """truncate the revlog on the first revision with a linkrev >= minlink

        This function is called when we're stripping revision minlink and
        its descendants from the repository.

        We have to remove all revisions with linkrev >= minlink, because
        the equivalent changelog revisions will be renumbered after the
        strip.

        So we truncate the revlog on the first of these revisions, and
        trust that the caller has saved the revisions that shouldn't be
        removed and that it'll re-add them after this truncation.
        """
        if len(self) == 0:
            return

        rev, _ = self.getstrippoint(minlink)
        if rev == len(self):
            return

        # first truncate the files on disk
        end = self.start(rev)
        if not self._inline:
            transaction.add(self.datafile, end)
            end = rev * self._io.size
        else:
            end += rev * self._io.size

        transaction.add(self.indexfile, end)

        # then reset internal state in memory to forget those revisions
        self._cache = None
        self._chaininfocache = {}
        self._chunkclear()
        for x in xrange(rev, len(self)):
            del self.nodemap[self.node(x)]

        del self.index[rev:-1]

    def checksize(self):
        expected = 0
        if len(self):
            expected = max(0, self.end(len(self) - 1))

        try:
            f = self.opener(self.datafile)
            f.seek(0, 2)
            actual = f.tell()
            f.close()
            dd = actual - expected
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise
            dd = 0

        try:
            f = self.opener(self.indexfile)
            f.seek(0, 2)
            actual = f.tell()
            f.close()
            s = self._io.size
            i = max(0, actual // s)
            di = actual - (i * s)
            if self._inline:
                databytes = 0
                for r in self:
                    databytes += max(0, self.length(r))
                dd = 0
                di = actual - len(self) * s - databytes
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise
            di = 0

        return (dd, di)

    def files(self):
        res = [self.indexfile]
        if not self._inline:
            res.append(self.datafile)
        return res