revlog: make code in builddelta() slightly easier to read...
Gregory Szorc
r30011:d81fe5af default
@@ -1,1789 +1,1790 b''
# revlog.py - storage back-end for mercurial
#
# Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.

"""Storage back-end for Mercurial.

This provides efficient delta storage with O(1) retrieve and append
and O(changes) merge between branches.
"""

from __future__ import absolute_import

import collections
import errno
import hashlib
import os
import struct
import zlib

# import stuff from node for others to import from revlog
from .node import (
    bin,
    hex,
    nullid,
    nullrev,
)
from .i18n import _
from . import (
    ancestor,
    error,
    mdiff,
    parsers,
    templatefilters,
    util,
)

_pack = struct.pack
_unpack = struct.unpack
_compress = zlib.compress
_decompress = zlib.decompress

# revlog header flags
REVLOGV0 = 0
REVLOGNG = 1
REVLOGNGINLINEDATA = (1 << 16)
REVLOGGENERALDELTA = (1 << 17)
REVLOG_DEFAULT_FLAGS = REVLOGNGINLINEDATA
REVLOG_DEFAULT_FORMAT = REVLOGNG
REVLOG_DEFAULT_VERSION = REVLOG_DEFAULT_FORMAT | REVLOG_DEFAULT_FLAGS
REVLOGNG_FLAGS = REVLOGNGINLINEDATA | REVLOGGENERALDELTA

# revlog index flags
REVIDX_ISCENSORED = (1 << 15) # revision has censor metadata, must be verified
REVIDX_DEFAULT_FLAGS = 0
REVIDX_KNOWN_FLAGS = REVIDX_ISCENSORED

# max size of revlog with inline data
_maxinline = 131072
_chunksize = 1048576

RevlogError = error.RevlogError
LookupError = error.LookupError
CensoredNodeError = error.CensoredNodeError

def getoffset(q):
    return int(q >> 16)

def gettype(q):
    return int(q & 0xFFFF)

def offset_type(offset, type):
    return long(long(offset) << 16 | type)

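# Illustrative sketch (not part of the original module): the first index
# field packs a 48-bit data-file offset and a 16-bit flags value into one
# integer, so the helpers above are inverses of each other:
#
#   >>> q = offset_type(1024, REVIDX_ISCENSORED)
#   >>> getoffset(q) == 1024 and gettype(q) == REVIDX_ISCENSORED
#   True
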
_nullhash = hashlib.sha1(nullid)

def hash(text, p1, p2):
    """generate a hash from the given text and its parent hashes

    This hash combines both the current file contents and its history
    in a manner that makes it easy to distinguish nodes with the same
    content in the revision graph.
    """
    # As of now, if one of the parent nodes is null, p2 is null
    if p2 == nullid:
        # deep copy of a hash is faster than creating one
        s = _nullhash.copy()
        s.update(p1)
    else:
        # none of the parent nodes are nullid
        l = [p1, p2]
        l.sort()
        s = hashlib.sha1(l[0])
        s.update(l[1])
    s.update(text)
    return s.digest()

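# Illustrative sketch (not part of the original module): the node of a
# revision is sha1(min(p1, p2) + max(p1, p2) + text), with a missing
# parent represented by nullid:
#
#   >>> hash('data', nullid, nullid) == hashlib.sha1(
#   ...     nullid + nullid + 'data').digest()
#   True
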
def decompress(bin):
    """ decompress the given input """
    if not bin:
        return bin
    t = bin[0]
    if t == '\0':
        return bin
    if t == 'x':
        try:
            return _decompress(bin)
        except zlib.error as e:
            raise RevlogError(_("revlog decompress error: %s") % str(e))
    if t == 'u':
        return util.buffer(bin, 1)
    raise RevlogError(_("unknown compression type %r") % t)

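# Illustrative sketch (not part of the original module): chunks are tagged
# by their first byte -- '\0' means the data is returned as-is, 'x' is the
# leading byte of a zlib stream, and 'u' prefixes data that was stored
# uncompressed because compression did not help:
#
#   >>> str(decompress('u' + 'some text'))
#   'some text'
#   >>> decompress(_compress('some text'))  # zlib output starts with 'x'
#   'some text'
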
# index v0:
#  4 bytes: offset
#  4 bytes: compressed length
#  4 bytes: base rev
#  4 bytes: link rev
# 20 bytes: parent 1 nodeid
# 20 bytes: parent 2 nodeid
# 20 bytes: nodeid
indexformatv0 = ">4l20s20s20s"

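# Illustrative sketch (not part of the original module): ">4l20s20s20s" is
# big-endian, four 32-bit signed ints followed by three 20-byte strings,
# i.e. a fixed 76-byte record per revision:
#
#   >>> struct.calcsize(indexformatv0)
#   76
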
class revlogoldio(object):
    def __init__(self):
        self.size = struct.calcsize(indexformatv0)

    def parseindex(self, data, inline):
        s = self.size
        index = []
        nodemap = {nullid: nullrev}
        n = off = 0
        l = len(data)
        while off + s <= l:
            cur = data[off:off + s]
            off += s
            e = _unpack(indexformatv0, cur)
            # transform to revlogv1 format
            e2 = (offset_type(e[0], 0), e[1], -1, e[2], e[3],
                  nodemap.get(e[4], nullrev), nodemap.get(e[5], nullrev), e[6])
            index.append(e2)
            nodemap[e[6]] = n
            n += 1

        # add the magic null revision at -1
        index.append((0, 0, 0, -1, -1, -1, -1, nullid))

        return index, nodemap, None

    def packentry(self, entry, node, version, rev):
        if gettype(entry[0]):
            raise RevlogError(_("index entry flags need RevlogNG"))
        e2 = (getoffset(entry[0]), entry[1], entry[3], entry[4],
              node(entry[5]), node(entry[6]), entry[7])
        return _pack(indexformatv0, *e2)

# index ng:
#  6 bytes: offset
#  2 bytes: flags
#  4 bytes: compressed length
#  4 bytes: uncompressed length
#  4 bytes: base rev
#  4 bytes: link rev
#  4 bytes: parent 1 rev
#  4 bytes: parent 2 rev
# 32 bytes: nodeid
indexformatng = ">Qiiiiii20s12x"
versionformat = ">I"

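# Illustrative sketch (not part of the original module): ">Qiiiiii20s12x"
# is a 64-byte record -- the 64-bit first field carries the 6-byte offset
# and the 2-byte flags combined by offset_type() above, and the 20-byte
# SHA-1 nodeid is zero-padded to 32 bytes to leave room for hash growth:
#
#   >>> struct.calcsize(indexformatng)
#   64
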
# corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
# signed integer)
_maxentrysize = 0x7fffffff

class revlogio(object):
    def __init__(self):
        self.size = struct.calcsize(indexformatng)

    def parseindex(self, data, inline):
        # call the C implementation to parse the index data
        index, cache = parsers.parse_index2(data, inline)
        return index, getattr(index, 'nodemap', None), cache

    def packentry(self, entry, node, version, rev):
        p = _pack(indexformatng, *entry)
        if rev == 0:
            p = _pack(versionformat, version) + p[4:]
        return p

class revlog(object):
    """
    the underlying revision storage object

    A revlog consists of two parts, an index and the revision data.

    The index is a file with a fixed record size containing
    information on each revision, including its nodeid (hash), the
    nodeids of its parents, the position and offset of its data within
    the data file, and the revision it's based on. Finally, each entry
    contains a linkrev entry that can serve as a pointer to external
    data.

    The revision data itself is a linear collection of data chunks.
    Each chunk represents a revision and is usually represented as a
    delta against the previous chunk. To bound lookup time, runs of
    deltas are limited to about 2 times the length of the original
    version data. This makes retrieval of a version proportional to
    its size, or O(1) relative to the number of revisions.

    Both pieces of the revlog are written to in an append-only
    fashion, which means we never need to rewrite a file to insert or
    remove data, and can use some simple techniques to avoid the need
    for locking while reading.

    If checkambig, indexfile is opened with checkambig=True at
    writing, to avoid file stat ambiguity.
    """
    def __init__(self, opener, indexfile, checkambig=False):
        """
        create a revlog object

        opener is a function that abstracts the file opening operation
        and can be used to implement COW semantics or the like.
        """
        self.indexfile = indexfile
        self.datafile = indexfile[:-2] + ".d"
        self.opener = opener
        # When True, indexfile is opened with checkambig=True at writing, to
        # avoid file stat ambiguity.
        self._checkambig = checkambig
        # 3-tuple of (node, rev, text) for a raw revision.
        self._cache = None
        # Maps rev to chain base rev.
        self._chainbasecache = util.lrucachedict(100)
        # 2-tuple of (offset, data) of raw data from the revlog at an offset.
        self._chunkcache = (0, '')
        # How much data to read and cache into the raw revlog data cache.
        self._chunkcachesize = 65536
        self._maxchainlen = None
        self._aggressivemergedeltas = False
        self.index = []
        # Mapping of partial identifiers to full nodes.
        self._pcache = {}
        # Mapping of revision integer to full node.
        self._nodecache = {nullid: nullrev}
        self._nodepos = None

        v = REVLOG_DEFAULT_VERSION
        opts = getattr(opener, 'options', None)
        if opts is not None:
            if 'revlogv1' in opts:
                if 'generaldelta' in opts:
                    v |= REVLOGGENERALDELTA
            else:
                v = 0
            if 'chunkcachesize' in opts:
                self._chunkcachesize = opts['chunkcachesize']
            if 'maxchainlen' in opts:
                self._maxchainlen = opts['maxchainlen']
            if 'aggressivemergedeltas' in opts:
                self._aggressivemergedeltas = opts['aggressivemergedeltas']
            self._lazydeltabase = bool(opts.get('lazydeltabase', False))

        if self._chunkcachesize <= 0:
            raise RevlogError(_('revlog chunk cache size %r is not greater '
                                'than 0') % self._chunkcachesize)
        elif self._chunkcachesize & (self._chunkcachesize - 1):
            raise RevlogError(_('revlog chunk cache size %r is not a power '
                                'of 2') % self._chunkcachesize)
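        # Illustrative note (not part of the original module): a positive
        # power of two has exactly one bit set, so for such values
        # x & (x - 1) clears that bit and yields 0; any other size leaves
        # stray bits behind and is rejected above, e.g. 65536 & 65535 == 0
        # but 65537 & 65536 != 0.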

        indexdata = ''
        self._initempty = True
        try:
            f = self.opener(self.indexfile)
            indexdata = f.read()
            f.close()
            if len(indexdata) > 0:
                v = struct.unpack(versionformat, indexdata[:4])[0]
                self._initempty = False
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise

        self.version = v
        self._inline = v & REVLOGNGINLINEDATA
        self._generaldelta = v & REVLOGGENERALDELTA
        flags = v & ~0xFFFF
        fmt = v & 0xFFFF
        if fmt == REVLOGV0 and flags:
            raise RevlogError(_("index %s unknown flags %#04x for format v0")
                              % (self.indexfile, flags >> 16))
        elif fmt == REVLOGNG and flags & ~REVLOGNG_FLAGS:
            raise RevlogError(_("index %s unknown flags %#04x for revlogng")
                              % (self.indexfile, flags >> 16))
        elif fmt > REVLOGNG:
            raise RevlogError(_("index %s unknown format %d")
                              % (self.indexfile, fmt))

        self._io = revlogio()
        if self.version == REVLOGV0:
            self._io = revlogoldio()
        try:
            d = self._io.parseindex(indexdata, self._inline)
        except (ValueError, IndexError):
            raise RevlogError(_("index %s is corrupted") % (self.indexfile))
        self.index, nodemap, self._chunkcache = d
        if nodemap is not None:
            self.nodemap = self._nodecache = nodemap
        if not self._chunkcache:
            self._chunkclear()
        # revnum -> (chain-length, sum-delta-length)
        self._chaininfocache = {}

    def tip(self):
        return self.node(len(self.index) - 2)
    def __contains__(self, rev):
        return 0 <= rev < len(self)
    def __len__(self):
        return len(self.index) - 1
    def __iter__(self):
        return iter(xrange(len(self)))
    def revs(self, start=0, stop=None):
        """iterate over all revs in this revlog (from start to stop)"""
        step = 1
        if stop is not None:
            if start > stop:
                step = -1
            stop += step
        else:
            stop = len(self)
        return xrange(start, stop, step)

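    # Illustrative sketch (not part of the original module): revs() walks
    # backwards when start > stop, and both bounds are inclusive in that
    # case because stop is nudged by one step. Assuming a hypothetical
    # revlog instance rl with at least three revisions:
    #
    #   >>> list(rl.revs(start=0, stop=2))
    #   [0, 1, 2]
    #   >>> list(rl.revs(start=2, stop=0))
    #   [2, 1, 0]
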
    @util.propertycache
    def nodemap(self):
        self.rev(self.node(0))
        return self._nodecache

    def hasnode(self, node):
        try:
            self.rev(node)
            return True
        except KeyError:
            return False

    def clearcaches(self):
        self._cache = None
        self._chainbasecache.clear()
        self._chunkcache = (0, '')
        self._pcache = {}

        try:
            self._nodecache.clearcaches()
        except AttributeError:
            self._nodecache = {nullid: nullrev}
            self._nodepos = None

    def rev(self, node):
        try:
            return self._nodecache[node]
        except TypeError:
            raise
        except RevlogError:
            # parsers.c radix tree lookup failed
            raise LookupError(node, self.indexfile, _('no node'))
        except KeyError:
            # pure python cache lookup failed
            n = self._nodecache
            i = self.index
            p = self._nodepos
            if p is None:
                p = len(i) - 2
            for r in xrange(p, -1, -1):
                v = i[r][7]
                n[v] = r
                if v == node:
                    self._nodepos = r - 1
                    return r
            raise LookupError(node, self.indexfile, _('no node'))

    def node(self, rev):
        return self.index[rev][7]
    def linkrev(self, rev):
        return self.index[rev][4]
    def parents(self, node):
        i = self.index
        d = i[self.rev(node)]
        return i[d[5]][7], i[d[6]][7] # map revisions to nodes inline
    def parentrevs(self, rev):
        return self.index[rev][5:7]
    def start(self, rev):
        return int(self.index[rev][0] >> 16)
    def end(self, rev):
        return self.start(rev) + self.length(rev)
    def length(self, rev):
        return self.index[rev][1]
    def chainbase(self, rev):
        base = self._chainbasecache.get(rev)
        if base is not None:
            return base

        index = self.index
        base = index[rev][3]
        while base != rev:
            rev = base
            base = index[rev][3]

        self._chainbasecache[rev] = base
        return base
    def chainlen(self, rev):
        return self._chaininfo(rev)[0]

    def _chaininfo(self, rev):
        chaininfocache = self._chaininfocache
        if rev in chaininfocache:
            return chaininfocache[rev]
        index = self.index
        generaldelta = self._generaldelta
        iterrev = rev
        e = index[iterrev]
        clen = 0
        compresseddeltalen = 0
        while iterrev != e[3]:
            clen += 1
            compresseddeltalen += e[1]
            if generaldelta:
                iterrev = e[3]
            else:
                iterrev -= 1
            if iterrev in chaininfocache:
                t = chaininfocache[iterrev]
                clen += t[0]
                compresseddeltalen += t[1]
                break
            e = index[iterrev]
        else:
            # Add text length of base since decompressing that also takes
            # work. For cache hits the length is already included.
            compresseddeltalen += e[1]
        r = (clen, compresseddeltalen)
        chaininfocache[rev] = r
        return r

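    # Illustrative note (not part of the original module): _chaininfo()
    # walks the same chain as _deltachain() below but only accumulates
    # (chain length, total compressed bytes); e.g. a rev stored as two
    # deltas on top of a full-text base reports clen == 2, with the base's
    # compressed length counted in via the while/else clause.
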
    def _deltachain(self, rev, stoprev=None):
        """Obtain the delta chain for a revision.

        ``stoprev`` specifies a revision to stop at. If not specified, we
        stop at the base of the chain.

        Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
        revs in ascending order and ``stopped`` is a bool indicating whether
        ``stoprev`` was hit.
        """
        chain = []

        # Alias to prevent attribute lookup in tight loop.
        index = self.index
        generaldelta = self._generaldelta

        iterrev = rev
        e = index[iterrev]
        while iterrev != e[3] and iterrev != stoprev:
            chain.append(iterrev)
            if generaldelta:
                iterrev = e[3]
            else:
                iterrev -= 1
            e = index[iterrev]

        if iterrev == stoprev:
            stopped = True
        else:
            chain.append(iterrev)
            stopped = False

        chain.reverse()
        return chain, stopped

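    # Illustrative sketch (not part of the original module): with
    # generaldelta each entry names its delta parent (e[3]); otherwise the
    # delta parent is implicitly rev - 1. If rev 5 deltas against 3, and 3
    # against the full-text rev 1, then for a hypothetical generaldelta
    # revlog rl:
    #
    #   >>> rl._deltachain(5)
    #   ([1, 3, 5], False)
    #   >>> rl._deltachain(5, stoprev=3)
    #   ([5], True)
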
    def flags(self, rev):
        return self.index[rev][0] & 0xFFFF
    def rawsize(self, rev):
        """return the length of the uncompressed text for a given revision"""
        l = self.index[rev][2]
        if l >= 0:
            return l

        t = self.revision(self.node(rev))
        return len(t)
    size = rawsize

    def ancestors(self, revs, stoprev=0, inclusive=False):
        """Generate the ancestors of 'revs' in reverse topological order.
        Does not generate revs lower than stoprev.

        See the documentation for ancestor.lazyancestors for more details."""

        return ancestor.lazyancestors(self.parentrevs, revs, stoprev=stoprev,
                                      inclusive=inclusive)

    def descendants(self, revs):
        """Generate the descendants of 'revs' in revision order.

        Yield a sequence of revision numbers starting with a child of
        some rev in revs, i.e., each revision is *not* considered a
        descendant of itself. Results are ordered by revision number (a
        topological sort)."""
        first = min(revs)
        if first == nullrev:
            for i in self:
                yield i
            return

        seen = set(revs)
        for i in self.revs(start=first + 1):
            for x in self.parentrevs(i):
                if x != nullrev and x in seen:
                    seen.add(i)
                    yield i
                    break

    def findcommonmissing(self, common=None, heads=None):
        """Return a tuple of the ancestors of common and the ancestors of heads
        that are not ancestors of common. In revset terminology, we return the
        tuple:

          ::common, (::heads) - (::common)

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of node IDs. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullid."""
        if common is None:
            common = [nullid]
        if heads is None:
            heads = self.heads()

        common = [self.rev(n) for n in common]
        heads = [self.rev(n) for n in heads]

        # we want the ancestors, but inclusive
        class lazyset(object):
            def __init__(self, lazyvalues):
                self.addedvalues = set()
                self.lazyvalues = lazyvalues

            def __contains__(self, value):
                return value in self.addedvalues or value in self.lazyvalues

            def __iter__(self):
                added = self.addedvalues
                for r in added:
                    yield r
                for r in self.lazyvalues:
                    if not r in added:
                        yield r

            def add(self, value):
                self.addedvalues.add(value)

            def update(self, values):
                self.addedvalues.update(values)

        has = lazyset(self.ancestors(common))
        has.add(nullrev)
        has.update(common)

        # take all ancestors from heads that aren't in has
        missing = set()
        visit = collections.deque(r for r in heads if r not in has)
        while visit:
            r = visit.popleft()
            if r in missing:
                continue
            else:
                missing.add(r)
                for p in self.parentrevs(r):
                    if p not in has:
                        visit.append(p)
        missing = list(missing)
        missing.sort()
        return has, [self.node(r) for r in missing]

    def incrementalmissingrevs(self, common=None):
        """Return an object that can be used to incrementally compute the
        revision numbers of the ancestors of arbitrary sets that are not
        ancestors of common. This is an ancestor.incrementalmissingancestors
        object.

        'common' is a list of revision numbers. If common is not supplied, uses
        nullrev.
        """
        if common is None:
            common = [nullrev]

        return ancestor.incrementalmissingancestors(self.parentrevs, common)

    def findmissingrevs(self, common=None, heads=None):
        """Return the revision numbers of the ancestors of heads that
        are not ancestors of common.

        More specifically, return a list of revision numbers corresponding to
        nodes N such that every N satisfies the following constraints:

          1. N is an ancestor of some node in 'heads'
          2. N is not an ancestor of any node in 'common'

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of revision numbers. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullid."""
        if common is None:
            common = [nullrev]
        if heads is None:
            heads = self.headrevs()

        inc = self.incrementalmissingrevs(common=common)
        return inc.missingancestors(heads)

    def findmissing(self, common=None, heads=None):
        """Return the ancestors of heads that are not ancestors of common.

        More specifically, return a list of nodes N such that every N
        satisfies the following constraints:

          1. N is an ancestor of some node in 'heads'
          2. N is not an ancestor of any node in 'common'

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of node IDs. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullid."""
        if common is None:
            common = [nullid]
        if heads is None:
            heads = self.heads()

        common = [self.rev(n) for n in common]
        heads = [self.rev(n) for n in heads]

        inc = self.incrementalmissingrevs(common=common)
        return [self.node(r) for r in inc.missingancestors(heads)]

    def nodesbetween(self, roots=None, heads=None):
        """Return a topological path from 'roots' to 'heads'.

        Return a tuple (nodes, outroots, outheads) where 'nodes' is a
        topologically sorted list of all nodes N that satisfy both of
        these constraints:

          1. N is a descendant of some node in 'roots'
          2. N is an ancestor of some node in 'heads'

        Every node is considered to be both a descendant and an ancestor
        of itself, so every reachable node in 'roots' and 'heads' will be
        included in 'nodes'.

        'outroots' is the list of reachable nodes in 'roots', i.e., the
        subset of 'roots' that is returned in 'nodes'. Likewise,
        'outheads' is the subset of 'heads' that is also in 'nodes'.

        'roots' and 'heads' are both lists of node IDs. If 'roots' is
        unspecified, uses nullid as the only root. If 'heads' is
        unspecified, uses list of all of the revlog's heads."""
        nonodes = ([], [], [])
        if roots is not None:
            roots = list(roots)
            if not roots:
                return nonodes
            lowestrev = min([self.rev(n) for n in roots])
        else:
            roots = [nullid] # Everybody's a descendant of nullid
            lowestrev = nullrev
        if (lowestrev == nullrev) and (heads is None):
            # We want _all_ the nodes!
            return ([self.node(r) for r in self], [nullid], list(self.heads()))
        if heads is None:
            # All nodes are ancestors, so the latest ancestor is the last
            # node.
            highestrev = len(self) - 1
            # Set ancestors to None to signal that every node is an ancestor.
            ancestors = None
            # Set heads to an empty dictionary for later discovery of heads
            heads = {}
        else:
            heads = list(heads)
            if not heads:
                return nonodes
            ancestors = set()
            # Turn heads into a dictionary so we can remove 'fake' heads.
            # Also, later we will be using it to filter out the heads we can't
            # find from roots.
            heads = dict.fromkeys(heads, False)
            # Start at the top and keep marking parents until we're done.
            nodestotag = set(heads)
            # Remember where the top was so we can use it as a limit later.
            highestrev = max([self.rev(n) for n in nodestotag])
            while nodestotag:
                # grab a node to tag
                n = nodestotag.pop()
                # Never tag nullid
                if n == nullid:
                    continue
                # A node's revision number represents its place in a
                # topologically sorted list of nodes.
                r = self.rev(n)
                if r >= lowestrev:
                    if n not in ancestors:
                        # If we are possibly a descendant of one of the roots
                        # and we haven't already been marked as an ancestor
                        ancestors.add(n) # Mark as ancestor
                        # Add non-nullid parents to list of nodes to tag.
                        nodestotag.update([p for p in self.parents(n) if
                                           p != nullid])
                    elif n in heads: # We've seen it before, is it a fake head?
                        # So it is, real heads should not be the ancestors of
                        # any other heads.
                        heads.pop(n)
            if not ancestors:
                return nonodes
            # Now that we have our set of ancestors, we want to remove any
            # roots that are not ancestors.

            # If one of the roots was nullid, everything is included anyway.
            if lowestrev > nullrev:
                # But, since we weren't, let's recompute the lowest rev to not
                # include roots that aren't ancestors.

                # Filter out roots that aren't ancestors of heads
                roots = [n for n in roots if n in ancestors]
                # Recompute the lowest revision
                if roots:
                    lowestrev = min([self.rev(n) for n in roots])
                else:
                    # No more roots? Return empty list
                    return nonodes
            else:
                # We are descending from nullid, and don't need to care about
                # any other roots.
                lowestrev = nullrev
                roots = [nullid]
        # Transform our roots list into a set.
        descendants = set(roots)
        # Also, keep the original roots so we can filter out roots that aren't
        # 'real' roots (i.e. are descended from other roots).
        roots = descendants.copy()
        # Our topologically sorted list of output nodes.
        orderedout = []
        # Don't start at nullid since we don't want nullid in our output list,
        # and if nullid shows up in descendants, empty parents will look like
        # they're descendants.
        for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
            n = self.node(r)
            isdescendant = False
            if lowestrev == nullrev: # Everybody is a descendant of nullid
                isdescendant = True
            elif n in descendants:
                # n is already a descendant
                isdescendant = True
                # This check only needs to be done here because all the roots
                # will start being marked as descendants before the loop.
                if n in roots:
                    # If n was a root, check if it's a 'real' root.
                    p = tuple(self.parents(n))
                    # If any of its parents are descendants, it's not a root.
                    if (p[0] in descendants) or (p[1] in descendants):
                        roots.remove(n)
            else:
                p = tuple(self.parents(n))
                # A node is a descendant if either of its parents are
                # descendants. (We seeded the descendants list with the roots
                # up there, remember?)
                if (p[0] in descendants) or (p[1] in descendants):
                    descendants.add(n)
                    isdescendant = True
            if isdescendant and ((ancestors is None) or (n in ancestors)):
                # Only include nodes that are both descendants and ancestors.
                orderedout.append(n)
                if (ancestors is not None) and (n in heads):
                    # We're trying to figure out which heads are reachable
                    # from roots.
                    # Mark this head as having been reached
                    heads[n] = True
                elif ancestors is None:
                    # Otherwise, we're trying to discover the heads.
                    # Assume this is a head because if it isn't, the next step
                    # will eventually remove it.
                    heads[n] = True
                    # But, obviously its parents aren't.
                    for p in self.parents(n):
                        heads.pop(p, None)
        heads = [n for n, flag in heads.iteritems() if flag]
        roots = list(roots)
        assert orderedout
        assert roots
        assert heads
        return (orderedout, roots, heads)

    def headrevs(self):
        try:
            return self.index.headrevs()
        except AttributeError:
            return self._headrevs()

    def computephases(self, roots):
        return self.index.computephasesmapsets(roots)

    def _headrevs(self):
        count = len(self)
        if not count:
            return [nullrev]
        # we won't iter over filtered revs so nobody is a head at start
        ishead = [0] * (count + 1)
        index = self.index
        for r in self:
            ishead[r] = 1 # I may be a head
            e = index[r]
            ishead[e[5]] = ishead[e[6]] = 0 # my parents are not
        return [r for r, val in enumerate(ishead) if val]

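    # Illustrative note (not part of the original module): _headrevs() is a
    # single pass -- every rev is optimistically marked as a head, and each
    # rev then unmarks its two parents, so only childless revs stay set.
    # The list is sized count + 1 so that nullrev (-1), written by revs with
    # missing parents, lands in the extra final slot instead of clobbering
    # a real entry.
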
    def heads(self, start=None, stop=None):
        """return the list of all nodes that have no children

        if start is specified, only heads that are descendants of
        start will be returned
        if stop is specified, it will consider all the revs from stop
        as if they had no children
        """
        if start is None and stop is None:
            if not len(self):
                return [nullid]
            return [self.node(r) for r in self.headrevs()]

        if start is None:
            start = nullid
        if stop is None:
            stop = []
        stoprevs = set([self.rev(n) for n in stop])
        startrev = self.rev(start)
        reachable = set((startrev,))
        heads = set((startrev,))

        parentrevs = self.parentrevs
        for r in self.revs(start=startrev + 1):
            for p in parentrevs(r):
                if p in reachable:
                    if r not in stoprevs:
                        reachable.add(r)
                        heads.add(r)
                    if p in heads and p not in stoprevs:
                        heads.remove(p)

        return [self.node(r) for r in heads]

    def children(self, node):
        """find the children of a given node"""
        c = []
        p = self.rev(node)
        for r in self.revs(start=p + 1):
            prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
            if prevs:
                for pr in prevs:
                    if pr == p:
                        c.append(self.node(r))
            elif p == nullrev:
                c.append(self.node(r))
        return c

    def descendant(self, start, end):
        if start == nullrev:
            return True
        for i in self.descendants([start]):
            if i == end:
                return True
            elif i > end:
                break
        return False

    def commonancestorsheads(self, a, b):
        """calculate all the heads of the common ancestors of nodes a and b"""
        a, b = self.rev(a), self.rev(b)
        try:
            ancs = self.index.commonancestorsheads(a, b)
        except (AttributeError, OverflowError): # C implementation failed
            ancs = ancestor.commonancestorsheads(self.parentrevs, a, b)
        return map(self.node, ancs)

    def isancestor(self, a, b):
        """return True if node a is an ancestor of node b

        The implementation of this is trivial but the use of
        commonancestorsheads is not."""
        return a in self.commonancestorsheads(a, b)

    def ancestor(self, a, b):
        """calculate the "best" common ancestor of nodes a and b"""

        a, b = self.rev(a), self.rev(b)
        try:
            ancs = self.index.ancestors(a, b)
        except (AttributeError, OverflowError):
            ancs = ancestor.ancestors(self.parentrevs, a, b)
        if ancs:
            # choose a consistent winner when there's a tie
            return min(map(self.node, ancs))
        return nullid

    def _match(self, id):
        if isinstance(id, int):
            # rev
            return self.node(id)
        if len(id) == 20:
            # possibly a binary node
            # odds of a binary node being all hex in ASCII are 1 in 10**25
            try:
                node = id
                self.rev(node) # quick search the index
                return node
            except LookupError:
                pass # may be partial hex id
        try:
            # str(rev)
            rev = int(id)
            if str(rev) != id:
                raise ValueError
            if rev < 0:
                rev = len(self) + rev
            if rev < 0 or rev >= len(self):
                raise ValueError
            return self.node(rev)
        except (ValueError, OverflowError):
            pass
        if len(id) == 40:
            try:
                # a full hex nodeid?
                node = bin(id)
                self.rev(node)
                return node
            except (TypeError, LookupError):
                pass

947 def _partialmatch(self, id):
947 def _partialmatch(self, id):
948 try:
948 try:
949 n = self.index.partialmatch(id)
949 n = self.index.partialmatch(id)
950 if n and self.hasnode(n):
950 if n and self.hasnode(n):
951 return n
951 return n
952 return None
952 return None
953 except RevlogError:
953 except RevlogError:
954 # parsers.c radix tree lookup gave multiple matches
954 # parsers.c radix tree lookup gave multiple matches
955 # fast path: for unfiltered changelog, radix tree is accurate
955 # fast path: for unfiltered changelog, radix tree is accurate
956 if not getattr(self, 'filteredrevs', None):
956 if not getattr(self, 'filteredrevs', None):
957 raise LookupError(id, self.indexfile,
957 raise LookupError(id, self.indexfile,
958 _('ambiguous identifier'))
958 _('ambiguous identifier'))
959 # fall through to slow path that filters hidden revisions
959 # fall through to slow path that filters hidden revisions
960 except (AttributeError, ValueError):
960 except (AttributeError, ValueError):
961 # we are pure python, or key was too short to search radix tree
961 # we are pure python, or key was too short to search radix tree
962 pass
962 pass
963
963
964 if id in self._pcache:
964 if id in self._pcache:
965 return self._pcache[id]
965 return self._pcache[id]
966
966
967 if len(id) < 40:
967 if len(id) < 40:
968 try:
968 try:
969 # hex(node)[:...]
969 # hex(node)[:...]
970 l = len(id) // 2 # grab an even number of digits
970 l = len(id) // 2 # grab an even number of digits
971 prefix = bin(id[:l * 2])
971 prefix = bin(id[:l * 2])
972 nl = [e[7] for e in self.index if e[7].startswith(prefix)]
972 nl = [e[7] for e in self.index if e[7].startswith(prefix)]
973 nl = [n for n in nl if hex(n).startswith(id) and
973 nl = [n for n in nl if hex(n).startswith(id) and
974 self.hasnode(n)]
974 self.hasnode(n)]
975 if len(nl) > 0:
975 if len(nl) > 0:
976 if len(nl) == 1:
976 if len(nl) == 1:
977 self._pcache[id] = nl[0]
977 self._pcache[id] = nl[0]
978 return nl[0]
978 return nl[0]
979 raise LookupError(id, self.indexfile,
979 raise LookupError(id, self.indexfile,
980 _('ambiguous identifier'))
980 _('ambiguous identifier'))
981 return None
981 return None
982 except TypeError:
982 except TypeError:
983 pass
983 pass
984
984
985 def lookup(self, id):
985 def lookup(self, id):
986 """locate a node based on:
986 """locate a node based on:
987 - revision number or str(revision number)
987 - revision number or str(revision number)
988 - nodeid or subset of hex nodeid
988 - nodeid or subset of hex nodeid
989 """
989 """
990 n = self._match(id)
990 n = self._match(id)
991 if n is not None:
991 if n is not None:
992 return n
992 return n
993 n = self._partialmatch(id)
993 n = self._partialmatch(id)
994 if n:
994 if n:
995 return n
995 return n
996
996
997 raise LookupError(id, self.indexfile, _('no match found'))
997 raise LookupError(id, self.indexfile, _('no match found'))
998
998
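Between _match() and _partialmatch(), lookup() accepts several identifier forms. A sketch of the accepted inputs (illustrative; `rl` is an open revlog, `node` a known 20-byte nodeid, and hex comes from the node module imported above):

    rl.lookup(42)              # integer revision number
    rl.lookup('42')            # str(rev); '-1' counts back from the tip
    rl.lookup(node)            # 20-byte binary nodeid
    rl.lookup(hex(node))       # 40-character hex nodeid
    rl.lookup(hex(node)[:8])   # unambiguous hex prefix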
999 def cmp(self, node, text):
999 def cmp(self, node, text):
1000 """compare text with a given file revision
1000 """compare text with a given file revision
1001
1001
1002 returns True if text is different from what is stored.
1002 returns True if text is different from what is stored.
1003 """
1003 """
1004 p1, p2 = self.parents(node)
1004 p1, p2 = self.parents(node)
1005 return hash(text, p1, p2) != node
1005 return hash(text, p1, p2) != node
1006
1006
1007 def _addchunk(self, offset, data):
1007 def _addchunk(self, offset, data):
1008 """Add a segment to the revlog cache.
1008 """Add a segment to the revlog cache.
1009
1009
1010 Accepts an absolute offset and the data that is at that location.
1010 Accepts an absolute offset and the data that is at that location.
1011 """
1011 """
1012 o, d = self._chunkcache
1012 o, d = self._chunkcache
1013 # try to add to existing cache
1013 # try to add to existing cache
1014 if o + len(d) == offset and len(d) + len(data) < _chunksize:
1014 if o + len(d) == offset and len(d) + len(data) < _chunksize:
1015 self._chunkcache = o, d + data
1015 self._chunkcache = o, d + data
1016 else:
1016 else:
1017 self._chunkcache = offset, data
1017 self._chunkcache = offset, data
1018
1018
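The cache only grows when the new segment is contiguous with the cached one and the combined size stays under _chunksize (1 MiB); anything else replaces the cache outright. A worked example (illustrative values):

    o, d = 4096, 'x' * 100          # cache covers offsets 4096..4195
    offset, data = 4196, 'y' * 50   # contiguous: o + len(d) == offset
    # -> cache becomes (4096, d + data), covering 4096..4245; a gap or
    #    an oversized result would reset it to (offset, data) instead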
1019 def _loadchunk(self, offset, length, df=None):
1019 def _loadchunk(self, offset, length, df=None):
1020 """Load a segment of raw data from the revlog.
1020 """Load a segment of raw data from the revlog.
1021
1021
1022 Accepts an absolute offset, length to read, and an optional existing
1022 Accepts an absolute offset, length to read, and an optional existing
1023 file handle to read from.
1023 file handle to read from.
1024
1024
1025 If an existing file handle is passed, it will be seeked and the
1025 If an existing file handle is passed, it will be seeked and the
1026 original seek position will NOT be restored.
1026 original seek position will NOT be restored.
1027
1027
1028 Returns a str or buffer of raw byte data.
1028 Returns a str or buffer of raw byte data.
1029 """
1029 """
1030 if df is not None:
1030 if df is not None:
1031 closehandle = False
1031 closehandle = False
1032 else:
1032 else:
1033 if self._inline:
1033 if self._inline:
1034 df = self.opener(self.indexfile)
1034 df = self.opener(self.indexfile)
1035 else:
1035 else:
1036 df = self.opener(self.datafile)
1036 df = self.opener(self.datafile)
1037 closehandle = True
1037 closehandle = True
1038
1038
1039 # Cache data both forward and backward around the requested
1039 # Cache data both forward and backward around the requested
1040 # data, in a fixed size window. This helps speed up operations
1040 # data, in a fixed size window. This helps speed up operations
1041 # involving reading the revlog backwards.
1041 # involving reading the revlog backwards.
1042 cachesize = self._chunkcachesize
1042 cachesize = self._chunkcachesize
1043 realoffset = offset & ~(cachesize - 1)
1043 realoffset = offset & ~(cachesize - 1)
1044 reallength = (((offset + length + cachesize) & ~(cachesize - 1))
1044 reallength = (((offset + length + cachesize) & ~(cachesize - 1))
1045 - realoffset)
1045 - realoffset)
1046 df.seek(realoffset)
1046 df.seek(realoffset)
1047 d = df.read(reallength)
1047 d = df.read(reallength)
1048 if closehandle:
1048 if closehandle:
1049 df.close()
1049 df.close()
1050 self._addchunk(realoffset, d)
1050 self._addchunk(realoffset, d)
1051 if offset != realoffset or reallength != length:
1051 if offset != realoffset or reallength != length:
1052 return util.buffer(d, offset - realoffset, length)
1052 return util.buffer(d, offset - realoffset, length)
1053 return d
1053 return d
1054
1054
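The masking rounds the read window out to cache-size boundaries, which only works because the chunk cache size is a power of two. A worked example (illustrative, with the default 64 KiB cache size):

    cachesize = 65536
    offset, length = 70000, 1000
    realoffset = offset & ~(cachesize - 1)              # 65536
    reallength = (((offset + length + cachesize)
                   & ~(cachesize - 1)) - realoffset)    # 65536
    # one aligned 64 KiB read covers bytes 65536..131071, serving this
    # request and likely the neighboring ones as well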
1055 def _getchunk(self, offset, length, df=None):
1055 def _getchunk(self, offset, length, df=None):
1056 """Obtain a segment of raw data from the revlog.
1056 """Obtain a segment of raw data from the revlog.
1057
1057
1058 Accepts an absolute offset, length of bytes to obtain, and an
1058 Accepts an absolute offset, length of bytes to obtain, and an
1059 optional file handle to the already-opened revlog. If the file
1059 optional file handle to the already-opened revlog. If the file
1060 handle is used, its original seek position will not be preserved.
1060 handle is used, its original seek position will not be preserved.
1061
1061
1062 Requests for data may be returned from a cache.
1062 Requests for data may be returned from a cache.
1063
1063
1064 Returns a str or a buffer instance of raw byte data.
1064 Returns a str or a buffer instance of raw byte data.
1065 """
1065 """
1066 o, d = self._chunkcache
1066 o, d = self._chunkcache
1067 l = len(d)
1067 l = len(d)
1068
1068
1069 # is it in the cache?
1069 # is it in the cache?
1070 cachestart = offset - o
1070 cachestart = offset - o
1071 cacheend = cachestart + length
1071 cacheend = cachestart + length
1072 if cachestart >= 0 and cacheend <= l:
1072 if cachestart >= 0 and cacheend <= l:
1073 if cachestart == 0 and cacheend == l:
1073 if cachestart == 0 and cacheend == l:
1074 return d # avoid a copy
1074 return d # avoid a copy
1075 return util.buffer(d, cachestart, cacheend - cachestart)
1075 return util.buffer(d, cachestart, cacheend - cachestart)
1076
1076
1077 return self._loadchunk(offset, length, df=df)
1077 return self._loadchunk(offset, length, df=df)
1078
1078
1079 def _chunkraw(self, startrev, endrev, df=None):
1079 def _chunkraw(self, startrev, endrev, df=None):
1080 """Obtain a segment of raw data corresponding to a range of revisions.
1080 """Obtain a segment of raw data corresponding to a range of revisions.
1081
1081
1082 Accepts the start and end revisions and an optional already-open
1082 Accepts the start and end revisions and an optional already-open
1083 file handle to be used for reading. If the file handle is read, its
1083 file handle to be used for reading. If the file handle is read, its
1084 seek position will not be preserved.
1084 seek position will not be preserved.
1085
1085
1086 Requests for data may be satisfied by a cache.
1086 Requests for data may be satisfied by a cache.
1087
1087
1088 Returns a 2-tuple of (offset, data) for the requested range of
1088 Returns a 2-tuple of (offset, data) for the requested range of
1089 revisions. Offset is the integer offset from the beginning of the
1089 revisions. Offset is the integer offset from the beginning of the
1090 revlog and data is a str or buffer of the raw byte data.
1090 revlog and data is a str or buffer of the raw byte data.
1091
1091
1092 Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
1092 Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
1093 to determine where each revision's data begins and ends.
1093 to determine where each revision's data begins and ends.
1094 """
1094 """
1095 start = self.start(startrev)
1095 start = self.start(startrev)
1096 end = self.end(endrev)
1096 end = self.end(endrev)
1097 if self._inline:
1097 if self._inline:
1098 start += (startrev + 1) * self._io.size
1098 start += (startrev + 1) * self._io.size
1099 end += (endrev + 1) * self._io.size
1099 end += (endrev + 1) * self._io.size
1100 length = end - start
1100 length = end - start
1101
1101
1102 return start, self._getchunk(start, length, df=df)
1102 return start, self._getchunk(start, length, df=df)
1103
1103
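In an inline revlog the data chunks are interleaved with the index entries in the .i file, so a flat data offset has to be shifted past (rev + 1) index entries. A worked example (illustrative, using the 64-byte RevlogNG entry size):

    iosize = 64                      # RevlogNG index entry size
    startrev, dataoffset = 3, 1000   # offset among data bytes only
    fileoffset = dataoffset + (startrev + 1) * iosize   # 1256 in the .i file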
1104 def _chunk(self, rev, df=None):
1104 def _chunk(self, rev, df=None):
1105 """Obtain a single decompressed chunk for a revision.
1105 """Obtain a single decompressed chunk for a revision.
1106
1106
1107 Accepts an integer revision and an optional already-open file handle
1107 Accepts an integer revision and an optional already-open file handle
1108 to be used for reading. If used, the seek position of the file will not
1108 to be used for reading. If used, the seek position of the file will not
1109 be preserved.
1109 be preserved.
1110
1110
1111 Returns a str holding uncompressed data for the requested revision.
1111 Returns a str holding uncompressed data for the requested revision.
1112 """
1112 """
1113 return decompress(self._chunkraw(rev, rev, df=df)[1])
1113 return decompress(self._chunkraw(rev, rev, df=df)[1])
1114
1114
1115 def _chunks(self, revs, df=None):
1115 def _chunks(self, revs, df=None):
1116 """Obtain decompressed chunks for the specified revisions.
1116 """Obtain decompressed chunks for the specified revisions.
1117
1117
1118 Accepts an iterable of numeric revisions that are assumed to be in
1118 Accepts an iterable of numeric revisions that are assumed to be in
1119 ascending order. Also accepts an optional already-open file handle
1119 ascending order. Also accepts an optional already-open file handle
1120 to be used for reading. If used, the seek position of the file will
1120 to be used for reading. If used, the seek position of the file will
1121 not be preserved.
1121 not be preserved.
1122
1122
1123 This function is similar to calling ``self._chunk()`` multiple times,
1123 This function is similar to calling ``self._chunk()`` multiple times,
1124 but is faster.
1124 but is faster.
1125
1125
1126 Returns a list with decompressed data for each requested revision.
1126 Returns a list with decompressed data for each requested revision.
1127 """
1127 """
1128 if not revs:
1128 if not revs:
1129 return []
1129 return []
1130 start = self.start
1130 start = self.start
1131 length = self.length
1131 length = self.length
1132 inline = self._inline
1132 inline = self._inline
1133 iosize = self._io.size
1133 iosize = self._io.size
1134 buffer = util.buffer
1134 buffer = util.buffer
1135
1135
1136 l = []
1136 l = []
1137 ladd = l.append
1137 ladd = l.append
1138
1138
1139 try:
1139 try:
1140 offset, data = self._chunkraw(revs[0], revs[-1], df=df)
1140 offset, data = self._chunkraw(revs[0], revs[-1], df=df)
1141 except OverflowError:
1141 except OverflowError:
1142 # issue4215 - we can't cache a run of chunks greater than
1142 # issue4215 - we can't cache a run of chunks greater than
1143 # 2G on Windows
1143 # 2G on Windows
1144 return [self._chunk(rev, df=df) for rev in revs]
1144 return [self._chunk(rev, df=df) for rev in revs]
1145
1145
1146 for rev in revs:
1146 for rev in revs:
1147 chunkstart = start(rev)
1147 chunkstart = start(rev)
1148 if inline:
1148 if inline:
1149 chunkstart += (rev + 1) * iosize
1149 chunkstart += (rev + 1) * iosize
1150 chunklength = length(rev)
1150 chunklength = length(rev)
1151 ladd(decompress(buffer(data, chunkstart - offset, chunklength)))
1151 ladd(decompress(buffer(data, chunkstart - offset, chunklength)))
1152
1152
1153 return l
1153 return l
1154
1154
1155 def _chunkclear(self):
1155 def _chunkclear(self):
1156 """Clear the raw chunk cache."""
1156 """Clear the raw chunk cache."""
1157 self._chunkcache = (0, '')
1157 self._chunkcache = (0, '')
1158
1158
1159 def deltaparent(self, rev):
1159 def deltaparent(self, rev):
1160 """return deltaparent of the given revision"""
1160 """return deltaparent of the given revision"""
1161 base = self.index[rev][3]
1161 base = self.index[rev][3]
1162 if base == rev:
1162 if base == rev:
1163 return nullrev
1163 return nullrev
1164 elif self._generaldelta:
1164 elif self._generaldelta:
1165 return base
1165 return base
1166 else:
1166 else:
1167 return rev - 1
1167 return rev - 1
1168
1168
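A sketch of walking a delta chain by hand with deltaparent() (illustrative; the real work happens in _deltachain(), and `rl`/`rev` are assumed bindings):

    chain = [rev]
    while rl.deltaparent(chain[-1]) != nullrev:
        chain.append(rl.deltaparent(chain[-1]))
    chain.reverse()   # base snapshot first, deltas applied after it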
1169 def revdiff(self, rev1, rev2):
1169 def revdiff(self, rev1, rev2):
1170 """return or calculate a delta between two revisions"""
1170 """return or calculate a delta between two revisions"""
1171 if rev1 != nullrev and self.deltaparent(rev2) == rev1:
1171 if rev1 != nullrev and self.deltaparent(rev2) == rev1:
1172 return str(self._chunk(rev2))
1172 return str(self._chunk(rev2))
1173
1173
1174 return mdiff.textdiff(self.revision(rev1),
1174 return mdiff.textdiff(self.revision(rev1),
1175 self.revision(rev2))
1175 self.revision(rev2))
1176
1176
1177 def revision(self, nodeorrev, _df=None):
1177 def revision(self, nodeorrev, _df=None):
1178 """return an uncompressed revision of a given node or revision
1178 """return an uncompressed revision of a given node or revision
1179 number.
1179 number.
1180
1180
1181 _df is an existing file handle to read from. It is meant to only be
1181 _df is an existing file handle to read from. It is meant to only be
1182 used internally.
1182 used internally.
1183 """
1183 """
1184 if isinstance(nodeorrev, int):
1184 if isinstance(nodeorrev, int):
1185 rev = nodeorrev
1185 rev = nodeorrev
1186 node = self.node(rev)
1186 node = self.node(rev)
1187 else:
1187 else:
1188 node = nodeorrev
1188 node = nodeorrev
1189 rev = None
1189 rev = None
1190
1190
1191 cachedrev = None
1191 cachedrev = None
1192 if node == nullid:
1192 if node == nullid:
1193 return ""
1193 return ""
1194 if self._cache:
1194 if self._cache:
1195 if self._cache[0] == node:
1195 if self._cache[0] == node:
1196 return self._cache[2]
1196 return self._cache[2]
1197 cachedrev = self._cache[1]
1197 cachedrev = self._cache[1]
1198
1198
1199 # look up what we need to read
1199 # look up what we need to read
1200 text = None
1200 text = None
1201 if rev is None:
1201 if rev is None:
1202 rev = self.rev(node)
1202 rev = self.rev(node)
1203
1203
1204 # check rev flags
1204 # check rev flags
1205 if self.flags(rev) & ~REVIDX_KNOWN_FLAGS:
1205 if self.flags(rev) & ~REVIDX_KNOWN_FLAGS:
1206 raise RevlogError(_('incompatible revision flag %x') %
1206 raise RevlogError(_('incompatible revision flag %x') %
1207 (self.flags(rev) & ~REVIDX_KNOWN_FLAGS))
1207 (self.flags(rev) & ~REVIDX_KNOWN_FLAGS))
1208
1208
1209 chain, stopped = self._deltachain(rev, stoprev=cachedrev)
1209 chain, stopped = self._deltachain(rev, stoprev=cachedrev)
1210 if stopped:
1210 if stopped:
1211 text = self._cache[2]
1211 text = self._cache[2]
1212
1212
1213 # drop cache to save memory
1213 # drop cache to save memory
1214 self._cache = None
1214 self._cache = None
1215
1215
1216 bins = self._chunks(chain, df=_df)
1216 bins = self._chunks(chain, df=_df)
1217 if text is None:
1217 if text is None:
1218 text = str(bins[0])
1218 text = str(bins[0])
1219 bins = bins[1:]
1219 bins = bins[1:]
1220
1220
1221 text = mdiff.patches(text, bins)
1221 text = mdiff.patches(text, bins)
1222
1222
1223 text = self._checkhash(text, node, rev)
1223 text = self._checkhash(text, node, rev)
1224
1224
1225 self._cache = (node, rev, text)
1225 self._cache = (node, rev, text)
1226 return text
1226 return text
1227
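Condensed, the uncached path above is: resolve the delta chain, decompress every chunk in one pass, then fold the deltas onto the base text. A sketch (illustrative, assuming an open revlog `rl` and a revision `rev`):

    chain, stopped = rl._deltachain(rev)          # base revision first
    bins = rl._chunks(chain)                      # one read, many chunks
    text = mdiff.patches(str(bins[0]), bins[1:])  # apply deltas to base
    # checkhash() then verifies the result against the stored node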
1227
1228 def hash(self, text, p1, p2):
1228 def hash(self, text, p1, p2):
1229 """Compute a node hash.
1229 """Compute a node hash.
1230
1230
1231 Available as a function so that subclasses can replace the hash
1231 Available as a function so that subclasses can replace the hash
1232 as needed.
1232 as needed.
1233 """
1233 """
1234 return hash(text, p1, p2)
1234 return hash(text, p1, p2)
1235
1235
1236 def _checkhash(self, text, node, rev):
1236 def _checkhash(self, text, node, rev):
1237 p1, p2 = self.parents(node)
1237 p1, p2 = self.parents(node)
1238 self.checkhash(text, p1, p2, node, rev)
1238 self.checkhash(text, p1, p2, node, rev)
1239 return text
1239 return text
1240
1240
1241 def checkhash(self, text, p1, p2, node, rev=None):
1241 def checkhash(self, text, p1, p2, node, rev=None):
1242 if node != self.hash(text, p1, p2):
1242 if node != self.hash(text, p1, p2):
1243 revornode = rev
1243 revornode = rev
1244 if revornode is None:
1244 if revornode is None:
1245 revornode = templatefilters.short(hex(node))
1245 revornode = templatefilters.short(hex(node))
1246 raise RevlogError(_("integrity check failed on %s:%s")
1246 raise RevlogError(_("integrity check failed on %s:%s")
1247 % (self.indexfile, revornode))
1247 % (self.indexfile, revornode))
1248
1248
1249 def checkinlinesize(self, tr, fp=None):
1249 def checkinlinesize(self, tr, fp=None):
1250 """Check if the revlog is too big for inline and convert if so.
1250 """Check if the revlog is too big for inline and convert if so.
1251
1251
1252 This should be called after revisions are added to the revlog. If the
1252 This should be called after revisions are added to the revlog. If the
1253 revlog has grown too large to be an inline revlog, it will convert it
1253 revlog has grown too large to be an inline revlog, it will convert it
1254 to use multiple index and data files.
1254 to use multiple index and data files.
1255 """
1255 """
1256 if not self._inline or (self.start(-2) + self.length(-2)) < _maxinline:
1256 if not self._inline or (self.start(-2) + self.length(-2)) < _maxinline:
1257 return
1257 return
1258
1258
1259 trinfo = tr.find(self.indexfile)
1259 trinfo = tr.find(self.indexfile)
1260 if trinfo is None:
1260 if trinfo is None:
1261 raise RevlogError(_("%s not found in the transaction")
1261 raise RevlogError(_("%s not found in the transaction")
1262 % self.indexfile)
1262 % self.indexfile)
1263
1263
1264 trindex = trinfo[2]
1264 trindex = trinfo[2]
1265 if trindex is not None:
1265 if trindex is not None:
1266 dataoff = self.start(trindex)
1266 dataoff = self.start(trindex)
1267 else:
1267 else:
1268 # revlog was stripped at start of transaction, use all leftover data
1268 # revlog was stripped at start of transaction, use all leftover data
1269 trindex = len(self) - 1
1269 trindex = len(self) - 1
1270 dataoff = self.end(-2)
1270 dataoff = self.end(-2)
1271
1271
1272 tr.add(self.datafile, dataoff)
1272 tr.add(self.datafile, dataoff)
1273
1273
1274 if fp:
1274 if fp:
1275 fp.flush()
1275 fp.flush()
1276 fp.close()
1276 fp.close()
1277
1277
1278 df = self.opener(self.datafile, 'w')
1278 df = self.opener(self.datafile, 'w')
1279 try:
1279 try:
1280 for r in self:
1280 for r in self:
1281 df.write(self._chunkraw(r, r)[1])
1281 df.write(self._chunkraw(r, r)[1])
1282 finally:
1282 finally:
1283 df.close()
1283 df.close()
1284
1284
1285 fp = self.opener(self.indexfile, 'w', atomictemp=True,
1285 fp = self.opener(self.indexfile, 'w', atomictemp=True,
1286 checkambig=self._checkambig)
1286 checkambig=self._checkambig)
1287 self.version &= ~(REVLOGNGINLINEDATA)
1287 self.version &= ~(REVLOGNGINLINEDATA)
1288 self._inline = False
1288 self._inline = False
1289 for i in self:
1289 for i in self:
1290 e = self._io.packentry(self.index[i], self.node, self.version, i)
1290 e = self._io.packentry(self.index[i], self.node, self.version, i)
1291 fp.write(e)
1291 fp.write(e)
1292
1292
1293 # if we don't call close, the temp file will never replace the
1293 # if we don't call close, the temp file will never replace the
1294 # real index
1294 # real index
1295 fp.close()
1295 fp.close()
1296
1296
1297 tr.replace(self.indexfile, trindex * self._io.size)
1297 tr.replace(self.indexfile, trindex * self._io.size)
1298 self._chunkclear()
1298 self._chunkclear()
1299
1299
1300 def addrevision(self, text, transaction, link, p1, p2, cachedelta=None,
1300 def addrevision(self, text, transaction, link, p1, p2, cachedelta=None,
1301 node=None):
1301 node=None):
1302 """add a revision to the log
1302 """add a revision to the log
1303
1303
1304 text - the revision data to add
1304 text - the revision data to add
1305 transaction - the transaction object used for rollback
1305 transaction - the transaction object used for rollback
1306 link - the linkrev data to add
1306 link - the linkrev data to add
1307 p1, p2 - the parent nodeids of the revision
1307 p1, p2 - the parent nodeids of the revision
1308 cachedelta - an optional precomputed delta
1308 cachedelta - an optional precomputed delta
1309 node - nodeid of revision; typically node is not specified, and it is
1309 node - nodeid of revision; typically node is not specified, and it is
1310 computed by default as hash(text, p1, p2); however, subclasses might
1310 computed by default as hash(text, p1, p2); however, subclasses might
1311 use a different hashing method (and override checkhash() in that case)
1311 use a different hashing method (and override checkhash() in that case)
1312 """
1312 """
1313 if link == nullrev:
1313 if link == nullrev:
1314 raise RevlogError(_("attempted to add linkrev -1 to %s")
1314 raise RevlogError(_("attempted to add linkrev -1 to %s")
1315 % self.indexfile)
1315 % self.indexfile)
1316
1316
1317 if len(text) > _maxentrysize:
1317 if len(text) > _maxentrysize:
1318 raise RevlogError(
1318 raise RevlogError(
1319 _("%s: size of %d bytes exceeds maximum revlog storage of 2GiB")
1319 _("%s: size of %d bytes exceeds maximum revlog storage of 2GiB")
1320 % (self.indexfile, len(text)))
1320 % (self.indexfile, len(text)))
1321
1321
1322 node = node or self.hash(text, p1, p2)
1322 node = node or self.hash(text, p1, p2)
1323 if node in self.nodemap:
1323 if node in self.nodemap:
1324 return node
1324 return node
1325
1325
1326 dfh = None
1326 dfh = None
1327 if not self._inline:
1327 if not self._inline:
1328 dfh = self.opener(self.datafile, "a+")
1328 dfh = self.opener(self.datafile, "a+")
1329 ifh = self.opener(self.indexfile, "a+", checkambig=self._checkambig)
1329 ifh = self.opener(self.indexfile, "a+", checkambig=self._checkambig)
1330 try:
1330 try:
1331 return self._addrevision(node, text, transaction, link, p1, p2,
1331 return self._addrevision(node, text, transaction, link, p1, p2,
1332 REVIDX_DEFAULT_FLAGS, cachedelta, ifh, dfh)
1332 REVIDX_DEFAULT_FLAGS, cachedelta, ifh, dfh)
1333 finally:
1333 finally:
1334 if dfh:
1334 if dfh:
1335 dfh.close()
1335 dfh.close()
1336 ifh.close()
1336 ifh.close()
1337
1337
1338 def compress(self, text):
1338 def compress(self, text):
1339 """ generate a possibly-compressed representation of text """
1339 """ generate a possibly-compressed representation of text """
1340 if not text:
1340 if not text:
1341 return ("", text)
1341 return ("", text)
1342 l = len(text)
1342 l = len(text)
1343 bin = None
1343 bin = None
1344 if l < 44:
1344 if l < 44:
1345 pass
1345 pass
1346 elif l > 1000000:
1346 elif l > 1000000:
1347 # zlib makes an internal copy, thus doubling memory usage for
1347 # zlib makes an internal copy, thus doubling memory usage for
1348 # large files, so let's do this in pieces
1348 # large files, so let's do this in pieces
1349 z = zlib.compressobj()
1349 z = zlib.compressobj()
1350 p = []
1350 p = []
1351 pos = 0
1351 pos = 0
1352 while pos < l:
1352 while pos < l:
1353 pos2 = pos + 2**20
1353 pos2 = pos + 2**20
1354 p.append(z.compress(text[pos:pos2]))
1354 p.append(z.compress(text[pos:pos2]))
1355 pos = pos2
1355 pos = pos2
1356 p.append(z.flush())
1356 p.append(z.flush())
1357 if sum(map(len, p)) < l:
1357 if sum(map(len, p)) < l:
1358 bin = "".join(p)
1358 bin = "".join(p)
1359 else:
1359 else:
1360 bin = _compress(text)
1360 bin = _compress(text)
1361 if bin is None or len(bin) > l:
1361 if bin is None or len(bin) > l:
1362 if text[0] == '\0':
1362 if text[0] == '\0':
1363 return ("", text)
1363 return ("", text)
1364 return ('u', text)
1364 return ('u', text)
1365 return ("", bin)
1365 return ("", bin)
1366
1366
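The returned 2-tuple is (header, data): the header tells readers how to interpret the stored bytes. A sketch of the three cases (illustrative, assuming an open revlog `rl`):

    # ('',  zlibdata)  compressed; stored bytes begin with zlib's 'x'
    # ('u', text)      left uncompressed, flagged with a leading 'u'
    # ('',  text)      text begins with '\0' and is stored verbatim
    header, data = rl.compress('some revision text' * 10)
    stored = header + data   # exactly the bytes written to the revlog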
1367 def _isgooddelta(self, d, textlen):
1367 def _isgooddelta(self, d, textlen):
1368 """Returns True if the given delta is good. Good means that it is within
1368 """Returns True if the given delta is good. Good means that it is within
1369 the disk span, disk size, and chain length bounds that we know to be
1369 the disk span, disk size, and chain length bounds that we know to be
1370 performant."""
1370 performant."""
1371 if d is None:
1371 if d is None:
1372 return False
1372 return False
1373
1373
1374 # - 'dist' is the distance from the base revision -- bounding it limits
1374 # - 'dist' is the distance from the base revision -- bounding it limits
1375 # the amount of I/O we need to do.
1375 # the amount of I/O we need to do.
1376 # - 'compresseddeltalen' is the sum of the total size of deltas we need
1376 # - 'compresseddeltalen' is the sum of the total size of deltas we need
1377 # to apply -- bounding it limits the amount of CPU we consume.
1377 # to apply -- bounding it limits the amount of CPU we consume.
1378 dist, l, data, base, chainbase, chainlen, compresseddeltalen = d
1378 dist, l, data, base, chainbase, chainlen, compresseddeltalen = d
1379 if (dist > textlen * 4 or l > textlen or
1379 if (dist > textlen * 4 or l > textlen or
1380 compresseddeltalen > textlen * 2 or
1380 compresseddeltalen > textlen * 2 or
1381 (self._maxchainlen and chainlen > self._maxchainlen)):
1381 (self._maxchainlen and chainlen > self._maxchainlen)):
1382 return False
1382 return False
1383
1383
1384 return True
1384 return True
1385
1385
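A worked example of the bounds (illustrative values): with a 1000-byte fulltext and no _maxchainlen configured, a delta is rejected when replaying it would read more than 4x the fulltext, when the delta alone exceeds the fulltext, or when the chain's compressed deltas exceed 2x the fulltext.

    textlen = 1000
    dist, l, compresseddeltalen = 3500, 400, 1800
    good = not (dist > textlen * 4 or              # 3500 <= 4000: ok
                l > textlen or                     # 400 <= 1000: ok
                compresseddeltalen > textlen * 2)  # 1800 <= 2000: ok
    # good is True; bump dist to 4500 and the delta would be refused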
1386 def _addrevision(self, node, text, transaction, link, p1, p2, flags,
1386 def _addrevision(self, node, text, transaction, link, p1, p2, flags,
1387 cachedelta, ifh, dfh, alwayscache=False):
1387 cachedelta, ifh, dfh, alwayscache=False):
1388 """internal function to add revisions to the log
1388 """internal function to add revisions to the log
1389
1389
1390 see addrevision for argument descriptions.
1390 see addrevision for argument descriptions.
1391 invariants:
1391 invariants:
1392 - text is optional (can be None); if not set, cachedelta must be set.
1392 - text is optional (can be None); if not set, cachedelta must be set.
1393 if both are set, they must correspond to each other.
1393 if both are set, they must correspond to each other.
1394 """
1394 """
1395 btext = [text]
1395 btext = [text]
1396 def buildtext():
1396 def buildtext():
1397 if btext[0] is not None:
1397 if btext[0] is not None:
1398 return btext[0]
1398 return btext[0]
1399 baserev = cachedelta[0]
1399 baserev = cachedelta[0]
1400 delta = cachedelta[1]
1400 delta = cachedelta[1]
1401 # special case deltas which replace entire base; no need to decode
1401 # special case deltas which replace entire base; no need to decode
1402 # base revision. this neatly avoids censored bases, which throw when
1402 # base revision. this neatly avoids censored bases, which throw when
1403 # they're decoded.
1403 # they're decoded.
1404 hlen = struct.calcsize(">lll")
1404 hlen = struct.calcsize(">lll")
1405 if delta[:hlen] == mdiff.replacediffheader(self.rawsize(baserev),
1405 if delta[:hlen] == mdiff.replacediffheader(self.rawsize(baserev),
1406 len(delta) - hlen):
1406 len(delta) - hlen):
1407 btext[0] = delta[hlen:]
1407 btext[0] = delta[hlen:]
1408 else:
1408 else:
1409 if self._inline:
1409 if self._inline:
1410 fh = ifh
1410 fh = ifh
1411 else:
1411 else:
1412 fh = dfh
1412 fh = dfh
1413 basetext = self.revision(self.node(baserev), _df=fh)
1413 basetext = self.revision(self.node(baserev), _df=fh)
1414 btext[0] = mdiff.patch(basetext, delta)
1414 btext[0] = mdiff.patch(basetext, delta)
1415 try:
1415 try:
1416 self.checkhash(btext[0], p1, p2, node)
1416 self.checkhash(btext[0], p1, p2, node)
1417 if flags & REVIDX_ISCENSORED:
1417 if flags & REVIDX_ISCENSORED:
1418 raise RevlogError(_('node %s is not censored') % node)
1418 raise RevlogError(_('node %s is not censored') % node)
1419 except CensoredNodeError:
1419 except CensoredNodeError:
1420 # must pass the censored index flag to add censored revisions
1420 # must pass the censored index flag to add censored revisions
1421 if not flags & REVIDX_ISCENSORED:
1421 if not flags & REVIDX_ISCENSORED:
1422 raise
1422 raise
1423 return btext[0]
1423 return btext[0]
1424
1424
1425 def builddelta(rev):
1425 def builddelta(rev):
1426 # can we use the cached delta?
1426 # can we use the cached delta?
1427 if cachedelta and cachedelta[0] == rev:
1427 if cachedelta and cachedelta[0] == rev:
1428 delta = cachedelta[1]
1428 delta = cachedelta[1]
1429 else:
1429 else:
1430 t = buildtext()
1430 t = buildtext()
1431 if self.iscensored(rev):
1431 if self.iscensored(rev):
1432 # deltas based on a censored revision must replace the
1432 # deltas based on a censored revision must replace the
1433 # full content in one patch, so delta works everywhere
1433 # full content in one patch, so delta works everywhere
1434 header = mdiff.replacediffheader(self.rawsize(rev), len(t))
1434 header = mdiff.replacediffheader(self.rawsize(rev), len(t))
1435 delta = header + t
1435 delta = header + t
1436 else:
1436 else:
1437 if self._inline:
1437 if self._inline:
1438 fh = ifh
1438 fh = ifh
1439 else:
1439 else:
1440 fh = dfh
1440 fh = dfh
1441 ptext = self.revision(self.node(rev), _df=fh)
1441 ptext = self.revision(self.node(rev), _df=fh)
1442 delta = mdiff.textdiff(ptext, t)
1442 delta = mdiff.textdiff(ptext, t)
1443 data = self.compress(delta)
1443 header, data = self.compress(delta)
1444 l = len(data[1]) + len(data[0])
1444 deltalen = len(header) + len(data)
1445 chainbase = self.chainbase(rev)
1445 chainbase = self.chainbase(rev)
1446 dist = l + offset - self.start(chainbase)
1446 dist = deltalen + offset - self.start(chainbase)
1447 if self._generaldelta:
1447 if self._generaldelta:
1448 base = rev
1448 base = rev
1449 else:
1449 else:
1450 base = chainbase
1450 base = chainbase
1451 chainlen, compresseddeltalen = self._chaininfo(rev)
1451 chainlen, compresseddeltalen = self._chaininfo(rev)
1452 chainlen += 1
1452 chainlen += 1
1453 compresseddeltalen += l
1453 compresseddeltalen += deltalen
1454 return dist, l, data, base, chainbase, chainlen, compresseddeltalen
1454 return (dist, deltalen, (header, data), base,
1455 chainbase, chainlen, compresseddeltalen)
1455
1456
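# A before/after sketch of the readability change this commit makes
# (illustrative, not part of the source): compress() returns a
# 2-tuple, and naming its parts beats indexing into it.
#     res = self.compress(delta)
#     l = len(res[1]) + len(res[0])        # old: opaque indexing
#     header, data = self.compress(delta)
#     deltalen = len(header) + len(data)   # new: self-describing names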
1456 curr = len(self)
1457 curr = len(self)
1457 prev = curr - 1
1458 prev = curr - 1
1458 offset = self.end(prev)
1459 offset = self.end(prev)
1459 delta = None
1460 delta = None
1460 p1r, p2r = self.rev(p1), self.rev(p2)
1461 p1r, p2r = self.rev(p1), self.rev(p2)
1461
1462
1462 # full versions are inserted when the needed deltas
1463 # full versions are inserted when the needed deltas
1463 # become comparable to the uncompressed text
1464 # become comparable to the uncompressed text
1464 if text is None:
1465 if text is None:
1465 textlen = mdiff.patchedsize(self.rawsize(cachedelta[0]),
1466 textlen = mdiff.patchedsize(self.rawsize(cachedelta[0]),
1466 cachedelta[1])
1467 cachedelta[1])
1467 else:
1468 else:
1468 textlen = len(text)
1469 textlen = len(text)
1469
1470
1470 # should we try to build a delta?
1471 # should we try to build a delta?
1471 if prev != nullrev:
1472 if prev != nullrev:
1472 tested = set()
1473 tested = set()
1473 if cachedelta and self._generaldelta and self._lazydeltabase:
1474 if cachedelta and self._generaldelta and self._lazydeltabase:
1474 # Assume what we received from the server is a good choice
1475 # Assume what we received from the server is a good choice
1475 # builddelta() will reuse the cached delta
1476 # builddelta() will reuse the cached delta
1476 candidatedelta = builddelta(cachedelta[0])
1477 candidatedelta = builddelta(cachedelta[0])
1477 tested.add(cachedelta[0])
1478 tested.add(cachedelta[0])
1478 if self._isgooddelta(candidatedelta, textlen):
1479 if self._isgooddelta(candidatedelta, textlen):
1479 delta = candidatedelta
1480 delta = candidatedelta
1480 if delta is None and self._generaldelta:
1481 if delta is None and self._generaldelta:
1481 # exclude any base already tested by the lazy delta path above
1482 # exclude any base already tested by the lazy delta path above
1482 parents = [p for p in (p1r, p2r)
1483 parents = [p for p in (p1r, p2r)
1483 if p != nullrev and p not in tested]
1484 if p != nullrev and p not in tested]
1484 if parents and not self._aggressivemergedeltas:
1485 if parents and not self._aggressivemergedeltas:
1485 # Pick whichever parent is closer to us (to minimize the
1486 # Pick whichever parent is closer to us (to minimize the
1486 # chance of having to build a fulltext).
1487 # chance of having to build a fulltext).
1487 parents = [max(parents)]
1488 parents = [max(parents)]
1488 tested.update(parents)
1489 tested.update(parents)
1489 pdeltas = []
1490 pdeltas = []
1490 for p in parents:
1491 for p in parents:
1491 pd = builddelta(p)
1492 pd = builddelta(p)
1492 if self._isgooddelta(pd, textlen):
1493 if self._isgooddelta(pd, textlen):
1493 pdeltas.append(pd)
1494 pdeltas.append(pd)
1494 if pdeltas:
1495 if pdeltas:
1495 delta = min(pdeltas, key=lambda x: x[1])
1496 delta = min(pdeltas, key=lambda x: x[1])
1496 if delta is None and prev not in tested:
1497 if delta is None and prev not in tested:
1497 # other approaches failed; try against prev to hopefully save us a
1498 # other approaches failed; try against prev to hopefully save us a
1498 # fulltext.
1499 # fulltext.
1499 candidatedelta = builddelta(prev)
1500 candidatedelta = builddelta(prev)
1500 if self._isgooddelta(candidatedelta, textlen):
1501 if self._isgooddelta(candidatedelta, textlen):
1501 delta = candidatedelta
1502 delta = candidatedelta
1502 if delta is not None:
1503 if delta is not None:
1503 dist, l, data, base, chainbase, chainlen, compresseddeltalen = delta
1504 dist, l, data, base, chainbase, chainlen, compresseddeltalen = delta
1504 else:
1505 else:
1505 text = buildtext()
1506 text = buildtext()
1506 data = self.compress(text)
1507 data = self.compress(text)
1507 l = len(data[1]) + len(data[0])
1508 l = len(data[1]) + len(data[0])
1508 base = chainbase = curr
1509 base = chainbase = curr
1509
1510
1510 e = (offset_type(offset, flags), l, textlen,
1511 e = (offset_type(offset, flags), l, textlen,
1511 base, link, p1r, p2r, node)
1512 base, link, p1r, p2r, node)
1512 self.index.insert(-1, e)
1513 self.index.insert(-1, e)
1513 self.nodemap[node] = curr
1514 self.nodemap[node] = curr
1514
1515
1515 entry = self._io.packentry(e, self.node, self.version, curr)
1516 entry = self._io.packentry(e, self.node, self.version, curr)
1516 self._writeentry(transaction, ifh, dfh, entry, data, link, offset)
1517 self._writeentry(transaction, ifh, dfh, entry, data, link, offset)
1517
1518
1518 if alwayscache and text is None:
1519 if alwayscache and text is None:
1519 text = buildtext()
1520 text = buildtext()
1520
1521
1521 if type(text) == str: # only accept immutable objects
1522 if type(text) == str: # only accept immutable objects
1522 self._cache = (node, curr, text)
1523 self._cache = (node, curr, text)
1523 self._chainbasecache[curr] = chainbase
1524 self._chainbasecache[curr] = chainbase
1524 return node
1525 return node
1525
1526
1526 def _writeentry(self, transaction, ifh, dfh, entry, data, link, offset):
1527 def _writeentry(self, transaction, ifh, dfh, entry, data, link, offset):
1527 # Files opened in a+ mode have inconsistent behavior on various
1528 # Files opened in a+ mode have inconsistent behavior on various
1528 # platforms. Windows requires that a file positioning call be made
1529 # platforms. Windows requires that a file positioning call be made
1529 # when the file handle transitions between reads and writes. See
1530 # when the file handle transitions between reads and writes. See
1530 # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
1531 # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
1531 # platforms, Python or the platform itself can be buggy. Some versions
1532 # platforms, Python or the platform itself can be buggy. Some versions
1532 # of Solaris have been observed to not append at the end of the file
1533 # of Solaris have been observed to not append at the end of the file
1533 # if the file was seeked to before the end. See issue4943 for more.
1534 # if the file was seeked to before the end. See issue4943 for more.
1534 #
1535 #
1535 # We work around this issue by inserting a seek() before writing.
1536 # We work around this issue by inserting a seek() before writing.
1536 # Note: This is likely not necessary on Python 3.
1537 # Note: This is likely not necessary on Python 3.
1537 ifh.seek(0, os.SEEK_END)
1538 ifh.seek(0, os.SEEK_END)
1538 if dfh:
1539 if dfh:
1539 dfh.seek(0, os.SEEK_END)
1540 dfh.seek(0, os.SEEK_END)
1540
1541
1541 curr = len(self) - 1
1542 curr = len(self) - 1
1542 if not self._inline:
1543 if not self._inline:
1543 transaction.add(self.datafile, offset)
1544 transaction.add(self.datafile, offset)
1544 transaction.add(self.indexfile, curr * len(entry))
1545 transaction.add(self.indexfile, curr * len(entry))
1545 if data[0]:
1546 if data[0]:
1546 dfh.write(data[0])
1547 dfh.write(data[0])
1547 dfh.write(data[1])
1548 dfh.write(data[1])
1548 ifh.write(entry)
1549 ifh.write(entry)
1549 else:
1550 else:
1550 offset += curr * self._io.size
1551 offset += curr * self._io.size
1551 transaction.add(self.indexfile, offset, curr)
1552 transaction.add(self.indexfile, offset, curr)
1552 ifh.write(entry)
1553 ifh.write(entry)
1553 ifh.write(data[0])
1554 ifh.write(data[0])
1554 ifh.write(data[1])
1555 ifh.write(data[1])
1555 self.checkinlinesize(transaction, ifh)
1556 self.checkinlinesize(transaction, ifh)
1556
1557
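A sketch of the seek-before-write workaround described above (illustrative; the filename is hypothetical):

    import os
    fh = open('example.d', 'a+b')   # append + read handle
    fh.read(10)                     # a read moves the file position...
    fh.seek(0, os.SEEK_END)         # ...so reposition explicitly before
    fh.write('payload')             # appending (likely unneeded on Python 3)
    fh.close()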
1557 def addgroup(self, cg, linkmapper, transaction, addrevisioncb=None):
1558 def addgroup(self, cg, linkmapper, transaction, addrevisioncb=None):
1558 """
1559 """
1559 add a delta group
1560 add a delta group
1560
1561
1561 given a set of deltas, add them to the revision log. the
1562 given a set of deltas, add them to the revision log. the
1562 first delta is against its parent, which should be in our
1563 first delta is against its parent, which should be in our
1563 log; the rest are against the previous delta.
1564 log; the rest are against the previous delta.
1564
1565
1565 If ``addrevisioncb`` is defined, it will be called with arguments of
1566 If ``addrevisioncb`` is defined, it will be called with arguments of
1566 this revlog and the node that was added.
1567 this revlog and the node that was added.
1567 """
1568 """
1568
1569
1569 # track the base of the current delta log
1570 # track the base of the current delta log
1570 content = []
1571 content = []
1571 node = None
1572 node = None
1572
1573
1573 r = len(self)
1574 r = len(self)
1574 end = 0
1575 end = 0
1575 if r:
1576 if r:
1576 end = self.end(r - 1)
1577 end = self.end(r - 1)
1577 ifh = self.opener(self.indexfile, "a+", checkambig=self._checkambig)
1578 ifh = self.opener(self.indexfile, "a+", checkambig=self._checkambig)
1578 isize = r * self._io.size
1579 isize = r * self._io.size
1579 if self._inline:
1580 if self._inline:
1580 transaction.add(self.indexfile, end + isize, r)
1581 transaction.add(self.indexfile, end + isize, r)
1581 dfh = None
1582 dfh = None
1582 else:
1583 else:
1583 transaction.add(self.indexfile, isize, r)
1584 transaction.add(self.indexfile, isize, r)
1584 transaction.add(self.datafile, end)
1585 transaction.add(self.datafile, end)
1585 dfh = self.opener(self.datafile, "a+")
1586 dfh = self.opener(self.datafile, "a+")
1586 def flush():
1587 def flush():
1587 if dfh:
1588 if dfh:
1588 dfh.flush()
1589 dfh.flush()
1589 ifh.flush()
1590 ifh.flush()
1590 try:
1591 try:
1591 # loop through our set of deltas
1592 # loop through our set of deltas
1592 chain = None
1593 chain = None
1593 for chunkdata in iter(lambda: cg.deltachunk(chain), {}):
1594 for chunkdata in iter(lambda: cg.deltachunk(chain), {}):
1594 node = chunkdata['node']
1595 node = chunkdata['node']
1595 p1 = chunkdata['p1']
1596 p1 = chunkdata['p1']
1596 p2 = chunkdata['p2']
1597 p2 = chunkdata['p2']
1597 cs = chunkdata['cs']
1598 cs = chunkdata['cs']
1598 deltabase = chunkdata['deltabase']
1599 deltabase = chunkdata['deltabase']
1599 delta = chunkdata['delta']
1600 delta = chunkdata['delta']
1600 flags = chunkdata['flags'] or REVIDX_DEFAULT_FLAGS
1601 flags = chunkdata['flags'] or REVIDX_DEFAULT_FLAGS
1601
1602
1602 content.append(node)
1603 content.append(node)
1603
1604
1604 link = linkmapper(cs)
1605 link = linkmapper(cs)
1605 if node in self.nodemap:
1606 if node in self.nodemap:
1606 # this can happen if two branches make the same change
1607 # this can happen if two branches make the same change
1607 chain = node
1608 chain = node
1608 continue
1609 continue
1609
1610
1610 for p in (p1, p2):
1611 for p in (p1, p2):
1611 if p not in self.nodemap:
1612 if p not in self.nodemap:
1612 raise LookupError(p, self.indexfile,
1613 raise LookupError(p, self.indexfile,
1613 _('unknown parent'))
1614 _('unknown parent'))
1614
1615
1615 if deltabase not in self.nodemap:
1616 if deltabase not in self.nodemap:
1616 raise LookupError(deltabase, self.indexfile,
1617 raise LookupError(deltabase, self.indexfile,
1617 _('unknown delta base'))
1618 _('unknown delta base'))
1618
1619
1619 baserev = self.rev(deltabase)
1620 baserev = self.rev(deltabase)
1620
1621
1621 if baserev != nullrev and self.iscensored(baserev):
1622 if baserev != nullrev and self.iscensored(baserev):
1622 # if base is censored, delta must be full replacement in a
1623 # if base is censored, delta must be full replacement in a
1623 # single patch operation
1624 # single patch operation
1624 hlen = struct.calcsize(">lll")
1625 hlen = struct.calcsize(">lll")
1625 oldlen = self.rawsize(baserev)
1626 oldlen = self.rawsize(baserev)
1626 newlen = len(delta) - hlen
1627 newlen = len(delta) - hlen
1627 if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
1628 if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
1628 raise error.CensoredBaseError(self.indexfile,
1629 raise error.CensoredBaseError(self.indexfile,
1629 self.node(baserev))
1630 self.node(baserev))
1630
1631
1631 if not flags and self._peek_iscensored(baserev, delta, flush):
1632 if not flags and self._peek_iscensored(baserev, delta, flush):
1632 flags |= REVIDX_ISCENSORED
1633 flags |= REVIDX_ISCENSORED
1633
1634
1634 # We assume consumers of addrevisioncb will want to retrieve
1635 # We assume consumers of addrevisioncb will want to retrieve
1635 # the added revision, which will require a call to
1636 # the added revision, which will require a call to
1636 # revision(). revision() will fast path if there is a cache
1637 # revision(). revision() will fast path if there is a cache
1637 # hit. So, we tell _addrevision() to always cache in this case.
1638 # hit. So, we tell _addrevision() to always cache in this case.
1638 chain = self._addrevision(node, None, transaction, link,
1639 chain = self._addrevision(node, None, transaction, link,
1639 p1, p2, flags, (baserev, delta),
1640 p1, p2, flags, (baserev, delta),
1640 ifh, dfh,
1641 ifh, dfh,
1641 alwayscache=bool(addrevisioncb))
1642 alwayscache=bool(addrevisioncb))
1642
1643
1643 if addrevisioncb:
1644 if addrevisioncb:
1644 addrevisioncb(self, chain)
1645 addrevisioncb(self, chain)
1645
1646
1646 if not dfh and not self._inline:
1647 if not dfh and not self._inline:
1647 # addrevision switched from inline to conventional
1648 # addrevision switched from inline to conventional
1648 # reopen the index
1649 # reopen the index
1649 ifh.close()
1650 ifh.close()
1650 dfh = self.opener(self.datafile, "a+")
1651 dfh = self.opener(self.datafile, "a+")
1651 ifh = self.opener(self.indexfile, "a+",
1652 ifh = self.opener(self.indexfile, "a+",
1652 checkambig=self._checkambig)
1653 checkambig=self._checkambig)
1653 finally:
1654 finally:
1654 if dfh:
1655 if dfh:
1655 dfh.close()
1656 dfh.close()
1656 ifh.close()
1657 ifh.close()
1657
1658
1658 return content
1659 return content
1659
1660
1660 def iscensored(self, rev):
1661 def iscensored(self, rev):
1661 """Check if a file revision is censored."""
1662 """Check if a file revision is censored."""
1662 return False
1663 return False
1663
1664
1664 def _peek_iscensored(self, baserev, delta, flush):
1665 def _peek_iscensored(self, baserev, delta, flush):
1665 """Quickly check if a delta produces a censored revision."""
1666 """Quickly check if a delta produces a censored revision."""
1666 return False
1667 return False
1667
1668
1668 def getstrippoint(self, minlink):
1669 def getstrippoint(self, minlink):
1669 """find the minimum rev that must be stripped to strip the linkrev
1670 """find the minimum rev that must be stripped to strip the linkrev
1670
1671
1671 Returns a tuple containing the minimum rev and a set of all revs that
1672 Returns a tuple containing the minimum rev and a set of all revs that
1672 have linkrevs that will be broken by this strip.
1673 have linkrevs that will be broken by this strip.
1673 """
1674 """
1674 brokenrevs = set()
1675 brokenrevs = set()
1675 strippoint = len(self)
1676 strippoint = len(self)
1676
1677
1677 heads = {}
1678 heads = {}
1678 futurelargelinkrevs = set()
1679 futurelargelinkrevs = set()
1679 for head in self.headrevs():
1680 for head in self.headrevs():
1680 headlinkrev = self.linkrev(head)
1681 headlinkrev = self.linkrev(head)
1681 heads[head] = headlinkrev
1682 heads[head] = headlinkrev
1682 if headlinkrev >= minlink:
1683 if headlinkrev >= minlink:
1683 futurelargelinkrevs.add(headlinkrev)
1684 futurelargelinkrevs.add(headlinkrev)
1684
1685
1685 # This algorithm involves walking down the rev graph, starting at the
1686 # This algorithm involves walking down the rev graph, starting at the
1686 # heads. Since the revs are topologically sorted according to linkrev,
1687 # heads. Since the revs are topologically sorted according to linkrev,
1687 # once all head linkrevs are below the minlink, we know there are
1688 # once all head linkrevs are below the minlink, we know there are
1688 # no more revs that could have a linkrev greater than minlink.
1689 # no more revs that could have a linkrev greater than minlink.
1689 # So we can stop walking.
1690 # So we can stop walking.
1690 while futurelargelinkrevs:
1691 while futurelargelinkrevs:
1691 strippoint -= 1
1692 strippoint -= 1
1692 linkrev = heads.pop(strippoint)
1693 linkrev = heads.pop(strippoint)
1693
1694
1694 if linkrev < minlink:
1695 if linkrev < minlink:
1695 brokenrevs.add(strippoint)
1696 brokenrevs.add(strippoint)
1696 else:
1697 else:
1697 futurelargelinkrevs.remove(linkrev)
1698 futurelargelinkrevs.remove(linkrev)
1698
1699
1699 for p in self.parentrevs(strippoint):
1700 for p in self.parentrevs(strippoint):
1700 if p != nullrev:
1701 if p != nullrev:
1701 plinkrev = self.linkrev(p)
1702 plinkrev = self.linkrev(p)
1702 heads[p] = plinkrev
1703 heads[p] = plinkrev
1703 if plinkrev >= minlink:
1704 if plinkrev >= minlink:
1704 futurelargelinkrevs.add(plinkrev)
1705 futurelargelinkrevs.add(plinkrev)
1705
1706
1706 return strippoint, brokenrevs
1707 return strippoint, brokenrevs
1707
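A minimal usage sketch (illustrative; `rl` and `minlink` are assumed bindings): the caller truncates at the returned rev and must save and re-add the reported broken revisions.

    rev, broken = rl.getstrippoint(minlink)
    # rev:    first revision to truncate (== len(rl) when nothing to strip)
    # broken: revs at or above 'rev' whose linkrev is < minlink; they are
    #         stripped as collateral and must be re-added by the caller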
1708
1708 def strip(self, minlink, transaction):
1709 def strip(self, minlink, transaction):
1709 """truncate the revlog on the first revision with a linkrev >= minlink
1710 """truncate the revlog on the first revision with a linkrev >= minlink
1710
1711
1711 This function is called when we're stripping revision minlink and
1712 This function is called when we're stripping revision minlink and
1712 its descendants from the repository.
1713 its descendants from the repository.
1713
1714
1714 We have to remove all revisions with linkrev >= minlink, because
1715 We have to remove all revisions with linkrev >= minlink, because
1715 the equivalent changelog revisions will be renumbered after the
1716 the equivalent changelog revisions will be renumbered after the
1716 strip.
1717 strip.
1717
1718
1718 So we truncate the revlog on the first of these revisions, and
1719 So we truncate the revlog on the first of these revisions, and
1719 trust that the caller has saved the revisions that shouldn't be
1720 trust that the caller has saved the revisions that shouldn't be
1720 removed and that it'll re-add them after this truncation.
1721 removed and that it'll re-add them after this truncation.
1721 """
1722 """
1722 if len(self) == 0:
1723 if len(self) == 0:
1723 return
1724 return
1724
1725
1725 rev, _ = self.getstrippoint(minlink)
1726 rev, _ = self.getstrippoint(minlink)
1726 if rev == len(self):
1727 if rev == len(self):
1727 return
1728 return
1728
1729
1729 # first truncate the files on disk
1730 # first truncate the files on disk
1730 end = self.start(rev)
1731 end = self.start(rev)
1731 if not self._inline:
1732 if not self._inline:
1732 transaction.add(self.datafile, end)
1733 transaction.add(self.datafile, end)
1733 end = rev * self._io.size
1734 end = rev * self._io.size
1734 else:
1735 else:
1735 end += rev * self._io.size
1736 end += rev * self._io.size
1736
1737
1737 transaction.add(self.indexfile, end)
1738 transaction.add(self.indexfile, end)
1738
1739
1739 # then reset internal state in memory to forget those revisions
1740 # then reset internal state in memory to forget those revisions
1740 self._cache = None
1741 self._cache = None
1741 self._chaininfocache = {}
1742 self._chaininfocache = {}
1742 self._chunkclear()
1743 self._chunkclear()
1743 for x in xrange(rev, len(self)):
1744 for x in xrange(rev, len(self)):
1744 del self.nodemap[self.node(x)]
1745 del self.nodemap[self.node(x)]
1745
1746
1746 del self.index[rev:-1]
1747 del self.index[rev:-1]
1747
1748
1748 def checksize(self):
1749 def checksize(self):
1749 expected = 0
1750 expected = 0
1750 if len(self):
1751 if len(self):
1751 expected = max(0, self.end(len(self) - 1))
1752 expected = max(0, self.end(len(self) - 1))
1752
1753
1753 try:
1754 try:
1754 f = self.opener(self.datafile)
1755 f = self.opener(self.datafile)
1755 f.seek(0, 2)
1756 f.seek(0, 2)
1756 actual = f.tell()
1757 actual = f.tell()
1757 f.close()
1758 f.close()
1758 dd = actual - expected
1759 dd = actual - expected
1759 except IOError as inst:
1760 except IOError as inst:
1760 if inst.errno != errno.ENOENT:
1761 if inst.errno != errno.ENOENT:
1761 raise
1762 raise
1762 dd = 0
1763 dd = 0
1763
1764
1764 try:
1765 try:
1765 f = self.opener(self.indexfile)
1766 f = self.opener(self.indexfile)
1766 f.seek(0, 2)
1767 f.seek(0, 2)
1767 actual = f.tell()
1768 actual = f.tell()
1768 f.close()
1769 f.close()
1769 s = self._io.size
1770 s = self._io.size
1770 i = max(0, actual // s)
1771 i = max(0, actual // s)
1771 di = actual - (i * s)
1772 di = actual - (i * s)
1772 if self._inline:
1773 if self._inline:
1773 databytes = 0
1774 databytes = 0
1774 for r in self:
1775 for r in self:
1775 databytes += max(0, self.length(r))
1776 databytes += max(0, self.length(r))
1776 dd = 0
1777 dd = 0
1777 di = actual - len(self) * s - databytes
1778 di = actual - len(self) * s - databytes
1778 except IOError as inst:
1779 except IOError as inst:
1779 if inst.errno != errno.ENOENT:
1780 if inst.errno != errno.ENOENT:
1780 raise
1781 raise
1781 di = 0
1782 di = 0
1782
1783
1783 return (dd, di)
1784 return (dd, di)
1784
1785
1785 def files(self):
1786 def files(self):
1786 res = [self.indexfile]
1787 res = [self.indexfile]
1787 if not self._inline:
1788 if not self._inline:
1788 res.append(self.datafile)
1789 res.append(self.datafile)
1789 return res
1790 return res