##// END OF EJS Templates
revlog: add instance variable controlling delta chain use...
Gregory Szorc -
r30154:5e72129d default
parent child Browse files
Show More
@@ -1,1794 +1,1796
1 # revlog.py - storage back-end for mercurial
1 # revlog.py - storage back-end for mercurial
2 #
2 #
3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 #
4 #
5 # This software may be used and distributed according to the terms of the
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
6 # GNU General Public License version 2 or any later version.
7
7
8 """Storage back-end for Mercurial.
8 """Storage back-end for Mercurial.
9
9
10 This provides efficient delta storage with O(1) retrieve and append
10 This provides efficient delta storage with O(1) retrieve and append
11 and O(changes) merge between branches.
11 and O(changes) merge between branches.
12 """
12 """
13
13
14 from __future__ import absolute_import
14 from __future__ import absolute_import
15
15
16 import collections
16 import collections
17 import errno
17 import errno
18 import hashlib
18 import hashlib
19 import os
19 import os
20 import struct
20 import struct
21 import zlib
21 import zlib
22
22
23 # import stuff from node for others to import from revlog
23 # import stuff from node for others to import from revlog
24 from .node import (
24 from .node import (
25 bin,
25 bin,
26 hex,
26 hex,
27 nullid,
27 nullid,
28 nullrev,
28 nullrev,
29 )
29 )
30 from .i18n import _
30 from .i18n import _
31 from . import (
31 from . import (
32 ancestor,
32 ancestor,
33 error,
33 error,
34 mdiff,
34 mdiff,
35 parsers,
35 parsers,
36 templatefilters,
36 templatefilters,
37 util,
37 util,
38 )
38 )
39
39
40 _pack = struct.pack
40 _pack = struct.pack
41 _unpack = struct.unpack
41 _unpack = struct.unpack
42 _compress = zlib.compress
42 _compress = zlib.compress
43 _decompress = zlib.decompress
43 _decompress = zlib.decompress
44
44
45 # revlog header flags
45 # revlog header flags
46 REVLOGV0 = 0
46 REVLOGV0 = 0
47 REVLOGNG = 1
47 REVLOGNG = 1
48 REVLOGNGINLINEDATA = (1 << 16)
48 REVLOGNGINLINEDATA = (1 << 16)
49 REVLOGGENERALDELTA = (1 << 17)
49 REVLOGGENERALDELTA = (1 << 17)
50 REVLOG_DEFAULT_FLAGS = REVLOGNGINLINEDATA
50 REVLOG_DEFAULT_FLAGS = REVLOGNGINLINEDATA
51 REVLOG_DEFAULT_FORMAT = REVLOGNG
51 REVLOG_DEFAULT_FORMAT = REVLOGNG
52 REVLOG_DEFAULT_VERSION = REVLOG_DEFAULT_FORMAT | REVLOG_DEFAULT_FLAGS
52 REVLOG_DEFAULT_VERSION = REVLOG_DEFAULT_FORMAT | REVLOG_DEFAULT_FLAGS
53 REVLOGNG_FLAGS = REVLOGNGINLINEDATA | REVLOGGENERALDELTA
53 REVLOGNG_FLAGS = REVLOGNGINLINEDATA | REVLOGGENERALDELTA
54
54
55 # revlog index flags
55 # revlog index flags
56 REVIDX_ISCENSORED = (1 << 15) # revision has censor metadata, must be verified
56 REVIDX_ISCENSORED = (1 << 15) # revision has censor metadata, must be verified
57 REVIDX_DEFAULT_FLAGS = 0
57 REVIDX_DEFAULT_FLAGS = 0
58 REVIDX_KNOWN_FLAGS = REVIDX_ISCENSORED
58 REVIDX_KNOWN_FLAGS = REVIDX_ISCENSORED
59
59
60 # max size of revlog with inline data
60 # max size of revlog with inline data
61 _maxinline = 131072
61 _maxinline = 131072
62 _chunksize = 1048576
62 _chunksize = 1048576
63
63
64 RevlogError = error.RevlogError
64 RevlogError = error.RevlogError
65 LookupError = error.LookupError
65 LookupError = error.LookupError
66 CensoredNodeError = error.CensoredNodeError
66 CensoredNodeError = error.CensoredNodeError
67
67
68 def getoffset(q):
68 def getoffset(q):
69 return int(q >> 16)
69 return int(q >> 16)
70
70
71 def gettype(q):
71 def gettype(q):
72 return int(q & 0xFFFF)
72 return int(q & 0xFFFF)
73
73
74 def offset_type(offset, type):
74 def offset_type(offset, type):
75 return long(long(offset) << 16 | type)
75 return long(long(offset) << 16 | type)
76
76
77 _nullhash = hashlib.sha1(nullid)
77 _nullhash = hashlib.sha1(nullid)
78
78
79 def hash(text, p1, p2):
79 def hash(text, p1, p2):
80 """generate a hash from the given text and its parent hashes
80 """generate a hash from the given text and its parent hashes
81
81
82 This hash combines both the current file contents and its history
82 This hash combines both the current file contents and its history
83 in a manner that makes it easy to distinguish nodes with the same
83 in a manner that makes it easy to distinguish nodes with the same
84 content in the revision graph.
84 content in the revision graph.
85 """
85 """
86 # As of now, if one of the parent node is null, p2 is null
86 # As of now, if one of the parent node is null, p2 is null
87 if p2 == nullid:
87 if p2 == nullid:
88 # deep copy of a hash is faster than creating one
88 # deep copy of a hash is faster than creating one
89 s = _nullhash.copy()
89 s = _nullhash.copy()
90 s.update(p1)
90 s.update(p1)
91 else:
91 else:
92 # none of the parent nodes are nullid
92 # none of the parent nodes are nullid
93 l = [p1, p2]
93 l = [p1, p2]
94 l.sort()
94 l.sort()
95 s = hashlib.sha1(l[0])
95 s = hashlib.sha1(l[0])
96 s.update(l[1])
96 s.update(l[1])
97 s.update(text)
97 s.update(text)
98 return s.digest()
98 return s.digest()
99
99
100 def decompress(bin):
100 def decompress(bin):
101 """ decompress the given input """
101 """ decompress the given input """
102 if not bin:
102 if not bin:
103 return bin
103 return bin
104 t = bin[0]
104 t = bin[0]
105 if t == '\0':
105 if t == '\0':
106 return bin
106 return bin
107 if t == 'x':
107 if t == 'x':
108 try:
108 try:
109 return _decompress(bin)
109 return _decompress(bin)
110 except zlib.error as e:
110 except zlib.error as e:
111 raise RevlogError(_("revlog decompress error: %s") % str(e))
111 raise RevlogError(_("revlog decompress error: %s") % str(e))
112 if t == 'u':
112 if t == 'u':
113 return util.buffer(bin, 1)
113 return util.buffer(bin, 1)
114 raise RevlogError(_("unknown compression type %r") % t)
114 raise RevlogError(_("unknown compression type %r") % t)
115
115
116 # index v0:
116 # index v0:
117 # 4 bytes: offset
117 # 4 bytes: offset
118 # 4 bytes: compressed length
118 # 4 bytes: compressed length
119 # 4 bytes: base rev
119 # 4 bytes: base rev
120 # 4 bytes: link rev
120 # 4 bytes: link rev
121 # 20 bytes: parent 1 nodeid
121 # 20 bytes: parent 1 nodeid
122 # 20 bytes: parent 2 nodeid
122 # 20 bytes: parent 2 nodeid
123 # 20 bytes: nodeid
123 # 20 bytes: nodeid
124 indexformatv0 = ">4l20s20s20s"
124 indexformatv0 = ">4l20s20s20s"
125
125
126 class revlogoldio(object):
126 class revlogoldio(object):
127 def __init__(self):
127 def __init__(self):
128 self.size = struct.calcsize(indexformatv0)
128 self.size = struct.calcsize(indexformatv0)
129
129
130 def parseindex(self, data, inline):
130 def parseindex(self, data, inline):
131 s = self.size
131 s = self.size
132 index = []
132 index = []
133 nodemap = {nullid: nullrev}
133 nodemap = {nullid: nullrev}
134 n = off = 0
134 n = off = 0
135 l = len(data)
135 l = len(data)
136 while off + s <= l:
136 while off + s <= l:
137 cur = data[off:off + s]
137 cur = data[off:off + s]
138 off += s
138 off += s
139 e = _unpack(indexformatv0, cur)
139 e = _unpack(indexformatv0, cur)
140 # transform to revlogv1 format
140 # transform to revlogv1 format
141 e2 = (offset_type(e[0], 0), e[1], -1, e[2], e[3],
141 e2 = (offset_type(e[0], 0), e[1], -1, e[2], e[3],
142 nodemap.get(e[4], nullrev), nodemap.get(e[5], nullrev), e[6])
142 nodemap.get(e[4], nullrev), nodemap.get(e[5], nullrev), e[6])
143 index.append(e2)
143 index.append(e2)
144 nodemap[e[6]] = n
144 nodemap[e[6]] = n
145 n += 1
145 n += 1
146
146
147 # add the magic null revision at -1
147 # add the magic null revision at -1
148 index.append((0, 0, 0, -1, -1, -1, -1, nullid))
148 index.append((0, 0, 0, -1, -1, -1, -1, nullid))
149
149
150 return index, nodemap, None
150 return index, nodemap, None
151
151
152 def packentry(self, entry, node, version, rev):
152 def packentry(self, entry, node, version, rev):
153 if gettype(entry[0]):
153 if gettype(entry[0]):
154 raise RevlogError(_("index entry flags need RevlogNG"))
154 raise RevlogError(_("index entry flags need RevlogNG"))
155 e2 = (getoffset(entry[0]), entry[1], entry[3], entry[4],
155 e2 = (getoffset(entry[0]), entry[1], entry[3], entry[4],
156 node(entry[5]), node(entry[6]), entry[7])
156 node(entry[5]), node(entry[6]), entry[7])
157 return _pack(indexformatv0, *e2)
157 return _pack(indexformatv0, *e2)
158
158
159 # index ng:
159 # index ng:
160 # 6 bytes: offset
160 # 6 bytes: offset
161 # 2 bytes: flags
161 # 2 bytes: flags
162 # 4 bytes: compressed length
162 # 4 bytes: compressed length
163 # 4 bytes: uncompressed length
163 # 4 bytes: uncompressed length
164 # 4 bytes: base rev
164 # 4 bytes: base rev
165 # 4 bytes: link rev
165 # 4 bytes: link rev
166 # 4 bytes: parent 1 rev
166 # 4 bytes: parent 1 rev
167 # 4 bytes: parent 2 rev
167 # 4 bytes: parent 2 rev
168 # 32 bytes: nodeid
168 # 32 bytes: nodeid
169 indexformatng = ">Qiiiiii20s12x"
169 indexformatng = ">Qiiiiii20s12x"
170 versionformat = ">I"
170 versionformat = ">I"
171
171
172 # corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
172 # corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
173 # signed integer)
173 # signed integer)
174 _maxentrysize = 0x7fffffff
174 _maxentrysize = 0x7fffffff
175
175
176 class revlogio(object):
176 class revlogio(object):
177 def __init__(self):
177 def __init__(self):
178 self.size = struct.calcsize(indexformatng)
178 self.size = struct.calcsize(indexformatng)
179
179
180 def parseindex(self, data, inline):
180 def parseindex(self, data, inline):
181 # call the C implementation to parse the index data
181 # call the C implementation to parse the index data
182 index, cache = parsers.parse_index2(data, inline)
182 index, cache = parsers.parse_index2(data, inline)
183 return index, getattr(index, 'nodemap', None), cache
183 return index, getattr(index, 'nodemap', None), cache
184
184
185 def packentry(self, entry, node, version, rev):
185 def packentry(self, entry, node, version, rev):
186 p = _pack(indexformatng, *entry)
186 p = _pack(indexformatng, *entry)
187 if rev == 0:
187 if rev == 0:
188 p = _pack(versionformat, version) + p[4:]
188 p = _pack(versionformat, version) + p[4:]
189 return p
189 return p
190
190
191 class revlog(object):
191 class revlog(object):
192 """
192 """
193 the underlying revision storage object
193 the underlying revision storage object
194
194
195 A revlog consists of two parts, an index and the revision data.
195 A revlog consists of two parts, an index and the revision data.
196
196
197 The index is a file with a fixed record size containing
197 The index is a file with a fixed record size containing
198 information on each revision, including its nodeid (hash), the
198 information on each revision, including its nodeid (hash), the
199 nodeids of its parents, the position and offset of its data within
199 nodeids of its parents, the position and offset of its data within
200 the data file, and the revision it's based on. Finally, each entry
200 the data file, and the revision it's based on. Finally, each entry
201 contains a linkrev entry that can serve as a pointer to external
201 contains a linkrev entry that can serve as a pointer to external
202 data.
202 data.
203
203
204 The revision data itself is a linear collection of data chunks.
204 The revision data itself is a linear collection of data chunks.
205 Each chunk represents a revision and is usually represented as a
205 Each chunk represents a revision and is usually represented as a
206 delta against the previous chunk. To bound lookup time, runs of
206 delta against the previous chunk. To bound lookup time, runs of
207 deltas are limited to about 2 times the length of the original
207 deltas are limited to about 2 times the length of the original
208 version data. This makes retrieval of a version proportional to
208 version data. This makes retrieval of a version proportional to
209 its size, or O(1) relative to the number of revisions.
209 its size, or O(1) relative to the number of revisions.
210
210
211 Both pieces of the revlog are written to in an append-only
211 Both pieces of the revlog are written to in an append-only
212 fashion, which means we never need to rewrite a file to insert or
212 fashion, which means we never need to rewrite a file to insert or
213 remove data, and can use some simple techniques to avoid the need
213 remove data, and can use some simple techniques to avoid the need
214 for locking while reading.
214 for locking while reading.
215
215
216 If checkambig, indexfile is opened with checkambig=True at
216 If checkambig, indexfile is opened with checkambig=True at
217 writing, to avoid file stat ambiguity.
217 writing, to avoid file stat ambiguity.
218 """
218 """
219 def __init__(self, opener, indexfile, checkambig=False):
219 def __init__(self, opener, indexfile, checkambig=False):
220 """
220 """
221 create a revlog object
221 create a revlog object
222
222
223 opener is a function that abstracts the file opening operation
223 opener is a function that abstracts the file opening operation
224 and can be used to implement COW semantics or the like.
224 and can be used to implement COW semantics or the like.
225 """
225 """
226 self.indexfile = indexfile
226 self.indexfile = indexfile
227 self.datafile = indexfile[:-2] + ".d"
227 self.datafile = indexfile[:-2] + ".d"
228 self.opener = opener
228 self.opener = opener
229 # When True, indexfile is opened with checkambig=True at writing, to
229 # When True, indexfile is opened with checkambig=True at writing, to
230 # avoid file stat ambiguity.
230 # avoid file stat ambiguity.
231 self._checkambig = checkambig
231 self._checkambig = checkambig
232 # 3-tuple of (node, rev, text) for a raw revision.
232 # 3-tuple of (node, rev, text) for a raw revision.
233 self._cache = None
233 self._cache = None
234 # Maps rev to chain base rev.
234 # Maps rev to chain base rev.
235 self._chainbasecache = util.lrucachedict(100)
235 self._chainbasecache = util.lrucachedict(100)
236 # 2-tuple of (offset, data) of raw data from the revlog at an offset.
236 # 2-tuple of (offset, data) of raw data from the revlog at an offset.
237 self._chunkcache = (0, '')
237 self._chunkcache = (0, '')
238 # How much data to read and cache into the raw revlog data cache.
238 # How much data to read and cache into the raw revlog data cache.
239 self._chunkcachesize = 65536
239 self._chunkcachesize = 65536
240 self._maxchainlen = None
240 self._maxchainlen = None
241 self._aggressivemergedeltas = False
241 self._aggressivemergedeltas = False
242 self.index = []
242 self.index = []
243 # Mapping of partial identifiers to full nodes.
243 # Mapping of partial identifiers to full nodes.
244 self._pcache = {}
244 self._pcache = {}
245 # Mapping of revision integer to full node.
245 # Mapping of revision integer to full node.
246 self._nodecache = {nullid: nullrev}
246 self._nodecache = {nullid: nullrev}
247 self._nodepos = None
247 self._nodepos = None
248
248
249 v = REVLOG_DEFAULT_VERSION
249 v = REVLOG_DEFAULT_VERSION
250 opts = getattr(opener, 'options', None)
250 opts = getattr(opener, 'options', None)
251 if opts is not None:
251 if opts is not None:
252 if 'revlogv1' in opts:
252 if 'revlogv1' in opts:
253 if 'generaldelta' in opts:
253 if 'generaldelta' in opts:
254 v |= REVLOGGENERALDELTA
254 v |= REVLOGGENERALDELTA
255 else:
255 else:
256 v = 0
256 v = 0
257 if 'chunkcachesize' in opts:
257 if 'chunkcachesize' in opts:
258 self._chunkcachesize = opts['chunkcachesize']
258 self._chunkcachesize = opts['chunkcachesize']
259 if 'maxchainlen' in opts:
259 if 'maxchainlen' in opts:
260 self._maxchainlen = opts['maxchainlen']
260 self._maxchainlen = opts['maxchainlen']
261 if 'aggressivemergedeltas' in opts:
261 if 'aggressivemergedeltas' in opts:
262 self._aggressivemergedeltas = opts['aggressivemergedeltas']
262 self._aggressivemergedeltas = opts['aggressivemergedeltas']
263 self._lazydeltabase = bool(opts.get('lazydeltabase', False))
263 self._lazydeltabase = bool(opts.get('lazydeltabase', False))
264
264
265 if self._chunkcachesize <= 0:
265 if self._chunkcachesize <= 0:
266 raise RevlogError(_('revlog chunk cache size %r is not greater '
266 raise RevlogError(_('revlog chunk cache size %r is not greater '
267 'than 0') % self._chunkcachesize)
267 'than 0') % self._chunkcachesize)
268 elif self._chunkcachesize & (self._chunkcachesize - 1):
268 elif self._chunkcachesize & (self._chunkcachesize - 1):
269 raise RevlogError(_('revlog chunk cache size %r is not a power '
269 raise RevlogError(_('revlog chunk cache size %r is not a power '
270 'of 2') % self._chunkcachesize)
270 'of 2') % self._chunkcachesize)
271
271
272 indexdata = ''
272 indexdata = ''
273 self._initempty = True
273 self._initempty = True
274 try:
274 try:
275 f = self.opener(self.indexfile)
275 f = self.opener(self.indexfile)
276 indexdata = f.read()
276 indexdata = f.read()
277 f.close()
277 f.close()
278 if len(indexdata) > 0:
278 if len(indexdata) > 0:
279 v = struct.unpack(versionformat, indexdata[:4])[0]
279 v = struct.unpack(versionformat, indexdata[:4])[0]
280 self._initempty = False
280 self._initempty = False
281 except IOError as inst:
281 except IOError as inst:
282 if inst.errno != errno.ENOENT:
282 if inst.errno != errno.ENOENT:
283 raise
283 raise
284
284
285 self.version = v
285 self.version = v
286 self._inline = v & REVLOGNGINLINEDATA
286 self._inline = v & REVLOGNGINLINEDATA
287 self._generaldelta = v & REVLOGGENERALDELTA
287 self._generaldelta = v & REVLOGGENERALDELTA
288 flags = v & ~0xFFFF
288 flags = v & ~0xFFFF
289 fmt = v & 0xFFFF
289 fmt = v & 0xFFFF
290 if fmt == REVLOGV0 and flags:
290 if fmt == REVLOGV0 and flags:
291 raise RevlogError(_("index %s unknown flags %#04x for format v0")
291 raise RevlogError(_("index %s unknown flags %#04x for format v0")
292 % (self.indexfile, flags >> 16))
292 % (self.indexfile, flags >> 16))
293 elif fmt == REVLOGNG and flags & ~REVLOGNG_FLAGS:
293 elif fmt == REVLOGNG and flags & ~REVLOGNG_FLAGS:
294 raise RevlogError(_("index %s unknown flags %#04x for revlogng")
294 raise RevlogError(_("index %s unknown flags %#04x for revlogng")
295 % (self.indexfile, flags >> 16))
295 % (self.indexfile, flags >> 16))
296 elif fmt > REVLOGNG:
296 elif fmt > REVLOGNG:
297 raise RevlogError(_("index %s unknown format %d")
297 raise RevlogError(_("index %s unknown format %d")
298 % (self.indexfile, fmt))
298 % (self.indexfile, fmt))
299
299
300 self._storedeltachains = True
301
300 self._io = revlogio()
302 self._io = revlogio()
301 if self.version == REVLOGV0:
303 if self.version == REVLOGV0:
302 self._io = revlogoldio()
304 self._io = revlogoldio()
303 try:
305 try:
304 d = self._io.parseindex(indexdata, self._inline)
306 d = self._io.parseindex(indexdata, self._inline)
305 except (ValueError, IndexError):
307 except (ValueError, IndexError):
306 raise RevlogError(_("index %s is corrupted") % (self.indexfile))
308 raise RevlogError(_("index %s is corrupted") % (self.indexfile))
307 self.index, nodemap, self._chunkcache = d
309 self.index, nodemap, self._chunkcache = d
308 if nodemap is not None:
310 if nodemap is not None:
309 self.nodemap = self._nodecache = nodemap
311 self.nodemap = self._nodecache = nodemap
310 if not self._chunkcache:
312 if not self._chunkcache:
311 self._chunkclear()
313 self._chunkclear()
312 # revnum -> (chain-length, sum-delta-length)
314 # revnum -> (chain-length, sum-delta-length)
313 self._chaininfocache = {}
315 self._chaininfocache = {}
314
316
315 def tip(self):
317 def tip(self):
316 return self.node(len(self.index) - 2)
318 return self.node(len(self.index) - 2)
317 def __contains__(self, rev):
319 def __contains__(self, rev):
318 return 0 <= rev < len(self)
320 return 0 <= rev < len(self)
319 def __len__(self):
321 def __len__(self):
320 return len(self.index) - 1
322 return len(self.index) - 1
321 def __iter__(self):
323 def __iter__(self):
322 return iter(xrange(len(self)))
324 return iter(xrange(len(self)))
323 def revs(self, start=0, stop=None):
325 def revs(self, start=0, stop=None):
324 """iterate over all rev in this revlog (from start to stop)"""
326 """iterate over all rev in this revlog (from start to stop)"""
325 step = 1
327 step = 1
326 if stop is not None:
328 if stop is not None:
327 if start > stop:
329 if start > stop:
328 step = -1
330 step = -1
329 stop += step
331 stop += step
330 else:
332 else:
331 stop = len(self)
333 stop = len(self)
332 return xrange(start, stop, step)
334 return xrange(start, stop, step)
333
335
334 @util.propertycache
336 @util.propertycache
335 def nodemap(self):
337 def nodemap(self):
336 self.rev(self.node(0))
338 self.rev(self.node(0))
337 return self._nodecache
339 return self._nodecache
338
340
339 def hasnode(self, node):
341 def hasnode(self, node):
340 try:
342 try:
341 self.rev(node)
343 self.rev(node)
342 return True
344 return True
343 except KeyError:
345 except KeyError:
344 return False
346 return False
345
347
346 def clearcaches(self):
348 def clearcaches(self):
347 self._cache = None
349 self._cache = None
348 self._chainbasecache.clear()
350 self._chainbasecache.clear()
349 self._chunkcache = (0, '')
351 self._chunkcache = (0, '')
350 self._pcache = {}
352 self._pcache = {}
351
353
352 try:
354 try:
353 self._nodecache.clearcaches()
355 self._nodecache.clearcaches()
354 except AttributeError:
356 except AttributeError:
355 self._nodecache = {nullid: nullrev}
357 self._nodecache = {nullid: nullrev}
356 self._nodepos = None
358 self._nodepos = None
357
359
358 def rev(self, node):
360 def rev(self, node):
359 try:
361 try:
360 return self._nodecache[node]
362 return self._nodecache[node]
361 except TypeError:
363 except TypeError:
362 raise
364 raise
363 except RevlogError:
365 except RevlogError:
364 # parsers.c radix tree lookup failed
366 # parsers.c radix tree lookup failed
365 raise LookupError(node, self.indexfile, _('no node'))
367 raise LookupError(node, self.indexfile, _('no node'))
366 except KeyError:
368 except KeyError:
367 # pure python cache lookup failed
369 # pure python cache lookup failed
368 n = self._nodecache
370 n = self._nodecache
369 i = self.index
371 i = self.index
370 p = self._nodepos
372 p = self._nodepos
371 if p is None:
373 if p is None:
372 p = len(i) - 2
374 p = len(i) - 2
373 for r in xrange(p, -1, -1):
375 for r in xrange(p, -1, -1):
374 v = i[r][7]
376 v = i[r][7]
375 n[v] = r
377 n[v] = r
376 if v == node:
378 if v == node:
377 self._nodepos = r - 1
379 self._nodepos = r - 1
378 return r
380 return r
379 raise LookupError(node, self.indexfile, _('no node'))
381 raise LookupError(node, self.indexfile, _('no node'))
380
382
381 def node(self, rev):
383 def node(self, rev):
382 return self.index[rev][7]
384 return self.index[rev][7]
383 def linkrev(self, rev):
385 def linkrev(self, rev):
384 return self.index[rev][4]
386 return self.index[rev][4]
385 def parents(self, node):
387 def parents(self, node):
386 i = self.index
388 i = self.index
387 d = i[self.rev(node)]
389 d = i[self.rev(node)]
388 return i[d[5]][7], i[d[6]][7] # map revisions to nodes inline
390 return i[d[5]][7], i[d[6]][7] # map revisions to nodes inline
389 def parentrevs(self, rev):
391 def parentrevs(self, rev):
390 return self.index[rev][5:7]
392 return self.index[rev][5:7]
391 def start(self, rev):
393 def start(self, rev):
392 return int(self.index[rev][0] >> 16)
394 return int(self.index[rev][0] >> 16)
393 def end(self, rev):
395 def end(self, rev):
394 return self.start(rev) + self.length(rev)
396 return self.start(rev) + self.length(rev)
395 def length(self, rev):
397 def length(self, rev):
396 return self.index[rev][1]
398 return self.index[rev][1]
397 def chainbase(self, rev):
399 def chainbase(self, rev):
398 base = self._chainbasecache.get(rev)
400 base = self._chainbasecache.get(rev)
399 if base is not None:
401 if base is not None:
400 return base
402 return base
401
403
402 index = self.index
404 index = self.index
403 base = index[rev][3]
405 base = index[rev][3]
404 while base != rev:
406 while base != rev:
405 rev = base
407 rev = base
406 base = index[rev][3]
408 base = index[rev][3]
407
409
408 self._chainbasecache[rev] = base
410 self._chainbasecache[rev] = base
409 return base
411 return base
410 def chainlen(self, rev):
412 def chainlen(self, rev):
411 return self._chaininfo(rev)[0]
413 return self._chaininfo(rev)[0]
412
414
413 def _chaininfo(self, rev):
415 def _chaininfo(self, rev):
414 chaininfocache = self._chaininfocache
416 chaininfocache = self._chaininfocache
415 if rev in chaininfocache:
417 if rev in chaininfocache:
416 return chaininfocache[rev]
418 return chaininfocache[rev]
417 index = self.index
419 index = self.index
418 generaldelta = self._generaldelta
420 generaldelta = self._generaldelta
419 iterrev = rev
421 iterrev = rev
420 e = index[iterrev]
422 e = index[iterrev]
421 clen = 0
423 clen = 0
422 compresseddeltalen = 0
424 compresseddeltalen = 0
423 while iterrev != e[3]:
425 while iterrev != e[3]:
424 clen += 1
426 clen += 1
425 compresseddeltalen += e[1]
427 compresseddeltalen += e[1]
426 if generaldelta:
428 if generaldelta:
427 iterrev = e[3]
429 iterrev = e[3]
428 else:
430 else:
429 iterrev -= 1
431 iterrev -= 1
430 if iterrev in chaininfocache:
432 if iterrev in chaininfocache:
431 t = chaininfocache[iterrev]
433 t = chaininfocache[iterrev]
432 clen += t[0]
434 clen += t[0]
433 compresseddeltalen += t[1]
435 compresseddeltalen += t[1]
434 break
436 break
435 e = index[iterrev]
437 e = index[iterrev]
436 else:
438 else:
437 # Add text length of base since decompressing that also takes
439 # Add text length of base since decompressing that also takes
438 # work. For cache hits the length is already included.
440 # work. For cache hits the length is already included.
439 compresseddeltalen += e[1]
441 compresseddeltalen += e[1]
440 r = (clen, compresseddeltalen)
442 r = (clen, compresseddeltalen)
441 chaininfocache[rev] = r
443 chaininfocache[rev] = r
442 return r
444 return r
443
445
444 def _deltachain(self, rev, stoprev=None):
446 def _deltachain(self, rev, stoprev=None):
445 """Obtain the delta chain for a revision.
447 """Obtain the delta chain for a revision.
446
448
447 ``stoprev`` specifies a revision to stop at. If not specified, we
449 ``stoprev`` specifies a revision to stop at. If not specified, we
448 stop at the base of the chain.
450 stop at the base of the chain.
449
451
450 Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
452 Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
451 revs in ascending order and ``stopped`` is a bool indicating whether
453 revs in ascending order and ``stopped`` is a bool indicating whether
452 ``stoprev`` was hit.
454 ``stoprev`` was hit.
453 """
455 """
454 chain = []
456 chain = []
455
457
456 # Alias to prevent attribute lookup in tight loop.
458 # Alias to prevent attribute lookup in tight loop.
457 index = self.index
459 index = self.index
458 generaldelta = self._generaldelta
460 generaldelta = self._generaldelta
459
461
460 iterrev = rev
462 iterrev = rev
461 e = index[iterrev]
463 e = index[iterrev]
462 while iterrev != e[3] and iterrev != stoprev:
464 while iterrev != e[3] and iterrev != stoprev:
463 chain.append(iterrev)
465 chain.append(iterrev)
464 if generaldelta:
466 if generaldelta:
465 iterrev = e[3]
467 iterrev = e[3]
466 else:
468 else:
467 iterrev -= 1
469 iterrev -= 1
468 e = index[iterrev]
470 e = index[iterrev]
469
471
470 if iterrev == stoprev:
472 if iterrev == stoprev:
471 stopped = True
473 stopped = True
472 else:
474 else:
473 chain.append(iterrev)
475 chain.append(iterrev)
474 stopped = False
476 stopped = False
475
477
476 chain.reverse()
478 chain.reverse()
477 return chain, stopped
479 return chain, stopped
478
480
479 def flags(self, rev):
481 def flags(self, rev):
480 return self.index[rev][0] & 0xFFFF
482 return self.index[rev][0] & 0xFFFF
481 def rawsize(self, rev):
483 def rawsize(self, rev):
482 """return the length of the uncompressed text for a given revision"""
484 """return the length of the uncompressed text for a given revision"""
483 l = self.index[rev][2]
485 l = self.index[rev][2]
484 if l >= 0:
486 if l >= 0:
485 return l
487 return l
486
488
487 t = self.revision(self.node(rev))
489 t = self.revision(self.node(rev))
488 return len(t)
490 return len(t)
489 size = rawsize
491 size = rawsize
490
492
491 def ancestors(self, revs, stoprev=0, inclusive=False):
493 def ancestors(self, revs, stoprev=0, inclusive=False):
492 """Generate the ancestors of 'revs' in reverse topological order.
494 """Generate the ancestors of 'revs' in reverse topological order.
493 Does not generate revs lower than stoprev.
495 Does not generate revs lower than stoprev.
494
496
495 See the documentation for ancestor.lazyancestors for more details."""
497 See the documentation for ancestor.lazyancestors for more details."""
496
498
497 return ancestor.lazyancestors(self.parentrevs, revs, stoprev=stoprev,
499 return ancestor.lazyancestors(self.parentrevs, revs, stoprev=stoprev,
498 inclusive=inclusive)
500 inclusive=inclusive)
499
501
500 def descendants(self, revs):
502 def descendants(self, revs):
501 """Generate the descendants of 'revs' in revision order.
503 """Generate the descendants of 'revs' in revision order.
502
504
503 Yield a sequence of revision numbers starting with a child of
505 Yield a sequence of revision numbers starting with a child of
504 some rev in revs, i.e., each revision is *not* considered a
506 some rev in revs, i.e., each revision is *not* considered a
505 descendant of itself. Results are ordered by revision number (a
507 descendant of itself. Results are ordered by revision number (a
506 topological sort)."""
508 topological sort)."""
507 first = min(revs)
509 first = min(revs)
508 if first == nullrev:
510 if first == nullrev:
509 for i in self:
511 for i in self:
510 yield i
512 yield i
511 return
513 return
512
514
513 seen = set(revs)
515 seen = set(revs)
514 for i in self.revs(start=first + 1):
516 for i in self.revs(start=first + 1):
515 for x in self.parentrevs(i):
517 for x in self.parentrevs(i):
516 if x != nullrev and x in seen:
518 if x != nullrev and x in seen:
517 seen.add(i)
519 seen.add(i)
518 yield i
520 yield i
519 break
521 break
520
522
521 def findcommonmissing(self, common=None, heads=None):
523 def findcommonmissing(self, common=None, heads=None):
522 """Return a tuple of the ancestors of common and the ancestors of heads
524 """Return a tuple of the ancestors of common and the ancestors of heads
523 that are not ancestors of common. In revset terminology, we return the
525 that are not ancestors of common. In revset terminology, we return the
524 tuple:
526 tuple:
525
527
526 ::common, (::heads) - (::common)
528 ::common, (::heads) - (::common)
527
529
528 The list is sorted by revision number, meaning it is
530 The list is sorted by revision number, meaning it is
529 topologically sorted.
531 topologically sorted.
530
532
531 'heads' and 'common' are both lists of node IDs. If heads is
533 'heads' and 'common' are both lists of node IDs. If heads is
532 not supplied, uses all of the revlog's heads. If common is not
534 not supplied, uses all of the revlog's heads. If common is not
533 supplied, uses nullid."""
535 supplied, uses nullid."""
534 if common is None:
536 if common is None:
535 common = [nullid]
537 common = [nullid]
536 if heads is None:
538 if heads is None:
537 heads = self.heads()
539 heads = self.heads()
538
540
539 common = [self.rev(n) for n in common]
541 common = [self.rev(n) for n in common]
540 heads = [self.rev(n) for n in heads]
542 heads = [self.rev(n) for n in heads]
541
543
542 # we want the ancestors, but inclusive
544 # we want the ancestors, but inclusive
543 class lazyset(object):
545 class lazyset(object):
544 def __init__(self, lazyvalues):
546 def __init__(self, lazyvalues):
545 self.addedvalues = set()
547 self.addedvalues = set()
546 self.lazyvalues = lazyvalues
548 self.lazyvalues = lazyvalues
547
549
548 def __contains__(self, value):
550 def __contains__(self, value):
549 return value in self.addedvalues or value in self.lazyvalues
551 return value in self.addedvalues or value in self.lazyvalues
550
552
551 def __iter__(self):
553 def __iter__(self):
552 added = self.addedvalues
554 added = self.addedvalues
553 for r in added:
555 for r in added:
554 yield r
556 yield r
555 for r in self.lazyvalues:
557 for r in self.lazyvalues:
556 if not r in added:
558 if not r in added:
557 yield r
559 yield r
558
560
559 def add(self, value):
561 def add(self, value):
560 self.addedvalues.add(value)
562 self.addedvalues.add(value)
561
563
562 def update(self, values):
564 def update(self, values):
563 self.addedvalues.update(values)
565 self.addedvalues.update(values)
564
566
565 has = lazyset(self.ancestors(common))
567 has = lazyset(self.ancestors(common))
566 has.add(nullrev)
568 has.add(nullrev)
567 has.update(common)
569 has.update(common)
568
570
569 # take all ancestors from heads that aren't in has
571 # take all ancestors from heads that aren't in has
570 missing = set()
572 missing = set()
571 visit = collections.deque(r for r in heads if r not in has)
573 visit = collections.deque(r for r in heads if r not in has)
572 while visit:
574 while visit:
573 r = visit.popleft()
575 r = visit.popleft()
574 if r in missing:
576 if r in missing:
575 continue
577 continue
576 else:
578 else:
577 missing.add(r)
579 missing.add(r)
578 for p in self.parentrevs(r):
580 for p in self.parentrevs(r):
579 if p not in has:
581 if p not in has:
580 visit.append(p)
582 visit.append(p)
581 missing = list(missing)
583 missing = list(missing)
582 missing.sort()
584 missing.sort()
583 return has, [self.node(r) for r in missing]
585 return has, [self.node(r) for r in missing]
584
586
585 def incrementalmissingrevs(self, common=None):
587 def incrementalmissingrevs(self, common=None):
586 """Return an object that can be used to incrementally compute the
588 """Return an object that can be used to incrementally compute the
587 revision numbers of the ancestors of arbitrary sets that are not
589 revision numbers of the ancestors of arbitrary sets that are not
588 ancestors of common. This is an ancestor.incrementalmissingancestors
590 ancestors of common. This is an ancestor.incrementalmissingancestors
589 object.
591 object.
590
592
591 'common' is a list of revision numbers. If common is not supplied, uses
593 'common' is a list of revision numbers. If common is not supplied, uses
592 nullrev.
594 nullrev.
593 """
595 """
594 if common is None:
596 if common is None:
595 common = [nullrev]
597 common = [nullrev]
596
598
597 return ancestor.incrementalmissingancestors(self.parentrevs, common)
599 return ancestor.incrementalmissingancestors(self.parentrevs, common)
598
600
599 def findmissingrevs(self, common=None, heads=None):
601 def findmissingrevs(self, common=None, heads=None):
600 """Return the revision numbers of the ancestors of heads that
602 """Return the revision numbers of the ancestors of heads that
601 are not ancestors of common.
603 are not ancestors of common.
602
604
603 More specifically, return a list of revision numbers corresponding to
605 More specifically, return a list of revision numbers corresponding to
604 nodes N such that every N satisfies the following constraints:
606 nodes N such that every N satisfies the following constraints:
605
607
606 1. N is an ancestor of some node in 'heads'
608 1. N is an ancestor of some node in 'heads'
607 2. N is not an ancestor of any node in 'common'
609 2. N is not an ancestor of any node in 'common'
608
610
609 The list is sorted by revision number, meaning it is
611 The list is sorted by revision number, meaning it is
610 topologically sorted.
612 topologically sorted.
611
613
612 'heads' and 'common' are both lists of revision numbers. If heads is
614 'heads' and 'common' are both lists of revision numbers. If heads is
613 not supplied, uses all of the revlog's heads. If common is not
615 not supplied, uses all of the revlog's heads. If common is not
614 supplied, uses nullid."""
616 supplied, uses nullid."""
615 if common is None:
617 if common is None:
616 common = [nullrev]
618 common = [nullrev]
617 if heads is None:
619 if heads is None:
618 heads = self.headrevs()
620 heads = self.headrevs()
619
621
620 inc = self.incrementalmissingrevs(common=common)
622 inc = self.incrementalmissingrevs(common=common)
621 return inc.missingancestors(heads)
623 return inc.missingancestors(heads)
622
624
623 def findmissing(self, common=None, heads=None):
625 def findmissing(self, common=None, heads=None):
624 """Return the ancestors of heads that are not ancestors of common.
626 """Return the ancestors of heads that are not ancestors of common.
625
627
626 More specifically, return a list of nodes N such that every N
628 More specifically, return a list of nodes N such that every N
627 satisfies the following constraints:
629 satisfies the following constraints:
628
630
629 1. N is an ancestor of some node in 'heads'
631 1. N is an ancestor of some node in 'heads'
630 2. N is not an ancestor of any node in 'common'
632 2. N is not an ancestor of any node in 'common'
631
633
632 The list is sorted by revision number, meaning it is
634 The list is sorted by revision number, meaning it is
633 topologically sorted.
635 topologically sorted.
634
636
635 'heads' and 'common' are both lists of node IDs. If heads is
637 'heads' and 'common' are both lists of node IDs. If heads is
636 not supplied, uses all of the revlog's heads. If common is not
638 not supplied, uses all of the revlog's heads. If common is not
637 supplied, uses nullid."""
639 supplied, uses nullid."""
638 if common is None:
640 if common is None:
639 common = [nullid]
641 common = [nullid]
640 if heads is None:
642 if heads is None:
641 heads = self.heads()
643 heads = self.heads()
642
644
643 common = [self.rev(n) for n in common]
645 common = [self.rev(n) for n in common]
644 heads = [self.rev(n) for n in heads]
646 heads = [self.rev(n) for n in heads]
645
647
646 inc = self.incrementalmissingrevs(common=common)
648 inc = self.incrementalmissingrevs(common=common)
647 return [self.node(r) for r in inc.missingancestors(heads)]
649 return [self.node(r) for r in inc.missingancestors(heads)]
648
650
649 def nodesbetween(self, roots=None, heads=None):
651 def nodesbetween(self, roots=None, heads=None):
650 """Return a topological path from 'roots' to 'heads'.
652 """Return a topological path from 'roots' to 'heads'.
651
653
652 Return a tuple (nodes, outroots, outheads) where 'nodes' is a
654 Return a tuple (nodes, outroots, outheads) where 'nodes' is a
653 topologically sorted list of all nodes N that satisfy both of
655 topologically sorted list of all nodes N that satisfy both of
654 these constraints:
656 these constraints:
655
657
656 1. N is a descendant of some node in 'roots'
658 1. N is a descendant of some node in 'roots'
657 2. N is an ancestor of some node in 'heads'
659 2. N is an ancestor of some node in 'heads'
658
660
659 Every node is considered to be both a descendant and an ancestor
661 Every node is considered to be both a descendant and an ancestor
660 of itself, so every reachable node in 'roots' and 'heads' will be
662 of itself, so every reachable node in 'roots' and 'heads' will be
661 included in 'nodes'.
663 included in 'nodes'.
662
664
663 'outroots' is the list of reachable nodes in 'roots', i.e., the
665 'outroots' is the list of reachable nodes in 'roots', i.e., the
664 subset of 'roots' that is returned in 'nodes'. Likewise,
666 subset of 'roots' that is returned in 'nodes'. Likewise,
665 'outheads' is the subset of 'heads' that is also in 'nodes'.
667 'outheads' is the subset of 'heads' that is also in 'nodes'.
666
668
667 'roots' and 'heads' are both lists of node IDs. If 'roots' is
669 'roots' and 'heads' are both lists of node IDs. If 'roots' is
668 unspecified, uses nullid as the only root. If 'heads' is
670 unspecified, uses nullid as the only root. If 'heads' is
669 unspecified, uses list of all of the revlog's heads."""
671 unspecified, uses list of all of the revlog's heads."""
670 nonodes = ([], [], [])
672 nonodes = ([], [], [])
671 if roots is not None:
673 if roots is not None:
672 roots = list(roots)
674 roots = list(roots)
673 if not roots:
675 if not roots:
674 return nonodes
676 return nonodes
675 lowestrev = min([self.rev(n) for n in roots])
677 lowestrev = min([self.rev(n) for n in roots])
676 else:
678 else:
677 roots = [nullid] # Everybody's a descendant of nullid
679 roots = [nullid] # Everybody's a descendant of nullid
678 lowestrev = nullrev
680 lowestrev = nullrev
679 if (lowestrev == nullrev) and (heads is None):
681 if (lowestrev == nullrev) and (heads is None):
680 # We want _all_ the nodes!
682 # We want _all_ the nodes!
681 return ([self.node(r) for r in self], [nullid], list(self.heads()))
683 return ([self.node(r) for r in self], [nullid], list(self.heads()))
682 if heads is None:
684 if heads is None:
683 # All nodes are ancestors, so the latest ancestor is the last
685 # All nodes are ancestors, so the latest ancestor is the last
684 # node.
686 # node.
685 highestrev = len(self) - 1
687 highestrev = len(self) - 1
686 # Set ancestors to None to signal that every node is an ancestor.
688 # Set ancestors to None to signal that every node is an ancestor.
687 ancestors = None
689 ancestors = None
688 # Set heads to an empty dictionary for later discovery of heads
690 # Set heads to an empty dictionary for later discovery of heads
689 heads = {}
691 heads = {}
690 else:
692 else:
691 heads = list(heads)
693 heads = list(heads)
692 if not heads:
694 if not heads:
693 return nonodes
695 return nonodes
694 ancestors = set()
696 ancestors = set()
695 # Turn heads into a dictionary so we can remove 'fake' heads.
697 # Turn heads into a dictionary so we can remove 'fake' heads.
696 # Also, later we will be using it to filter out the heads we can't
698 # Also, later we will be using it to filter out the heads we can't
697 # find from roots.
699 # find from roots.
698 heads = dict.fromkeys(heads, False)
700 heads = dict.fromkeys(heads, False)
699 # Start at the top and keep marking parents until we're done.
701 # Start at the top and keep marking parents until we're done.
700 nodestotag = set(heads)
702 nodestotag = set(heads)
701 # Remember where the top was so we can use it as a limit later.
703 # Remember where the top was so we can use it as a limit later.
702 highestrev = max([self.rev(n) for n in nodestotag])
704 highestrev = max([self.rev(n) for n in nodestotag])
703 while nodestotag:
705 while nodestotag:
704 # grab a node to tag
706 # grab a node to tag
705 n = nodestotag.pop()
707 n = nodestotag.pop()
706 # Never tag nullid
708 # Never tag nullid
707 if n == nullid:
709 if n == nullid:
708 continue
710 continue
709 # A node's revision number represents its place in a
711 # A node's revision number represents its place in a
710 # topologically sorted list of nodes.
712 # topologically sorted list of nodes.
711 r = self.rev(n)
713 r = self.rev(n)
712 if r >= lowestrev:
714 if r >= lowestrev:
713 if n not in ancestors:
715 if n not in ancestors:
714 # If we are possibly a descendant of one of the roots
716 # If we are possibly a descendant of one of the roots
715 # and we haven't already been marked as an ancestor
717 # and we haven't already been marked as an ancestor
716 ancestors.add(n) # Mark as ancestor
718 ancestors.add(n) # Mark as ancestor
717 # Add non-nullid parents to list of nodes to tag.
719 # Add non-nullid parents to list of nodes to tag.
718 nodestotag.update([p for p in self.parents(n) if
720 nodestotag.update([p for p in self.parents(n) if
719 p != nullid])
721 p != nullid])
720 elif n in heads: # We've seen it before, is it a fake head?
722 elif n in heads: # We've seen it before, is it a fake head?
721 # So it is, real heads should not be the ancestors of
723 # So it is, real heads should not be the ancestors of
722 # any other heads.
724 # any other heads.
723 heads.pop(n)
725 heads.pop(n)
724 if not ancestors:
726 if not ancestors:
725 return nonodes
727 return nonodes
726 # Now that we have our set of ancestors, we want to remove any
728 # Now that we have our set of ancestors, we want to remove any
727 # roots that are not ancestors.
729 # roots that are not ancestors.
728
730
729 # If one of the roots was nullid, everything is included anyway.
731 # If one of the roots was nullid, everything is included anyway.
730 if lowestrev > nullrev:
732 if lowestrev > nullrev:
731 # But, since we weren't, let's recompute the lowest rev to not
733 # But, since we weren't, let's recompute the lowest rev to not
732 # include roots that aren't ancestors.
734 # include roots that aren't ancestors.
733
735
734 # Filter out roots that aren't ancestors of heads
736 # Filter out roots that aren't ancestors of heads
735 roots = [n for n in roots if n in ancestors]
737 roots = [n for n in roots if n in ancestors]
736 # Recompute the lowest revision
738 # Recompute the lowest revision
737 if roots:
739 if roots:
738 lowestrev = min([self.rev(n) for n in roots])
740 lowestrev = min([self.rev(n) for n in roots])
739 else:
741 else:
740 # No more roots? Return empty list
742 # No more roots? Return empty list
741 return nonodes
743 return nonodes
742 else:
744 else:
743 # We are descending from nullid, and don't need to care about
745 # We are descending from nullid, and don't need to care about
744 # any other roots.
746 # any other roots.
745 lowestrev = nullrev
747 lowestrev = nullrev
746 roots = [nullid]
748 roots = [nullid]
747 # Transform our roots list into a set.
749 # Transform our roots list into a set.
748 descendants = set(roots)
750 descendants = set(roots)
749 # Also, keep the original roots so we can filter out roots that aren't
751 # Also, keep the original roots so we can filter out roots that aren't
750 # 'real' roots (i.e. are descended from other roots).
752 # 'real' roots (i.e. are descended from other roots).
751 roots = descendants.copy()
753 roots = descendants.copy()
752 # Our topologically sorted list of output nodes.
754 # Our topologically sorted list of output nodes.
753 orderedout = []
755 orderedout = []
754 # Don't start at nullid since we don't want nullid in our output list,
756 # Don't start at nullid since we don't want nullid in our output list,
755 # and if nullid shows up in descendants, empty parents will look like
757 # and if nullid shows up in descendants, empty parents will look like
756 # they're descendants.
758 # they're descendants.
757 for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
759 for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
758 n = self.node(r)
760 n = self.node(r)
759 isdescendant = False
761 isdescendant = False
760 if lowestrev == nullrev: # Everybody is a descendant of nullid
762 if lowestrev == nullrev: # Everybody is a descendant of nullid
761 isdescendant = True
763 isdescendant = True
762 elif n in descendants:
764 elif n in descendants:
763 # n is already a descendant
765 # n is already a descendant
764 isdescendant = True
766 isdescendant = True
765 # This check only needs to be done here because all the roots
767 # This check only needs to be done here because all the roots
766 # will start being marked is descendants before the loop.
768 # will start being marked is descendants before the loop.
767 if n in roots:
769 if n in roots:
768 # If n was a root, check if it's a 'real' root.
770 # If n was a root, check if it's a 'real' root.
769 p = tuple(self.parents(n))
771 p = tuple(self.parents(n))
770 # If any of its parents are descendants, it's not a root.
772 # If any of its parents are descendants, it's not a root.
771 if (p[0] in descendants) or (p[1] in descendants):
773 if (p[0] in descendants) or (p[1] in descendants):
772 roots.remove(n)
774 roots.remove(n)
773 else:
775 else:
774 p = tuple(self.parents(n))
776 p = tuple(self.parents(n))
775 # A node is a descendant if either of its parents are
777 # A node is a descendant if either of its parents are
776 # descendants. (We seeded the dependents list with the roots
778 # descendants. (We seeded the dependents list with the roots
777 # up there, remember?)
779 # up there, remember?)
778 if (p[0] in descendants) or (p[1] in descendants):
780 if (p[0] in descendants) or (p[1] in descendants):
779 descendants.add(n)
781 descendants.add(n)
780 isdescendant = True
782 isdescendant = True
781 if isdescendant and ((ancestors is None) or (n in ancestors)):
783 if isdescendant and ((ancestors is None) or (n in ancestors)):
782 # Only include nodes that are both descendants and ancestors.
784 # Only include nodes that are both descendants and ancestors.
783 orderedout.append(n)
785 orderedout.append(n)
784 if (ancestors is not None) and (n in heads):
786 if (ancestors is not None) and (n in heads):
785 # We're trying to figure out which heads are reachable
787 # We're trying to figure out which heads are reachable
786 # from roots.
788 # from roots.
787 # Mark this head as having been reached
789 # Mark this head as having been reached
788 heads[n] = True
790 heads[n] = True
789 elif ancestors is None:
791 elif ancestors is None:
790 # Otherwise, we're trying to discover the heads.
792 # Otherwise, we're trying to discover the heads.
791 # Assume this is a head because if it isn't, the next step
793 # Assume this is a head because if it isn't, the next step
792 # will eventually remove it.
794 # will eventually remove it.
793 heads[n] = True
795 heads[n] = True
794 # But, obviously its parents aren't.
796 # But, obviously its parents aren't.
795 for p in self.parents(n):
797 for p in self.parents(n):
796 heads.pop(p, None)
798 heads.pop(p, None)
797 heads = [n for n, flag in heads.iteritems() if flag]
799 heads = [n for n, flag in heads.iteritems() if flag]
798 roots = list(roots)
800 roots = list(roots)
799 assert orderedout
801 assert orderedout
800 assert roots
802 assert roots
801 assert heads
803 assert heads
802 return (orderedout, roots, heads)
804 return (orderedout, roots, heads)
803
805
804 def headrevs(self):
806 def headrevs(self):
805 try:
807 try:
806 return self.index.headrevs()
808 return self.index.headrevs()
807 except AttributeError:
809 except AttributeError:
808 return self._headrevs()
810 return self._headrevs()
809
811
810 def computephases(self, roots):
812 def computephases(self, roots):
811 return self.index.computephasesmapsets(roots)
813 return self.index.computephasesmapsets(roots)
812
814
813 def _headrevs(self):
815 def _headrevs(self):
814 count = len(self)
816 count = len(self)
815 if not count:
817 if not count:
816 return [nullrev]
818 return [nullrev]
817 # we won't iter over filtered rev so nobody is a head at start
819 # we won't iter over filtered rev so nobody is a head at start
818 ishead = [0] * (count + 1)
820 ishead = [0] * (count + 1)
819 index = self.index
821 index = self.index
820 for r in self:
822 for r in self:
821 ishead[r] = 1 # I may be an head
823 ishead[r] = 1 # I may be an head
822 e = index[r]
824 e = index[r]
823 ishead[e[5]] = ishead[e[6]] = 0 # my parent are not
825 ishead[e[5]] = ishead[e[6]] = 0 # my parent are not
824 return [r for r, val in enumerate(ishead) if val]
826 return [r for r, val in enumerate(ishead) if val]
825
827
826 def heads(self, start=None, stop=None):
828 def heads(self, start=None, stop=None):
827 """return the list of all nodes that have no children
829 """return the list of all nodes that have no children
828
830
829 if start is specified, only heads that are descendants of
831 if start is specified, only heads that are descendants of
830 start will be returned
832 start will be returned
831 if stop is specified, it will consider all the revs from stop
833 if stop is specified, it will consider all the revs from stop
832 as if they had no children
834 as if they had no children
833 """
835 """
834 if start is None and stop is None:
836 if start is None and stop is None:
835 if not len(self):
837 if not len(self):
836 return [nullid]
838 return [nullid]
837 return [self.node(r) for r in self.headrevs()]
839 return [self.node(r) for r in self.headrevs()]
838
840
839 if start is None:
841 if start is None:
840 start = nullid
842 start = nullid
841 if stop is None:
843 if stop is None:
842 stop = []
844 stop = []
843 stoprevs = set([self.rev(n) for n in stop])
845 stoprevs = set([self.rev(n) for n in stop])
844 startrev = self.rev(start)
846 startrev = self.rev(start)
845 reachable = set((startrev,))
847 reachable = set((startrev,))
846 heads = set((startrev,))
848 heads = set((startrev,))
847
849
848 parentrevs = self.parentrevs
850 parentrevs = self.parentrevs
849 for r in self.revs(start=startrev + 1):
851 for r in self.revs(start=startrev + 1):
850 for p in parentrevs(r):
852 for p in parentrevs(r):
851 if p in reachable:
853 if p in reachable:
852 if r not in stoprevs:
854 if r not in stoprevs:
853 reachable.add(r)
855 reachable.add(r)
854 heads.add(r)
856 heads.add(r)
855 if p in heads and p not in stoprevs:
857 if p in heads and p not in stoprevs:
856 heads.remove(p)
858 heads.remove(p)
857
859
858 return [self.node(r) for r in heads]
860 return [self.node(r) for r in heads]
859
861
860 def children(self, node):
862 def children(self, node):
861 """find the children of a given node"""
863 """find the children of a given node"""
862 c = []
864 c = []
863 p = self.rev(node)
865 p = self.rev(node)
864 for r in self.revs(start=p + 1):
866 for r in self.revs(start=p + 1):
865 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
867 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
866 if prevs:
868 if prevs:
867 for pr in prevs:
869 for pr in prevs:
868 if pr == p:
870 if pr == p:
869 c.append(self.node(r))
871 c.append(self.node(r))
870 elif p == nullrev:
872 elif p == nullrev:
871 c.append(self.node(r))
873 c.append(self.node(r))
872 return c
874 return c
873
875
874 def descendant(self, start, end):
876 def descendant(self, start, end):
875 if start == nullrev:
877 if start == nullrev:
876 return True
878 return True
877 for i in self.descendants([start]):
879 for i in self.descendants([start]):
878 if i == end:
880 if i == end:
879 return True
881 return True
880 elif i > end:
882 elif i > end:
881 break
883 break
882 return False
884 return False
883
885
884 def commonancestorsheads(self, a, b):
886 def commonancestorsheads(self, a, b):
885 """calculate all the heads of the common ancestors of nodes a and b"""
887 """calculate all the heads of the common ancestors of nodes a and b"""
886 a, b = self.rev(a), self.rev(b)
888 a, b = self.rev(a), self.rev(b)
887 try:
889 try:
888 ancs = self.index.commonancestorsheads(a, b)
890 ancs = self.index.commonancestorsheads(a, b)
889 except (AttributeError, OverflowError): # C implementation failed
891 except (AttributeError, OverflowError): # C implementation failed
890 ancs = ancestor.commonancestorsheads(self.parentrevs, a, b)
892 ancs = ancestor.commonancestorsheads(self.parentrevs, a, b)
891 return map(self.node, ancs)
893 return map(self.node, ancs)
892
894
893 def isancestor(self, a, b):
895 def isancestor(self, a, b):
894 """return True if node a is an ancestor of node b
896 """return True if node a is an ancestor of node b
895
897
896 The implementation of this is trivial but the use of
898 The implementation of this is trivial but the use of
897 commonancestorsheads is not."""
899 commonancestorsheads is not."""
898 return a in self.commonancestorsheads(a, b)
900 return a in self.commonancestorsheads(a, b)
899
901
900 def ancestor(self, a, b):
902 def ancestor(self, a, b):
901 """calculate the "best" common ancestor of nodes a and b"""
903 """calculate the "best" common ancestor of nodes a and b"""
902
904
903 a, b = self.rev(a), self.rev(b)
905 a, b = self.rev(a), self.rev(b)
904 try:
906 try:
905 ancs = self.index.ancestors(a, b)
907 ancs = self.index.ancestors(a, b)
906 except (AttributeError, OverflowError):
908 except (AttributeError, OverflowError):
907 ancs = ancestor.ancestors(self.parentrevs, a, b)
909 ancs = ancestor.ancestors(self.parentrevs, a, b)
908 if ancs:
910 if ancs:
909 # choose a consistent winner when there's a tie
911 # choose a consistent winner when there's a tie
910 return min(map(self.node, ancs))
912 return min(map(self.node, ancs))
911 return nullid
913 return nullid
912
914
913 def _match(self, id):
915 def _match(self, id):
914 if isinstance(id, int):
916 if isinstance(id, int):
915 # rev
917 # rev
916 return self.node(id)
918 return self.node(id)
917 if len(id) == 20:
919 if len(id) == 20:
918 # possibly a binary node
920 # possibly a binary node
919 # odds of a binary node being all hex in ASCII are 1 in 10**25
921 # odds of a binary node being all hex in ASCII are 1 in 10**25
920 try:
922 try:
921 node = id
923 node = id
922 self.rev(node) # quick search the index
924 self.rev(node) # quick search the index
923 return node
925 return node
924 except LookupError:
926 except LookupError:
925 pass # may be partial hex id
927 pass # may be partial hex id
926 try:
928 try:
927 # str(rev)
929 # str(rev)
928 rev = int(id)
930 rev = int(id)
929 if str(rev) != id:
931 if str(rev) != id:
930 raise ValueError
932 raise ValueError
931 if rev < 0:
933 if rev < 0:
932 rev = len(self) + rev
934 rev = len(self) + rev
933 if rev < 0 or rev >= len(self):
935 if rev < 0 or rev >= len(self):
934 raise ValueError
936 raise ValueError
935 return self.node(rev)
937 return self.node(rev)
936 except (ValueError, OverflowError):
938 except (ValueError, OverflowError):
937 pass
939 pass
938 if len(id) == 40:
940 if len(id) == 40:
939 try:
941 try:
940 # a full hex nodeid?
942 # a full hex nodeid?
941 node = bin(id)
943 node = bin(id)
942 self.rev(node)
944 self.rev(node)
943 return node
945 return node
944 except (TypeError, LookupError):
946 except (TypeError, LookupError):
945 pass
947 pass
946
948
947 def _partialmatch(self, id):
949 def _partialmatch(self, id):
948 try:
950 try:
949 n = self.index.partialmatch(id)
951 n = self.index.partialmatch(id)
950 if n and self.hasnode(n):
952 if n and self.hasnode(n):
951 return n
953 return n
952 return None
954 return None
953 except RevlogError:
955 except RevlogError:
954 # parsers.c radix tree lookup gave multiple matches
956 # parsers.c radix tree lookup gave multiple matches
955 # fast path: for unfiltered changelog, radix tree is accurate
957 # fast path: for unfiltered changelog, radix tree is accurate
956 if not getattr(self, 'filteredrevs', None):
958 if not getattr(self, 'filteredrevs', None):
957 raise LookupError(id, self.indexfile,
959 raise LookupError(id, self.indexfile,
958 _('ambiguous identifier'))
960 _('ambiguous identifier'))
959 # fall through to slow path that filters hidden revisions
961 # fall through to slow path that filters hidden revisions
960 except (AttributeError, ValueError):
962 except (AttributeError, ValueError):
961 # we are pure python, or key was too short to search radix tree
963 # we are pure python, or key was too short to search radix tree
962 pass
964 pass
963
965
964 if id in self._pcache:
966 if id in self._pcache:
965 return self._pcache[id]
967 return self._pcache[id]
966
968
967 if len(id) < 40:
969 if len(id) < 40:
968 try:
970 try:
969 # hex(node)[:...]
971 # hex(node)[:...]
970 l = len(id) // 2 # grab an even number of digits
972 l = len(id) // 2 # grab an even number of digits
971 prefix = bin(id[:l * 2])
973 prefix = bin(id[:l * 2])
972 nl = [e[7] for e in self.index if e[7].startswith(prefix)]
974 nl = [e[7] for e in self.index if e[7].startswith(prefix)]
973 nl = [n for n in nl if hex(n).startswith(id) and
975 nl = [n for n in nl if hex(n).startswith(id) and
974 self.hasnode(n)]
976 self.hasnode(n)]
975 if len(nl) > 0:
977 if len(nl) > 0:
976 if len(nl) == 1:
978 if len(nl) == 1:
977 self._pcache[id] = nl[0]
979 self._pcache[id] = nl[0]
978 return nl[0]
980 return nl[0]
979 raise LookupError(id, self.indexfile,
981 raise LookupError(id, self.indexfile,
980 _('ambiguous identifier'))
982 _('ambiguous identifier'))
981 return None
983 return None
982 except TypeError:
984 except TypeError:
983 pass
985 pass
984
986
985 def lookup(self, id):
987 def lookup(self, id):
986 """locate a node based on:
988 """locate a node based on:
987 - revision number or str(revision number)
989 - revision number or str(revision number)
988 - nodeid or subset of hex nodeid
990 - nodeid or subset of hex nodeid
989 """
991 """
990 n = self._match(id)
992 n = self._match(id)
991 if n is not None:
993 if n is not None:
992 return n
994 return n
993 n = self._partialmatch(id)
995 n = self._partialmatch(id)
994 if n:
996 if n:
995 return n
997 return n
996
998
997 raise LookupError(id, self.indexfile, _('no match found'))
999 raise LookupError(id, self.indexfile, _('no match found'))
998
1000
999 def cmp(self, node, text):
1001 def cmp(self, node, text):
1000 """compare text with a given file revision
1002 """compare text with a given file revision
1001
1003
1002 returns True if text is different than what is stored.
1004 returns True if text is different than what is stored.
1003 """
1005 """
1004 p1, p2 = self.parents(node)
1006 p1, p2 = self.parents(node)
1005 return hash(text, p1, p2) != node
1007 return hash(text, p1, p2) != node
1006
1008
1007 def _addchunk(self, offset, data):
1009 def _addchunk(self, offset, data):
1008 """Add a segment to the revlog cache.
1010 """Add a segment to the revlog cache.
1009
1011
1010 Accepts an absolute offset and the data that is at that location.
1012 Accepts an absolute offset and the data that is at that location.
1011 """
1013 """
1012 o, d = self._chunkcache
1014 o, d = self._chunkcache
1013 # try to add to existing cache
1015 # try to add to existing cache
1014 if o + len(d) == offset and len(d) + len(data) < _chunksize:
1016 if o + len(d) == offset and len(d) + len(data) < _chunksize:
1015 self._chunkcache = o, d + data
1017 self._chunkcache = o, d + data
1016 else:
1018 else:
1017 self._chunkcache = offset, data
1019 self._chunkcache = offset, data
1018
1020
1019 def _loadchunk(self, offset, length, df=None):
1021 def _loadchunk(self, offset, length, df=None):
1020 """Load a segment of raw data from the revlog.
1022 """Load a segment of raw data from the revlog.
1021
1023
1022 Accepts an absolute offset, length to read, and an optional existing
1024 Accepts an absolute offset, length to read, and an optional existing
1023 file handle to read from.
1025 file handle to read from.
1024
1026
1025 If an existing file handle is passed, it will be seeked and the
1027 If an existing file handle is passed, it will be seeked and the
1026 original seek position will NOT be restored.
1028 original seek position will NOT be restored.
1027
1029
1028 Returns a str or buffer of raw byte data.
1030 Returns a str or buffer of raw byte data.
1029 """
1031 """
1030 if df is not None:
1032 if df is not None:
1031 closehandle = False
1033 closehandle = False
1032 else:
1034 else:
1033 if self._inline:
1035 if self._inline:
1034 df = self.opener(self.indexfile)
1036 df = self.opener(self.indexfile)
1035 else:
1037 else:
1036 df = self.opener(self.datafile)
1038 df = self.opener(self.datafile)
1037 closehandle = True
1039 closehandle = True
1038
1040
1039 # Cache data both forward and backward around the requested
1041 # Cache data both forward and backward around the requested
1040 # data, in a fixed size window. This helps speed up operations
1042 # data, in a fixed size window. This helps speed up operations
1041 # involving reading the revlog backwards.
1043 # involving reading the revlog backwards.
1042 cachesize = self._chunkcachesize
1044 cachesize = self._chunkcachesize
1043 realoffset = offset & ~(cachesize - 1)
1045 realoffset = offset & ~(cachesize - 1)
1044 reallength = (((offset + length + cachesize) & ~(cachesize - 1))
1046 reallength = (((offset + length + cachesize) & ~(cachesize - 1))
1045 - realoffset)
1047 - realoffset)
1046 df.seek(realoffset)
1048 df.seek(realoffset)
1047 d = df.read(reallength)
1049 d = df.read(reallength)
1048 if closehandle:
1050 if closehandle:
1049 df.close()
1051 df.close()
1050 self._addchunk(realoffset, d)
1052 self._addchunk(realoffset, d)
1051 if offset != realoffset or reallength != length:
1053 if offset != realoffset or reallength != length:
1052 return util.buffer(d, offset - realoffset, length)
1054 return util.buffer(d, offset - realoffset, length)
1053 return d
1055 return d
1054
1056
1055 def _getchunk(self, offset, length, df=None):
1057 def _getchunk(self, offset, length, df=None):
1056 """Obtain a segment of raw data from the revlog.
1058 """Obtain a segment of raw data from the revlog.
1057
1059
1058 Accepts an absolute offset, length of bytes to obtain, and an
1060 Accepts an absolute offset, length of bytes to obtain, and an
1059 optional file handle to the already-opened revlog. If the file
1061 optional file handle to the already-opened revlog. If the file
1060 handle is used, it's original seek position will not be preserved.
1062 handle is used, it's original seek position will not be preserved.
1061
1063
1062 Requests for data may be returned from a cache.
1064 Requests for data may be returned from a cache.
1063
1065
1064 Returns a str or a buffer instance of raw byte data.
1066 Returns a str or a buffer instance of raw byte data.
1065 """
1067 """
1066 o, d = self._chunkcache
1068 o, d = self._chunkcache
1067 l = len(d)
1069 l = len(d)
1068
1070
1069 # is it in the cache?
1071 # is it in the cache?
1070 cachestart = offset - o
1072 cachestart = offset - o
1071 cacheend = cachestart + length
1073 cacheend = cachestart + length
1072 if cachestart >= 0 and cacheend <= l:
1074 if cachestart >= 0 and cacheend <= l:
1073 if cachestart == 0 and cacheend == l:
1075 if cachestart == 0 and cacheend == l:
1074 return d # avoid a copy
1076 return d # avoid a copy
1075 return util.buffer(d, cachestart, cacheend - cachestart)
1077 return util.buffer(d, cachestart, cacheend - cachestart)
1076
1078
1077 return self._loadchunk(offset, length, df=df)
1079 return self._loadchunk(offset, length, df=df)
1078
1080
1079 def _chunkraw(self, startrev, endrev, df=None):
1081 def _chunkraw(self, startrev, endrev, df=None):
1080 """Obtain a segment of raw data corresponding to a range of revisions.
1082 """Obtain a segment of raw data corresponding to a range of revisions.
1081
1083
1082 Accepts the start and end revisions and an optional already-open
1084 Accepts the start and end revisions and an optional already-open
1083 file handle to be used for reading. If the file handle is read, its
1085 file handle to be used for reading. If the file handle is read, its
1084 seek position will not be preserved.
1086 seek position will not be preserved.
1085
1087
1086 Requests for data may be satisfied by a cache.
1088 Requests for data may be satisfied by a cache.
1087
1089
1088 Returns a 2-tuple of (offset, data) for the requested range of
1090 Returns a 2-tuple of (offset, data) for the requested range of
1089 revisions. Offset is the integer offset from the beginning of the
1091 revisions. Offset is the integer offset from the beginning of the
1090 revlog and data is a str or buffer of the raw byte data.
1092 revlog and data is a str or buffer of the raw byte data.
1091
1093
1092 Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
1094 Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
1093 to determine where each revision's data begins and ends.
1095 to determine where each revision's data begins and ends.
1094 """
1096 """
1095 start = self.start(startrev)
1097 start = self.start(startrev)
1096 end = self.end(endrev)
1098 end = self.end(endrev)
1097 if self._inline:
1099 if self._inline:
1098 start += (startrev + 1) * self._io.size
1100 start += (startrev + 1) * self._io.size
1099 end += (endrev + 1) * self._io.size
1101 end += (endrev + 1) * self._io.size
1100 length = end - start
1102 length = end - start
1101
1103
1102 return start, self._getchunk(start, length, df=df)
1104 return start, self._getchunk(start, length, df=df)
1103
1105
1104 def _chunk(self, rev, df=None):
1106 def _chunk(self, rev, df=None):
1105 """Obtain a single decompressed chunk for a revision.
1107 """Obtain a single decompressed chunk for a revision.
1106
1108
1107 Accepts an integer revision and an optional already-open file handle
1109 Accepts an integer revision and an optional already-open file handle
1108 to be used for reading. If used, the seek position of the file will not
1110 to be used for reading. If used, the seek position of the file will not
1109 be preserved.
1111 be preserved.
1110
1112
1111 Returns a str holding uncompressed data for the requested revision.
1113 Returns a str holding uncompressed data for the requested revision.
1112 """
1114 """
1113 return decompress(self._chunkraw(rev, rev, df=df)[1])
1115 return decompress(self._chunkraw(rev, rev, df=df)[1])
1114
1116
1115 def _chunks(self, revs, df=None):
1117 def _chunks(self, revs, df=None):
1116 """Obtain decompressed chunks for the specified revisions.
1118 """Obtain decompressed chunks for the specified revisions.
1117
1119
1118 Accepts an iterable of numeric revisions that are assumed to be in
1120 Accepts an iterable of numeric revisions that are assumed to be in
1119 ascending order. Also accepts an optional already-open file handle
1121 ascending order. Also accepts an optional already-open file handle
1120 to be used for reading. If used, the seek position of the file will
1122 to be used for reading. If used, the seek position of the file will
1121 not be preserved.
1123 not be preserved.
1122
1124
1123 This function is similar to calling ``self._chunk()`` multiple times,
1125 This function is similar to calling ``self._chunk()`` multiple times,
1124 but is faster.
1126 but is faster.
1125
1127
1126 Returns a list with decompressed data for each requested revision.
1128 Returns a list with decompressed data for each requested revision.
1127 """
1129 """
1128 if not revs:
1130 if not revs:
1129 return []
1131 return []
1130 start = self.start
1132 start = self.start
1131 length = self.length
1133 length = self.length
1132 inline = self._inline
1134 inline = self._inline
1133 iosize = self._io.size
1135 iosize = self._io.size
1134 buffer = util.buffer
1136 buffer = util.buffer
1135
1137
1136 l = []
1138 l = []
1137 ladd = l.append
1139 ladd = l.append
1138
1140
1139 try:
1141 try:
1140 offset, data = self._chunkraw(revs[0], revs[-1], df=df)
1142 offset, data = self._chunkraw(revs[0], revs[-1], df=df)
1141 except OverflowError:
1143 except OverflowError:
1142 # issue4215 - we can't cache a run of chunks greater than
1144 # issue4215 - we can't cache a run of chunks greater than
1143 # 2G on Windows
1145 # 2G on Windows
1144 return [self._chunk(rev, df=df) for rev in revs]
1146 return [self._chunk(rev, df=df) for rev in revs]
1145
1147
1146 for rev in revs:
1148 for rev in revs:
1147 chunkstart = start(rev)
1149 chunkstart = start(rev)
1148 if inline:
1150 if inline:
1149 chunkstart += (rev + 1) * iosize
1151 chunkstart += (rev + 1) * iosize
1150 chunklength = length(rev)
1152 chunklength = length(rev)
1151 ladd(decompress(buffer(data, chunkstart - offset, chunklength)))
1153 ladd(decompress(buffer(data, chunkstart - offset, chunklength)))
1152
1154
1153 return l
1155 return l
1154
1156
1155 def _chunkclear(self):
1157 def _chunkclear(self):
1156 """Clear the raw chunk cache."""
1158 """Clear the raw chunk cache."""
1157 self._chunkcache = (0, '')
1159 self._chunkcache = (0, '')
1158
1160
1159 def deltaparent(self, rev):
1161 def deltaparent(self, rev):
1160 """return deltaparent of the given revision"""
1162 """return deltaparent of the given revision"""
1161 base = self.index[rev][3]
1163 base = self.index[rev][3]
1162 if base == rev:
1164 if base == rev:
1163 return nullrev
1165 return nullrev
1164 elif self._generaldelta:
1166 elif self._generaldelta:
1165 return base
1167 return base
1166 else:
1168 else:
1167 return rev - 1
1169 return rev - 1
1168
1170
1169 def revdiff(self, rev1, rev2):
1171 def revdiff(self, rev1, rev2):
1170 """return or calculate a delta between two revisions"""
1172 """return or calculate a delta between two revisions"""
1171 if rev1 != nullrev and self.deltaparent(rev2) == rev1:
1173 if rev1 != nullrev and self.deltaparent(rev2) == rev1:
1172 return str(self._chunk(rev2))
1174 return str(self._chunk(rev2))
1173
1175
1174 return mdiff.textdiff(self.revision(rev1),
1176 return mdiff.textdiff(self.revision(rev1),
1175 self.revision(rev2))
1177 self.revision(rev2))
1176
1178
1177 def revision(self, nodeorrev, _df=None):
1179 def revision(self, nodeorrev, _df=None):
1178 """return an uncompressed revision of a given node or revision
1180 """return an uncompressed revision of a given node or revision
1179 number.
1181 number.
1180
1182
1181 _df is an existing file handle to read from. It is meant to only be
1183 _df is an existing file handle to read from. It is meant to only be
1182 used internally.
1184 used internally.
1183 """
1185 """
1184 if isinstance(nodeorrev, int):
1186 if isinstance(nodeorrev, int):
1185 rev = nodeorrev
1187 rev = nodeorrev
1186 node = self.node(rev)
1188 node = self.node(rev)
1187 else:
1189 else:
1188 node = nodeorrev
1190 node = nodeorrev
1189 rev = None
1191 rev = None
1190
1192
1191 cachedrev = None
1193 cachedrev = None
1192 if node == nullid:
1194 if node == nullid:
1193 return ""
1195 return ""
1194 if self._cache:
1196 if self._cache:
1195 if self._cache[0] == node:
1197 if self._cache[0] == node:
1196 return self._cache[2]
1198 return self._cache[2]
1197 cachedrev = self._cache[1]
1199 cachedrev = self._cache[1]
1198
1200
1199 # look up what we need to read
1201 # look up what we need to read
1200 text = None
1202 text = None
1201 if rev is None:
1203 if rev is None:
1202 rev = self.rev(node)
1204 rev = self.rev(node)
1203
1205
1204 # check rev flags
1206 # check rev flags
1205 if self.flags(rev) & ~REVIDX_KNOWN_FLAGS:
1207 if self.flags(rev) & ~REVIDX_KNOWN_FLAGS:
1206 raise RevlogError(_('incompatible revision flag %x') %
1208 raise RevlogError(_('incompatible revision flag %x') %
1207 (self.flags(rev) & ~REVIDX_KNOWN_FLAGS))
1209 (self.flags(rev) & ~REVIDX_KNOWN_FLAGS))
1208
1210
1209 chain, stopped = self._deltachain(rev, stoprev=cachedrev)
1211 chain, stopped = self._deltachain(rev, stoprev=cachedrev)
1210 if stopped:
1212 if stopped:
1211 text = self._cache[2]
1213 text = self._cache[2]
1212
1214
1213 # drop cache to save memory
1215 # drop cache to save memory
1214 self._cache = None
1216 self._cache = None
1215
1217
1216 bins = self._chunks(chain, df=_df)
1218 bins = self._chunks(chain, df=_df)
1217 if text is None:
1219 if text is None:
1218 text = str(bins[0])
1220 text = str(bins[0])
1219 bins = bins[1:]
1221 bins = bins[1:]
1220
1222
1221 text = mdiff.patches(text, bins)
1223 text = mdiff.patches(text, bins)
1222
1224
1223 text = self._checkhash(text, node, rev)
1225 text = self._checkhash(text, node, rev)
1224
1226
1225 self._cache = (node, rev, text)
1227 self._cache = (node, rev, text)
1226 return text
1228 return text
1227
1229
1228 def hash(self, text, p1, p2):
1230 def hash(self, text, p1, p2):
1229 """Compute a node hash.
1231 """Compute a node hash.
1230
1232
1231 Available as a function so that subclasses can replace the hash
1233 Available as a function so that subclasses can replace the hash
1232 as needed.
1234 as needed.
1233 """
1235 """
1234 return hash(text, p1, p2)
1236 return hash(text, p1, p2)
1235
1237
1236 def _checkhash(self, text, node, rev):
1238 def _checkhash(self, text, node, rev):
1237 p1, p2 = self.parents(node)
1239 p1, p2 = self.parents(node)
1238 self.checkhash(text, p1, p2, node, rev)
1240 self.checkhash(text, p1, p2, node, rev)
1239 return text
1241 return text
1240
1242
1241 def checkhash(self, text, p1, p2, node, rev=None):
1243 def checkhash(self, text, p1, p2, node, rev=None):
1242 if node != self.hash(text, p1, p2):
1244 if node != self.hash(text, p1, p2):
1243 revornode = rev
1245 revornode = rev
1244 if revornode is None:
1246 if revornode is None:
1245 revornode = templatefilters.short(hex(node))
1247 revornode = templatefilters.short(hex(node))
1246 raise RevlogError(_("integrity check failed on %s:%s")
1248 raise RevlogError(_("integrity check failed on %s:%s")
1247 % (self.indexfile, revornode))
1249 % (self.indexfile, revornode))
1248
1250
1249 def checkinlinesize(self, tr, fp=None):
1251 def checkinlinesize(self, tr, fp=None):
1250 """Check if the revlog is too big for inline and convert if so.
1252 """Check if the revlog is too big for inline and convert if so.
1251
1253
1252 This should be called after revisions are added to the revlog. If the
1254 This should be called after revisions are added to the revlog. If the
1253 revlog has grown too large to be an inline revlog, it will convert it
1255 revlog has grown too large to be an inline revlog, it will convert it
1254 to use multiple index and data files.
1256 to use multiple index and data files.
1255 """
1257 """
1256 if not self._inline or (self.start(-2) + self.length(-2)) < _maxinline:
1258 if not self._inline or (self.start(-2) + self.length(-2)) < _maxinline:
1257 return
1259 return
1258
1260
1259 trinfo = tr.find(self.indexfile)
1261 trinfo = tr.find(self.indexfile)
1260 if trinfo is None:
1262 if trinfo is None:
1261 raise RevlogError(_("%s not found in the transaction")
1263 raise RevlogError(_("%s not found in the transaction")
1262 % self.indexfile)
1264 % self.indexfile)
1263
1265
1264 trindex = trinfo[2]
1266 trindex = trinfo[2]
1265 if trindex is not None:
1267 if trindex is not None:
1266 dataoff = self.start(trindex)
1268 dataoff = self.start(trindex)
1267 else:
1269 else:
1268 # revlog was stripped at start of transaction, use all leftover data
1270 # revlog was stripped at start of transaction, use all leftover data
1269 trindex = len(self) - 1
1271 trindex = len(self) - 1
1270 dataoff = self.end(-2)
1272 dataoff = self.end(-2)
1271
1273
1272 tr.add(self.datafile, dataoff)
1274 tr.add(self.datafile, dataoff)
1273
1275
1274 if fp:
1276 if fp:
1275 fp.flush()
1277 fp.flush()
1276 fp.close()
1278 fp.close()
1277
1279
1278 df = self.opener(self.datafile, 'w')
1280 df = self.opener(self.datafile, 'w')
1279 try:
1281 try:
1280 for r in self:
1282 for r in self:
1281 df.write(self._chunkraw(r, r)[1])
1283 df.write(self._chunkraw(r, r)[1])
1282 finally:
1284 finally:
1283 df.close()
1285 df.close()
1284
1286
1285 fp = self.opener(self.indexfile, 'w', atomictemp=True,
1287 fp = self.opener(self.indexfile, 'w', atomictemp=True,
1286 checkambig=self._checkambig)
1288 checkambig=self._checkambig)
1287 self.version &= ~(REVLOGNGINLINEDATA)
1289 self.version &= ~(REVLOGNGINLINEDATA)
1288 self._inline = False
1290 self._inline = False
1289 for i in self:
1291 for i in self:
1290 e = self._io.packentry(self.index[i], self.node, self.version, i)
1292 e = self._io.packentry(self.index[i], self.node, self.version, i)
1291 fp.write(e)
1293 fp.write(e)
1292
1294
1293 # if we don't call close, the temp file will never replace the
1295 # if we don't call close, the temp file will never replace the
1294 # real index
1296 # real index
1295 fp.close()
1297 fp.close()
1296
1298
1297 tr.replace(self.indexfile, trindex * self._io.size)
1299 tr.replace(self.indexfile, trindex * self._io.size)
1298 self._chunkclear()
1300 self._chunkclear()
1299
1301
1300 def addrevision(self, text, transaction, link, p1, p2, cachedelta=None,
1302 def addrevision(self, text, transaction, link, p1, p2, cachedelta=None,
1301 node=None):
1303 node=None):
1302 """add a revision to the log
1304 """add a revision to the log
1303
1305
1304 text - the revision data to add
1306 text - the revision data to add
1305 transaction - the transaction object used for rollback
1307 transaction - the transaction object used for rollback
1306 link - the linkrev data to add
1308 link - the linkrev data to add
1307 p1, p2 - the parent nodeids of the revision
1309 p1, p2 - the parent nodeids of the revision
1308 cachedelta - an optional precomputed delta
1310 cachedelta - an optional precomputed delta
1309 node - nodeid of revision; typically node is not specified, and it is
1311 node - nodeid of revision; typically node is not specified, and it is
1310 computed by default as hash(text, p1, p2), however subclasses might
1312 computed by default as hash(text, p1, p2), however subclasses might
1311 use different hashing method (and override checkhash() in such case)
1313 use different hashing method (and override checkhash() in such case)
1312 """
1314 """
1313 if link == nullrev:
1315 if link == nullrev:
1314 raise RevlogError(_("attempted to add linkrev -1 to %s")
1316 raise RevlogError(_("attempted to add linkrev -1 to %s")
1315 % self.indexfile)
1317 % self.indexfile)
1316
1318
1317 if len(text) > _maxentrysize:
1319 if len(text) > _maxentrysize:
1318 raise RevlogError(
1320 raise RevlogError(
1319 _("%s: size of %d bytes exceeds maximum revlog storage of 2GiB")
1321 _("%s: size of %d bytes exceeds maximum revlog storage of 2GiB")
1320 % (self.indexfile, len(text)))
1322 % (self.indexfile, len(text)))
1321
1323
1322 node = node or self.hash(text, p1, p2)
1324 node = node or self.hash(text, p1, p2)
1323 if node in self.nodemap:
1325 if node in self.nodemap:
1324 return node
1326 return node
1325
1327
1326 dfh = None
1328 dfh = None
1327 if not self._inline:
1329 if not self._inline:
1328 dfh = self.opener(self.datafile, "a+")
1330 dfh = self.opener(self.datafile, "a+")
1329 ifh = self.opener(self.indexfile, "a+", checkambig=self._checkambig)
1331 ifh = self.opener(self.indexfile, "a+", checkambig=self._checkambig)
1330 try:
1332 try:
1331 return self._addrevision(node, text, transaction, link, p1, p2,
1333 return self._addrevision(node, text, transaction, link, p1, p2,
1332 REVIDX_DEFAULT_FLAGS, cachedelta, ifh, dfh)
1334 REVIDX_DEFAULT_FLAGS, cachedelta, ifh, dfh)
1333 finally:
1335 finally:
1334 if dfh:
1336 if dfh:
1335 dfh.close()
1337 dfh.close()
1336 ifh.close()
1338 ifh.close()
1337
1339
1338 def compress(self, text):
1340 def compress(self, text):
1339 """ generate a possibly-compressed representation of text """
1341 """ generate a possibly-compressed representation of text """
1340 if not text:
1342 if not text:
1341 return ("", text)
1343 return ("", text)
1342 l = len(text)
1344 l = len(text)
1343 bin = None
1345 bin = None
1344 if l < 44:
1346 if l < 44:
1345 pass
1347 pass
1346 elif l > 1000000:
1348 elif l > 1000000:
1347 # zlib makes an internal copy, thus doubling memory usage for
1349 # zlib makes an internal copy, thus doubling memory usage for
1348 # large files, so lets do this in pieces
1350 # large files, so lets do this in pieces
1349 z = zlib.compressobj()
1351 z = zlib.compressobj()
1350 p = []
1352 p = []
1351 pos = 0
1353 pos = 0
1352 while pos < l:
1354 while pos < l:
1353 pos2 = pos + 2**20
1355 pos2 = pos + 2**20
1354 p.append(z.compress(text[pos:pos2]))
1356 p.append(z.compress(text[pos:pos2]))
1355 pos = pos2
1357 pos = pos2
1356 p.append(z.flush())
1358 p.append(z.flush())
1357 if sum(map(len, p)) < l:
1359 if sum(map(len, p)) < l:
1358 bin = "".join(p)
1360 bin = "".join(p)
1359 else:
1361 else:
1360 bin = _compress(text)
1362 bin = _compress(text)
1361 if bin is None or len(bin) > l:
1363 if bin is None or len(bin) > l:
1362 if text[0] == '\0':
1364 if text[0] == '\0':
1363 return ("", text)
1365 return ("", text)
1364 return ('u', text)
1366 return ('u', text)
1365 return ("", bin)
1367 return ("", bin)
1366
1368
1367 def _isgooddelta(self, d, textlen):
1369 def _isgooddelta(self, d, textlen):
1368 """Returns True if the given delta is good. Good means that it is within
1370 """Returns True if the given delta is good. Good means that it is within
1369 the disk span, disk size, and chain length bounds that we know to be
1371 the disk span, disk size, and chain length bounds that we know to be
1370 performant."""
1372 performant."""
1371 if d is None:
1373 if d is None:
1372 return False
1374 return False
1373
1375
1374 # - 'dist' is the distance from the base revision -- bounding it limits
1376 # - 'dist' is the distance from the base revision -- bounding it limits
1375 # the amount of I/O we need to do.
1377 # the amount of I/O we need to do.
1376 # - 'compresseddeltalen' is the sum of the total size of deltas we need
1378 # - 'compresseddeltalen' is the sum of the total size of deltas we need
1377 # to apply -- bounding it limits the amount of CPU we consume.
1379 # to apply -- bounding it limits the amount of CPU we consume.
1378 dist, l, data, base, chainbase, chainlen, compresseddeltalen = d
1380 dist, l, data, base, chainbase, chainlen, compresseddeltalen = d
1379 if (dist > textlen * 4 or l > textlen or
1381 if (dist > textlen * 4 or l > textlen or
1380 compresseddeltalen > textlen * 2 or
1382 compresseddeltalen > textlen * 2 or
1381 (self._maxchainlen and chainlen > self._maxchainlen)):
1383 (self._maxchainlen and chainlen > self._maxchainlen)):
1382 return False
1384 return False
1383
1385
1384 return True
1386 return True
1385
1387
1386 def _addrevision(self, node, text, transaction, link, p1, p2, flags,
1388 def _addrevision(self, node, text, transaction, link, p1, p2, flags,
1387 cachedelta, ifh, dfh, alwayscache=False):
1389 cachedelta, ifh, dfh, alwayscache=False):
1388 """internal function to add revisions to the log
1390 """internal function to add revisions to the log
1389
1391
1390 see addrevision for argument descriptions.
1392 see addrevision for argument descriptions.
1391 invariants:
1393 invariants:
1392 - text is optional (can be None); if not set, cachedelta must be set.
1394 - text is optional (can be None); if not set, cachedelta must be set.
1393 if both are set, they must correspond to each other.
1395 if both are set, they must correspond to each other.
1394 """
1396 """
1395 btext = [text]
1397 btext = [text]
1396 def buildtext():
1398 def buildtext():
1397 if btext[0] is not None:
1399 if btext[0] is not None:
1398 return btext[0]
1400 return btext[0]
1399 baserev = cachedelta[0]
1401 baserev = cachedelta[0]
1400 delta = cachedelta[1]
1402 delta = cachedelta[1]
1401 # special case deltas which replace entire base; no need to decode
1403 # special case deltas which replace entire base; no need to decode
1402 # base revision. this neatly avoids censored bases, which throw when
1404 # base revision. this neatly avoids censored bases, which throw when
1403 # they're decoded.
1405 # they're decoded.
1404 hlen = struct.calcsize(">lll")
1406 hlen = struct.calcsize(">lll")
1405 if delta[:hlen] == mdiff.replacediffheader(self.rawsize(baserev),
1407 if delta[:hlen] == mdiff.replacediffheader(self.rawsize(baserev),
1406 len(delta) - hlen):
1408 len(delta) - hlen):
1407 btext[0] = delta[hlen:]
1409 btext[0] = delta[hlen:]
1408 else:
1410 else:
1409 if self._inline:
1411 if self._inline:
1410 fh = ifh
1412 fh = ifh
1411 else:
1413 else:
1412 fh = dfh
1414 fh = dfh
1413 basetext = self.revision(self.node(baserev), _df=fh)
1415 basetext = self.revision(self.node(baserev), _df=fh)
1414 btext[0] = mdiff.patch(basetext, delta)
1416 btext[0] = mdiff.patch(basetext, delta)
1415 try:
1417 try:
1416 self.checkhash(btext[0], p1, p2, node)
1418 self.checkhash(btext[0], p1, p2, node)
1417 if flags & REVIDX_ISCENSORED:
1419 if flags & REVIDX_ISCENSORED:
1418 raise RevlogError(_('node %s is not censored') % node)
1420 raise RevlogError(_('node %s is not censored') % node)
1419 except CensoredNodeError:
1421 except CensoredNodeError:
1420 # must pass the censored index flag to add censored revisions
1422 # must pass the censored index flag to add censored revisions
1421 if not flags & REVIDX_ISCENSORED:
1423 if not flags & REVIDX_ISCENSORED:
1422 raise
1424 raise
1423 return btext[0]
1425 return btext[0]
1424
1426
1425 def builddelta(rev):
1427 def builddelta(rev):
1426 # can we use the cached delta?
1428 # can we use the cached delta?
1427 if cachedelta and cachedelta[0] == rev:
1429 if cachedelta and cachedelta[0] == rev:
1428 delta = cachedelta[1]
1430 delta = cachedelta[1]
1429 else:
1431 else:
1430 t = buildtext()
1432 t = buildtext()
1431 if self.iscensored(rev):
1433 if self.iscensored(rev):
1432 # deltas based on a censored revision must replace the
1434 # deltas based on a censored revision must replace the
1433 # full content in one patch, so delta works everywhere
1435 # full content in one patch, so delta works everywhere
1434 header = mdiff.replacediffheader(self.rawsize(rev), len(t))
1436 header = mdiff.replacediffheader(self.rawsize(rev), len(t))
1435 delta = header + t
1437 delta = header + t
1436 else:
1438 else:
1437 if self._inline:
1439 if self._inline:
1438 fh = ifh
1440 fh = ifh
1439 else:
1441 else:
1440 fh = dfh
1442 fh = dfh
1441 ptext = self.revision(self.node(rev), _df=fh)
1443 ptext = self.revision(self.node(rev), _df=fh)
1442 delta = mdiff.textdiff(ptext, t)
1444 delta = mdiff.textdiff(ptext, t)
1443 header, data = self.compress(delta)
1445 header, data = self.compress(delta)
1444 deltalen = len(header) + len(data)
1446 deltalen = len(header) + len(data)
1445 chainbase = self.chainbase(rev)
1447 chainbase = self.chainbase(rev)
1446 dist = deltalen + offset - self.start(chainbase)
1448 dist = deltalen + offset - self.start(chainbase)
1447 if self._generaldelta:
1449 if self._generaldelta:
1448 base = rev
1450 base = rev
1449 else:
1451 else:
1450 base = chainbase
1452 base = chainbase
1451 chainlen, compresseddeltalen = self._chaininfo(rev)
1453 chainlen, compresseddeltalen = self._chaininfo(rev)
1452 chainlen += 1
1454 chainlen += 1
1453 compresseddeltalen += deltalen
1455 compresseddeltalen += deltalen
1454 return (dist, deltalen, (header, data), base,
1456 return (dist, deltalen, (header, data), base,
1455 chainbase, chainlen, compresseddeltalen)
1457 chainbase, chainlen, compresseddeltalen)
1456
1458
1457 curr = len(self)
1459 curr = len(self)
1458 prev = curr - 1
1460 prev = curr - 1
1459 offset = self.end(prev)
1461 offset = self.end(prev)
1460 delta = None
1462 delta = None
1461 p1r, p2r = self.rev(p1), self.rev(p2)
1463 p1r, p2r = self.rev(p1), self.rev(p2)
1462
1464
1463 # full versions are inserted when the needed deltas
1465 # full versions are inserted when the needed deltas
1464 # become comparable to the uncompressed text
1466 # become comparable to the uncompressed text
1465 if text is None:
1467 if text is None:
1466 textlen = mdiff.patchedsize(self.rawsize(cachedelta[0]),
1468 textlen = mdiff.patchedsize(self.rawsize(cachedelta[0]),
1467 cachedelta[1])
1469 cachedelta[1])
1468 else:
1470 else:
1469 textlen = len(text)
1471 textlen = len(text)
1470
1472
1471 # should we try to build a delta?
1473 # should we try to build a delta?
1472 if prev != nullrev:
1474 if prev != nullrev and self._storedeltachains:
1473 tested = set()
1475 tested = set()
1474 # This condition is true most of the time when processing
1476 # This condition is true most of the time when processing
1475 # changegroup data into a generaldelta repo. The only time it
1477 # changegroup data into a generaldelta repo. The only time it
1476 # isn't true is if this is the first revision in a delta chain
1478 # isn't true is if this is the first revision in a delta chain
1477 # or if ``format.generaldelta=true`` disabled ``lazydeltabase``.
1479 # or if ``format.generaldelta=true`` disabled ``lazydeltabase``.
1478 if cachedelta and self._generaldelta and self._lazydeltabase:
1480 if cachedelta and self._generaldelta and self._lazydeltabase:
1479 # Assume what we received from the server is a good choice
1481 # Assume what we received from the server is a good choice
1480 # build delta will reuse the cache
1482 # build delta will reuse the cache
1481 candidatedelta = builddelta(cachedelta[0])
1483 candidatedelta = builddelta(cachedelta[0])
1482 tested.add(cachedelta[0])
1484 tested.add(cachedelta[0])
1483 if self._isgooddelta(candidatedelta, textlen):
1485 if self._isgooddelta(candidatedelta, textlen):
1484 delta = candidatedelta
1486 delta = candidatedelta
1485 if delta is None and self._generaldelta:
1487 if delta is None and self._generaldelta:
1486 # exclude already lazy tested base if any
1488 # exclude already lazy tested base if any
1487 parents = [p for p in (p1r, p2r)
1489 parents = [p for p in (p1r, p2r)
1488 if p != nullrev and p not in tested]
1490 if p != nullrev and p not in tested]
1489 if parents and not self._aggressivemergedeltas:
1491 if parents and not self._aggressivemergedeltas:
1490 # Pick whichever parent is closer to us (to minimize the
1492 # Pick whichever parent is closer to us (to minimize the
1491 # chance of having to build a fulltext).
1493 # chance of having to build a fulltext).
1492 parents = [max(parents)]
1494 parents = [max(parents)]
1493 tested.update(parents)
1495 tested.update(parents)
1494 pdeltas = []
1496 pdeltas = []
1495 for p in parents:
1497 for p in parents:
1496 pd = builddelta(p)
1498 pd = builddelta(p)
1497 if self._isgooddelta(pd, textlen):
1499 if self._isgooddelta(pd, textlen):
1498 pdeltas.append(pd)
1500 pdeltas.append(pd)
1499 if pdeltas:
1501 if pdeltas:
1500 delta = min(pdeltas, key=lambda x: x[1])
1502 delta = min(pdeltas, key=lambda x: x[1])
1501 if delta is None and prev not in tested:
1503 if delta is None and prev not in tested:
1502 # other approach failed try against prev to hopefully save us a
1504 # other approach failed try against prev to hopefully save us a
1503 # fulltext.
1505 # fulltext.
1504 candidatedelta = builddelta(prev)
1506 candidatedelta = builddelta(prev)
1505 if self._isgooddelta(candidatedelta, textlen):
1507 if self._isgooddelta(candidatedelta, textlen):
1506 delta = candidatedelta
1508 delta = candidatedelta
1507 if delta is not None:
1509 if delta is not None:
1508 dist, l, data, base, chainbase, chainlen, compresseddeltalen = delta
1510 dist, l, data, base, chainbase, chainlen, compresseddeltalen = delta
1509 else:
1511 else:
1510 text = buildtext()
1512 text = buildtext()
1511 data = self.compress(text)
1513 data = self.compress(text)
1512 l = len(data[1]) + len(data[0])
1514 l = len(data[1]) + len(data[0])
1513 base = chainbase = curr
1515 base = chainbase = curr
1514
1516
1515 e = (offset_type(offset, flags), l, textlen,
1517 e = (offset_type(offset, flags), l, textlen,
1516 base, link, p1r, p2r, node)
1518 base, link, p1r, p2r, node)
1517 self.index.insert(-1, e)
1519 self.index.insert(-1, e)
1518 self.nodemap[node] = curr
1520 self.nodemap[node] = curr
1519
1521
1520 entry = self._io.packentry(e, self.node, self.version, curr)
1522 entry = self._io.packentry(e, self.node, self.version, curr)
1521 self._writeentry(transaction, ifh, dfh, entry, data, link, offset)
1523 self._writeentry(transaction, ifh, dfh, entry, data, link, offset)
1522
1524
1523 if alwayscache and text is None:
1525 if alwayscache and text is None:
1524 text = buildtext()
1526 text = buildtext()
1525
1527
1526 if type(text) == str: # only accept immutable objects
1528 if type(text) == str: # only accept immutable objects
1527 self._cache = (node, curr, text)
1529 self._cache = (node, curr, text)
1528 self._chainbasecache[curr] = chainbase
1530 self._chainbasecache[curr] = chainbase
1529 return node
1531 return node
1530
1532
1531 def _writeentry(self, transaction, ifh, dfh, entry, data, link, offset):
1533 def _writeentry(self, transaction, ifh, dfh, entry, data, link, offset):
1532 # Files opened in a+ mode have inconsistent behavior on various
1534 # Files opened in a+ mode have inconsistent behavior on various
1533 # platforms. Windows requires that a file positioning call be made
1535 # platforms. Windows requires that a file positioning call be made
1534 # when the file handle transitions between reads and writes. See
1536 # when the file handle transitions between reads and writes. See
1535 # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
1537 # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
1536 # platforms, Python or the platform itself can be buggy. Some versions
1538 # platforms, Python or the platform itself can be buggy. Some versions
1537 # of Solaris have been observed to not append at the end of the file
1539 # of Solaris have been observed to not append at the end of the file
1538 # if the file was seeked to before the end. See issue4943 for more.
1540 # if the file was seeked to before the end. See issue4943 for more.
1539 #
1541 #
1540 # We work around this issue by inserting a seek() before writing.
1542 # We work around this issue by inserting a seek() before writing.
1541 # Note: This is likely not necessary on Python 3.
1543 # Note: This is likely not necessary on Python 3.
1542 ifh.seek(0, os.SEEK_END)
1544 ifh.seek(0, os.SEEK_END)
1543 if dfh:
1545 if dfh:
1544 dfh.seek(0, os.SEEK_END)
1546 dfh.seek(0, os.SEEK_END)
1545
1547
1546 curr = len(self) - 1
1548 curr = len(self) - 1
1547 if not self._inline:
1549 if not self._inline:
1548 transaction.add(self.datafile, offset)
1550 transaction.add(self.datafile, offset)
1549 transaction.add(self.indexfile, curr * len(entry))
1551 transaction.add(self.indexfile, curr * len(entry))
1550 if data[0]:
1552 if data[0]:
1551 dfh.write(data[0])
1553 dfh.write(data[0])
1552 dfh.write(data[1])
1554 dfh.write(data[1])
1553 ifh.write(entry)
1555 ifh.write(entry)
1554 else:
1556 else:
1555 offset += curr * self._io.size
1557 offset += curr * self._io.size
1556 transaction.add(self.indexfile, offset, curr)
1558 transaction.add(self.indexfile, offset, curr)
1557 ifh.write(entry)
1559 ifh.write(entry)
1558 ifh.write(data[0])
1560 ifh.write(data[0])
1559 ifh.write(data[1])
1561 ifh.write(data[1])
1560 self.checkinlinesize(transaction, ifh)
1562 self.checkinlinesize(transaction, ifh)
1561
1563
1562 def addgroup(self, cg, linkmapper, transaction, addrevisioncb=None):
1564 def addgroup(self, cg, linkmapper, transaction, addrevisioncb=None):
1563 """
1565 """
1564 add a delta group
1566 add a delta group
1565
1567
1566 given a set of deltas, add them to the revision log. the
1568 given a set of deltas, add them to the revision log. the
1567 first delta is against its parent, which should be in our
1569 first delta is against its parent, which should be in our
1568 log, the rest are against the previous delta.
1570 log, the rest are against the previous delta.
1569
1571
1570 If ``addrevisioncb`` is defined, it will be called with arguments of
1572 If ``addrevisioncb`` is defined, it will be called with arguments of
1571 this revlog and the node that was added.
1573 this revlog and the node that was added.
1572 """
1574 """
1573
1575
1574 # track the base of the current delta log
1576 # track the base of the current delta log
1575 content = []
1577 content = []
1576 node = None
1578 node = None
1577
1579
1578 r = len(self)
1580 r = len(self)
1579 end = 0
1581 end = 0
1580 if r:
1582 if r:
1581 end = self.end(r - 1)
1583 end = self.end(r - 1)
1582 ifh = self.opener(self.indexfile, "a+", checkambig=self._checkambig)
1584 ifh = self.opener(self.indexfile, "a+", checkambig=self._checkambig)
1583 isize = r * self._io.size
1585 isize = r * self._io.size
1584 if self._inline:
1586 if self._inline:
1585 transaction.add(self.indexfile, end + isize, r)
1587 transaction.add(self.indexfile, end + isize, r)
1586 dfh = None
1588 dfh = None
1587 else:
1589 else:
1588 transaction.add(self.indexfile, isize, r)
1590 transaction.add(self.indexfile, isize, r)
1589 transaction.add(self.datafile, end)
1591 transaction.add(self.datafile, end)
1590 dfh = self.opener(self.datafile, "a+")
1592 dfh = self.opener(self.datafile, "a+")
1591 def flush():
1593 def flush():
1592 if dfh:
1594 if dfh:
1593 dfh.flush()
1595 dfh.flush()
1594 ifh.flush()
1596 ifh.flush()
1595 try:
1597 try:
1596 # loop through our set of deltas
1598 # loop through our set of deltas
1597 chain = None
1599 chain = None
1598 for chunkdata in iter(lambda: cg.deltachunk(chain), {}):
1600 for chunkdata in iter(lambda: cg.deltachunk(chain), {}):
1599 node = chunkdata['node']
1601 node = chunkdata['node']
1600 p1 = chunkdata['p1']
1602 p1 = chunkdata['p1']
1601 p2 = chunkdata['p2']
1603 p2 = chunkdata['p2']
1602 cs = chunkdata['cs']
1604 cs = chunkdata['cs']
1603 deltabase = chunkdata['deltabase']
1605 deltabase = chunkdata['deltabase']
1604 delta = chunkdata['delta']
1606 delta = chunkdata['delta']
1605 flags = chunkdata['flags'] or REVIDX_DEFAULT_FLAGS
1607 flags = chunkdata['flags'] or REVIDX_DEFAULT_FLAGS
1606
1608
1607 content.append(node)
1609 content.append(node)
1608
1610
1609 link = linkmapper(cs)
1611 link = linkmapper(cs)
1610 if node in self.nodemap:
1612 if node in self.nodemap:
1611 # this can happen if two branches make the same change
1613 # this can happen if two branches make the same change
1612 chain = node
1614 chain = node
1613 continue
1615 continue
1614
1616
1615 for p in (p1, p2):
1617 for p in (p1, p2):
1616 if p not in self.nodemap:
1618 if p not in self.nodemap:
1617 raise LookupError(p, self.indexfile,
1619 raise LookupError(p, self.indexfile,
1618 _('unknown parent'))
1620 _('unknown parent'))
1619
1621
1620 if deltabase not in self.nodemap:
1622 if deltabase not in self.nodemap:
1621 raise LookupError(deltabase, self.indexfile,
1623 raise LookupError(deltabase, self.indexfile,
1622 _('unknown delta base'))
1624 _('unknown delta base'))
1623
1625
1624 baserev = self.rev(deltabase)
1626 baserev = self.rev(deltabase)
1625
1627
1626 if baserev != nullrev and self.iscensored(baserev):
1628 if baserev != nullrev and self.iscensored(baserev):
1627 # if base is censored, delta must be full replacement in a
1629 # if base is censored, delta must be full replacement in a
1628 # single patch operation
1630 # single patch operation
1629 hlen = struct.calcsize(">lll")
1631 hlen = struct.calcsize(">lll")
1630 oldlen = self.rawsize(baserev)
1632 oldlen = self.rawsize(baserev)
1631 newlen = len(delta) - hlen
1633 newlen = len(delta) - hlen
1632 if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
1634 if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
1633 raise error.CensoredBaseError(self.indexfile,
1635 raise error.CensoredBaseError(self.indexfile,
1634 self.node(baserev))
1636 self.node(baserev))
1635
1637
1636 if not flags and self._peek_iscensored(baserev, delta, flush):
1638 if not flags and self._peek_iscensored(baserev, delta, flush):
1637 flags |= REVIDX_ISCENSORED
1639 flags |= REVIDX_ISCENSORED
1638
1640
1639 # We assume consumers of addrevisioncb will want to retrieve
1641 # We assume consumers of addrevisioncb will want to retrieve
1640 # the added revision, which will require a call to
1642 # the added revision, which will require a call to
1641 # revision(). revision() will fast path if there is a cache
1643 # revision(). revision() will fast path if there is a cache
1642 # hit. So, we tell _addrevision() to always cache in this case.
1644 # hit. So, we tell _addrevision() to always cache in this case.
1643 chain = self._addrevision(node, None, transaction, link,
1645 chain = self._addrevision(node, None, transaction, link,
1644 p1, p2, flags, (baserev, delta),
1646 p1, p2, flags, (baserev, delta),
1645 ifh, dfh,
1647 ifh, dfh,
1646 alwayscache=bool(addrevisioncb))
1648 alwayscache=bool(addrevisioncb))
1647
1649
1648 if addrevisioncb:
1650 if addrevisioncb:
1649 addrevisioncb(self, chain)
1651 addrevisioncb(self, chain)
1650
1652
1651 if not dfh and not self._inline:
1653 if not dfh and not self._inline:
1652 # addrevision switched from inline to conventional
1654 # addrevision switched from inline to conventional
1653 # reopen the index
1655 # reopen the index
1654 ifh.close()
1656 ifh.close()
1655 dfh = self.opener(self.datafile, "a+")
1657 dfh = self.opener(self.datafile, "a+")
1656 ifh = self.opener(self.indexfile, "a+",
1658 ifh = self.opener(self.indexfile, "a+",
1657 checkambig=self._checkambig)
1659 checkambig=self._checkambig)
1658 finally:
1660 finally:
1659 if dfh:
1661 if dfh:
1660 dfh.close()
1662 dfh.close()
1661 ifh.close()
1663 ifh.close()
1662
1664
1663 return content
1665 return content
1664
1666
1665 def iscensored(self, rev):
1667 def iscensored(self, rev):
1666 """Check if a file revision is censored."""
1668 """Check if a file revision is censored."""
1667 return False
1669 return False
1668
1670
1669 def _peek_iscensored(self, baserev, delta, flush):
1671 def _peek_iscensored(self, baserev, delta, flush):
1670 """Quickly check if a delta produces a censored revision."""
1672 """Quickly check if a delta produces a censored revision."""
1671 return False
1673 return False
1672
1674
1673 def getstrippoint(self, minlink):
1675 def getstrippoint(self, minlink):
1674 """find the minimum rev that must be stripped to strip the linkrev
1676 """find the minimum rev that must be stripped to strip the linkrev
1675
1677
1676 Returns a tuple containing the minimum rev and a set of all revs that
1678 Returns a tuple containing the minimum rev and a set of all revs that
1677 have linkrevs that will be broken by this strip.
1679 have linkrevs that will be broken by this strip.
1678 """
1680 """
1679 brokenrevs = set()
1681 brokenrevs = set()
1680 strippoint = len(self)
1682 strippoint = len(self)
1681
1683
1682 heads = {}
1684 heads = {}
1683 futurelargelinkrevs = set()
1685 futurelargelinkrevs = set()
1684 for head in self.headrevs():
1686 for head in self.headrevs():
1685 headlinkrev = self.linkrev(head)
1687 headlinkrev = self.linkrev(head)
1686 heads[head] = headlinkrev
1688 heads[head] = headlinkrev
1687 if headlinkrev >= minlink:
1689 if headlinkrev >= minlink:
1688 futurelargelinkrevs.add(headlinkrev)
1690 futurelargelinkrevs.add(headlinkrev)
1689
1691
1690 # This algorithm involves walking down the rev graph, starting at the
1692 # This algorithm involves walking down the rev graph, starting at the
1691 # heads. Since the revs are topologically sorted according to linkrev,
1693 # heads. Since the revs are topologically sorted according to linkrev,
1692 # once all head linkrevs are below the minlink, we know there are
1694 # once all head linkrevs are below the minlink, we know there are
1693 # no more revs that could have a linkrev greater than minlink.
1695 # no more revs that could have a linkrev greater than minlink.
1694 # So we can stop walking.
1696 # So we can stop walking.
1695 while futurelargelinkrevs:
1697 while futurelargelinkrevs:
1696 strippoint -= 1
1698 strippoint -= 1
1697 linkrev = heads.pop(strippoint)
1699 linkrev = heads.pop(strippoint)
1698
1700
1699 if linkrev < minlink:
1701 if linkrev < minlink:
1700 brokenrevs.add(strippoint)
1702 brokenrevs.add(strippoint)
1701 else:
1703 else:
1702 futurelargelinkrevs.remove(linkrev)
1704 futurelargelinkrevs.remove(linkrev)
1703
1705
1704 for p in self.parentrevs(strippoint):
1706 for p in self.parentrevs(strippoint):
1705 if p != nullrev:
1707 if p != nullrev:
1706 plinkrev = self.linkrev(p)
1708 plinkrev = self.linkrev(p)
1707 heads[p] = plinkrev
1709 heads[p] = plinkrev
1708 if plinkrev >= minlink:
1710 if plinkrev >= minlink:
1709 futurelargelinkrevs.add(plinkrev)
1711 futurelargelinkrevs.add(plinkrev)
1710
1712
1711 return strippoint, brokenrevs
1713 return strippoint, brokenrevs
1712
1714
1713 def strip(self, minlink, transaction):
1715 def strip(self, minlink, transaction):
1714 """truncate the revlog on the first revision with a linkrev >= minlink
1716 """truncate the revlog on the first revision with a linkrev >= minlink
1715
1717
1716 This function is called when we're stripping revision minlink and
1718 This function is called when we're stripping revision minlink and
1717 its descendants from the repository.
1719 its descendants from the repository.
1718
1720
1719 We have to remove all revisions with linkrev >= minlink, because
1721 We have to remove all revisions with linkrev >= minlink, because
1720 the equivalent changelog revisions will be renumbered after the
1722 the equivalent changelog revisions will be renumbered after the
1721 strip.
1723 strip.
1722
1724
1723 So we truncate the revlog on the first of these revisions, and
1725 So we truncate the revlog on the first of these revisions, and
1724 trust that the caller has saved the revisions that shouldn't be
1726 trust that the caller has saved the revisions that shouldn't be
1725 removed and that it'll re-add them after this truncation.
1727 removed and that it'll re-add them after this truncation.
1726 """
1728 """
1727 if len(self) == 0:
1729 if len(self) == 0:
1728 return
1730 return
1729
1731
1730 rev, _ = self.getstrippoint(minlink)
1732 rev, _ = self.getstrippoint(minlink)
1731 if rev == len(self):
1733 if rev == len(self):
1732 return
1734 return
1733
1735
1734 # first truncate the files on disk
1736 # first truncate the files on disk
1735 end = self.start(rev)
1737 end = self.start(rev)
1736 if not self._inline:
1738 if not self._inline:
1737 transaction.add(self.datafile, end)
1739 transaction.add(self.datafile, end)
1738 end = rev * self._io.size
1740 end = rev * self._io.size
1739 else:
1741 else:
1740 end += rev * self._io.size
1742 end += rev * self._io.size
1741
1743
1742 transaction.add(self.indexfile, end)
1744 transaction.add(self.indexfile, end)
1743
1745
1744 # then reset internal state in memory to forget those revisions
1746 # then reset internal state in memory to forget those revisions
1745 self._cache = None
1747 self._cache = None
1746 self._chaininfocache = {}
1748 self._chaininfocache = {}
1747 self._chunkclear()
1749 self._chunkclear()
1748 for x in xrange(rev, len(self)):
1750 for x in xrange(rev, len(self)):
1749 del self.nodemap[self.node(x)]
1751 del self.nodemap[self.node(x)]
1750
1752
1751 del self.index[rev:-1]
1753 del self.index[rev:-1]
1752
1754
1753 def checksize(self):
1755 def checksize(self):
1754 expected = 0
1756 expected = 0
1755 if len(self):
1757 if len(self):
1756 expected = max(0, self.end(len(self) - 1))
1758 expected = max(0, self.end(len(self) - 1))
1757
1759
1758 try:
1760 try:
1759 f = self.opener(self.datafile)
1761 f = self.opener(self.datafile)
1760 f.seek(0, 2)
1762 f.seek(0, 2)
1761 actual = f.tell()
1763 actual = f.tell()
1762 f.close()
1764 f.close()
1763 dd = actual - expected
1765 dd = actual - expected
1764 except IOError as inst:
1766 except IOError as inst:
1765 if inst.errno != errno.ENOENT:
1767 if inst.errno != errno.ENOENT:
1766 raise
1768 raise
1767 dd = 0
1769 dd = 0
1768
1770
1769 try:
1771 try:
1770 f = self.opener(self.indexfile)
1772 f = self.opener(self.indexfile)
1771 f.seek(0, 2)
1773 f.seek(0, 2)
1772 actual = f.tell()
1774 actual = f.tell()
1773 f.close()
1775 f.close()
1774 s = self._io.size
1776 s = self._io.size
1775 i = max(0, actual // s)
1777 i = max(0, actual // s)
1776 di = actual - (i * s)
1778 di = actual - (i * s)
1777 if self._inline:
1779 if self._inline:
1778 databytes = 0
1780 databytes = 0
1779 for r in self:
1781 for r in self:
1780 databytes += max(0, self.length(r))
1782 databytes += max(0, self.length(r))
1781 dd = 0
1783 dd = 0
1782 di = actual - len(self) * s - databytes
1784 di = actual - len(self) * s - databytes
1783 except IOError as inst:
1785 except IOError as inst:
1784 if inst.errno != errno.ENOENT:
1786 if inst.errno != errno.ENOENT:
1785 raise
1787 raise
1786 di = 0
1788 di = 0
1787
1789
1788 return (dd, di)
1790 return (dd, di)
1789
1791
1790 def files(self):
1792 def files(self):
1791 res = [self.indexfile]
1793 res = [self.indexfile]
1792 if not self._inline:
1794 if not self._inline:
1793 res.append(self.datafile)
1795 res.append(self.datafile)
1794 return res
1796 return res
General Comments 0
You need to be logged in to leave comments. Login now