revlog: avoid shadowing several variables using list comprehensions
Augie Fackler
r30391:2ded17b6 default
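For context: this file is Python 2 code, and in Python 2 the loop variable of a
list comprehension is not scoped to the comprehension, so a name reused inside
one silently rebinds any local of the same name. A minimal sketch of the
pitfall this commit removes (illustrative only, not part of the commit):

    # Python 2 semantics: a list comprehension's loop variable leaks into
    # the enclosing scope and clobbers any outer binding of the same name.
    def demo():
        n = 'tip-node'                 # outer local we still need
        revs = [n for n in (0, 1, 2)]  # the comprehension rebinds n...
        assert n == 2                  # ...so n is now 2, not 'tip-node'
        return revs

    # The fix is to give each comprehension its own variable name, as the
    # hunks below do, e.g.:
    #     heads = [head for head, flag in heads.iteritems() if flag]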
@@ -1,1820 +1,1820 @@
# revlog.py - storage back-end for mercurial
#
# Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.

"""Storage back-end for Mercurial.

This provides efficient delta storage with O(1) retrieve and append
and O(changes) merge between branches.
"""

from __future__ import absolute_import

import collections
import errno
import hashlib
import os
import struct
import zlib

# import stuff from node for others to import from revlog
from .node import (
    bin,
    hex,
    nullid,
    nullrev,
)
from .i18n import _
from . import (
    ancestor,
    error,
    mdiff,
    parsers,
    templatefilters,
    util,
)

_pack = struct.pack
_unpack = struct.unpack
_compress = zlib.compress
_decompress = zlib.decompress

# revlog header flags
REVLOGV0 = 0
REVLOGNG = 1
REVLOGNGINLINEDATA = (1 << 16)
REVLOGGENERALDELTA = (1 << 17)
REVLOG_DEFAULT_FLAGS = REVLOGNGINLINEDATA
REVLOG_DEFAULT_FORMAT = REVLOGNG
REVLOG_DEFAULT_VERSION = REVLOG_DEFAULT_FORMAT | REVLOG_DEFAULT_FLAGS
REVLOGNG_FLAGS = REVLOGNGINLINEDATA | REVLOGGENERALDELTA

# revlog index flags
REVIDX_ISCENSORED = (1 << 15) # revision has censor metadata, must be verified
REVIDX_DEFAULT_FLAGS = 0
REVIDX_KNOWN_FLAGS = REVIDX_ISCENSORED

# max size of revlog with inline data
_maxinline = 131072
_chunksize = 1048576

RevlogError = error.RevlogError
LookupError = error.LookupError
CensoredNodeError = error.CensoredNodeError

def getoffset(q):
    return int(q >> 16)

def gettype(q):
    return int(q & 0xFFFF)

def offset_type(offset, type):
    return long(long(offset) << 16 | type)

_nullhash = hashlib.sha1(nullid)

def hash(text, p1, p2):
    """generate a hash from the given text and its parent hashes

    This hash combines both the current file contents and its history
    in a manner that makes it easy to distinguish nodes with the same
    content in the revision graph.
    """
    # As of now, if one of the parent node is null, p2 is null
    if p2 == nullid:
        # deep copy of a hash is faster than creating one
        s = _nullhash.copy()
        s.update(p1)
    else:
        # none of the parent nodes are nullid
        l = [p1, p2]
        l.sort()
        s = hashlib.sha1(l[0])
        s.update(l[1])
    s.update(text)
    return s.digest()

def decompress(bin):
    """ decompress the given input """
    if not bin:
        return bin
    t = bin[0]
    if t == '\0':
        return bin
    if t == 'x':
        try:
            return _decompress(bin)
        except zlib.error as e:
            raise RevlogError(_("revlog decompress error: %s") % str(e))
    if t == 'u':
        return util.buffer(bin, 1)
    raise RevlogError(_("unknown compression type %r") % t)

# index v0:
#  4 bytes: offset
#  4 bytes: compressed length
#  4 bytes: base rev
#  4 bytes: link rev
# 20 bytes: parent 1 nodeid
# 20 bytes: parent 2 nodeid
# 20 bytes: nodeid
indexformatv0 = ">4l20s20s20s"

class revlogoldio(object):
    def __init__(self):
        self.size = struct.calcsize(indexformatv0)

    def parseindex(self, data, inline):
        s = self.size
        index = []
        nodemap = {nullid: nullrev}
        n = off = 0
        l = len(data)
        while off + s <= l:
            cur = data[off:off + s]
            off += s
            e = _unpack(indexformatv0, cur)
            # transform to revlogv1 format
            e2 = (offset_type(e[0], 0), e[1], -1, e[2], e[3],
                  nodemap.get(e[4], nullrev), nodemap.get(e[5], nullrev), e[6])
            index.append(e2)
            nodemap[e[6]] = n
            n += 1

        # add the magic null revision at -1
        index.append((0, 0, 0, -1, -1, -1, -1, nullid))

        return index, nodemap, None

    def packentry(self, entry, node, version, rev):
        if gettype(entry[0]):
            raise RevlogError(_("index entry flags need RevlogNG"))
        e2 = (getoffset(entry[0]), entry[1], entry[3], entry[4],
              node(entry[5]), node(entry[6]), entry[7])
        return _pack(indexformatv0, *e2)

# index ng:
#  6 bytes: offset
#  2 bytes: flags
#  4 bytes: compressed length
#  4 bytes: uncompressed length
#  4 bytes: base rev
#  4 bytes: link rev
#  4 bytes: parent 1 rev
#  4 bytes: parent 2 rev
# 32 bytes: nodeid
indexformatng = ">Qiiiiii20s12x"
versionformat = ">I"

# corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
# signed integer)
_maxentrysize = 0x7fffffff

class revlogio(object):
    def __init__(self):
        self.size = struct.calcsize(indexformatng)

    def parseindex(self, data, inline):
        # call the C implementation to parse the index data
        index, cache = parsers.parse_index2(data, inline)
        return index, getattr(index, 'nodemap', None), cache

    def packentry(self, entry, node, version, rev):
        p = _pack(indexformatng, *entry)
        if rev == 0:
            p = _pack(versionformat, version) + p[4:]
        return p

class revlog(object):
    """
    the underlying revision storage object

    A revlog consists of two parts, an index and the revision data.

    The index is a file with a fixed record size containing
    information on each revision, including its nodeid (hash), the
    nodeids of its parents, the position and offset of its data within
    the data file, and the revision it's based on. Finally, each entry
    contains a linkrev entry that can serve as a pointer to external
    data.

    The revision data itself is a linear collection of data chunks.
    Each chunk represents a revision and is usually represented as a
    delta against the previous chunk. To bound lookup time, runs of
    deltas are limited to about 2 times the length of the original
    version data. This makes retrieval of a version proportional to
    its size, or O(1) relative to the number of revisions.

    Both pieces of the revlog are written to in an append-only
    fashion, which means we never need to rewrite a file to insert or
    remove data, and can use some simple techniques to avoid the need
    for locking while reading.

    If checkambig, indexfile is opened with checkambig=True at
    writing, to avoid file stat ambiguity.
    """
    def __init__(self, opener, indexfile, checkambig=False):
        """
        create a revlog object

        opener is a function that abstracts the file opening operation
        and can be used to implement COW semantics or the like.
        """
        self.indexfile = indexfile
        self.datafile = indexfile[:-2] + ".d"
        self.opener = opener
        # When True, indexfile is opened with checkambig=True at writing, to
        # avoid file stat ambiguity.
        self._checkambig = checkambig
        # 3-tuple of (node, rev, text) for a raw revision.
        self._cache = None
        # Maps rev to chain base rev.
        self._chainbasecache = util.lrucachedict(100)
        # 2-tuple of (offset, data) of raw data from the revlog at an offset.
        self._chunkcache = (0, '')
        # How much data to read and cache into the raw revlog data cache.
        self._chunkcachesize = 65536
        self._maxchainlen = None
        self._aggressivemergedeltas = False
        self.index = []
        # Mapping of partial identifiers to full nodes.
        self._pcache = {}
        # Mapping of revision integer to full node.
        self._nodecache = {nullid: nullrev}
        self._nodepos = None

        v = REVLOG_DEFAULT_VERSION
        opts = getattr(opener, 'options', None)
        if opts is not None:
            if 'revlogv1' in opts:
                if 'generaldelta' in opts:
                    v |= REVLOGGENERALDELTA
            else:
                v = 0
            if 'chunkcachesize' in opts:
                self._chunkcachesize = opts['chunkcachesize']
            if 'maxchainlen' in opts:
                self._maxchainlen = opts['maxchainlen']
            if 'aggressivemergedeltas' in opts:
                self._aggressivemergedeltas = opts['aggressivemergedeltas']
            self._lazydeltabase = bool(opts.get('lazydeltabase', False))

        if self._chunkcachesize <= 0:
            raise RevlogError(_('revlog chunk cache size %r is not greater '
                                'than 0') % self._chunkcachesize)
        elif self._chunkcachesize & (self._chunkcachesize - 1):
            raise RevlogError(_('revlog chunk cache size %r is not a power '
                                'of 2') % self._chunkcachesize)

        indexdata = ''
        self._initempty = True
        try:
            f = self.opener(self.indexfile)
            indexdata = f.read()
            f.close()
            if len(indexdata) > 0:
                v = struct.unpack(versionformat, indexdata[:4])[0]
                self._initempty = False
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise

        self.version = v
        self._inline = v & REVLOGNGINLINEDATA
        self._generaldelta = v & REVLOGGENERALDELTA
        flags = v & ~0xFFFF
        fmt = v & 0xFFFF
        if fmt == REVLOGV0 and flags:
            raise RevlogError(_("index %s unknown flags %#04x for format v0")
                              % (self.indexfile, flags >> 16))
        elif fmt == REVLOGNG and flags & ~REVLOGNG_FLAGS:
            raise RevlogError(_("index %s unknown flags %#04x for revlogng")
                              % (self.indexfile, flags >> 16))
        elif fmt > REVLOGNG:
            raise RevlogError(_("index %s unknown format %d")
                              % (self.indexfile, fmt))

        self.storedeltachains = True

        self._io = revlogio()
        if self.version == REVLOGV0:
            self._io = revlogoldio()
        try:
            d = self._io.parseindex(indexdata, self._inline)
        except (ValueError, IndexError):
            raise RevlogError(_("index %s is corrupted") % (self.indexfile))
        self.index, nodemap, self._chunkcache = d
        if nodemap is not None:
            self.nodemap = self._nodecache = nodemap
        if not self._chunkcache:
            self._chunkclear()
        # revnum -> (chain-length, sum-delta-length)
        self._chaininfocache = {}

    def tip(self):
        return self.node(len(self.index) - 2)
    def __contains__(self, rev):
        return 0 <= rev < len(self)
    def __len__(self):
        return len(self.index) - 1
    def __iter__(self):
        return iter(xrange(len(self)))
    def revs(self, start=0, stop=None):
        """iterate over all rev in this revlog (from start to stop)"""
        step = 1
        if stop is not None:
            if start > stop:
                step = -1
            stop += step
        else:
            stop = len(self)
        return xrange(start, stop, step)

    @util.propertycache
    def nodemap(self):
        self.rev(self.node(0))
        return self._nodecache

    def hasnode(self, node):
        try:
            self.rev(node)
            return True
        except KeyError:
            return False

    def clearcaches(self):
        self._cache = None
        self._chainbasecache.clear()
        self._chunkcache = (0, '')
        self._pcache = {}

        try:
            self._nodecache.clearcaches()
        except AttributeError:
            self._nodecache = {nullid: nullrev}
            self._nodepos = None

    def rev(self, node):
        try:
            return self._nodecache[node]
        except TypeError:
            raise
        except RevlogError:
            # parsers.c radix tree lookup failed
            raise LookupError(node, self.indexfile, _('no node'))
        except KeyError:
            # pure python cache lookup failed
            n = self._nodecache
            i = self.index
            p = self._nodepos
            if p is None:
                p = len(i) - 2
            for r in xrange(p, -1, -1):
                v = i[r][7]
                n[v] = r
                if v == node:
                    self._nodepos = r - 1
                    return r
            raise LookupError(node, self.indexfile, _('no node'))

    # Accessors for index entries.

    # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
    # are flags.
    def start(self, rev):
        return int(self.index[rev][0] >> 16)

    def flags(self, rev):
        return self.index[rev][0] & 0xFFFF

    def length(self, rev):
        return self.index[rev][1]

    def rawsize(self, rev):
        """return the length of the uncompressed text for a given revision"""
        l = self.index[rev][2]
        if l >= 0:
            return l

        t = self.revision(self.node(rev))
        return len(t)
    size = rawsize

    def chainbase(self, rev):
        base = self._chainbasecache.get(rev)
        if base is not None:
            return base

        index = self.index
        base = index[rev][3]
        while base != rev:
            rev = base
            base = index[rev][3]

        self._chainbasecache[rev] = base
        return base

    def linkrev(self, rev):
        return self.index[rev][4]

    def parentrevs(self, rev):
        return self.index[rev][5:7]

    def node(self, rev):
        return self.index[rev][7]

    # Derived from index values.

    def end(self, rev):
        return self.start(rev) + self.length(rev)

    def parents(self, node):
        i = self.index
        d = i[self.rev(node)]
        return i[d[5]][7], i[d[6]][7] # map revisions to nodes inline

    def chainlen(self, rev):
        return self._chaininfo(rev)[0]

    def _chaininfo(self, rev):
        chaininfocache = self._chaininfocache
        if rev in chaininfocache:
            return chaininfocache[rev]
        index = self.index
        generaldelta = self._generaldelta
        iterrev = rev
        e = index[iterrev]
        clen = 0
        compresseddeltalen = 0
        while iterrev != e[3]:
            clen += 1
            compresseddeltalen += e[1]
            if generaldelta:
                iterrev = e[3]
            else:
                iterrev -= 1
            if iterrev in chaininfocache:
                t = chaininfocache[iterrev]
                clen += t[0]
                compresseddeltalen += t[1]
                break
            e = index[iterrev]
        else:
            # Add text length of base since decompressing that also takes
            # work. For cache hits the length is already included.
            compresseddeltalen += e[1]
        r = (clen, compresseddeltalen)
        chaininfocache[rev] = r
        return r

    def _deltachain(self, rev, stoprev=None):
        """Obtain the delta chain for a revision.

        ``stoprev`` specifies a revision to stop at. If not specified, we
        stop at the base of the chain.

        Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
        revs in ascending order and ``stopped`` is a bool indicating whether
        ``stoprev`` was hit.
        """
        chain = []

        # Alias to prevent attribute lookup in tight loop.
        index = self.index
        generaldelta = self._generaldelta

        iterrev = rev
        e = index[iterrev]
        while iterrev != e[3] and iterrev != stoprev:
            chain.append(iterrev)
            if generaldelta:
                iterrev = e[3]
            else:
                iterrev -= 1
            e = index[iterrev]

        if iterrev == stoprev:
            stopped = True
        else:
            chain.append(iterrev)
            stopped = False

        chain.reverse()
        return chain, stopped

    def ancestors(self, revs, stoprev=0, inclusive=False):
        """Generate the ancestors of 'revs' in reverse topological order.
        Does not generate revs lower than stoprev.

        See the documentation for ancestor.lazyancestors for more details."""

        return ancestor.lazyancestors(self.parentrevs, revs, stoprev=stoprev,
                                      inclusive=inclusive)

    def descendants(self, revs):
        """Generate the descendants of 'revs' in revision order.

        Yield a sequence of revision numbers starting with a child of
        some rev in revs, i.e., each revision is *not* considered a
        descendant of itself. Results are ordered by revision number (a
        topological sort)."""
        first = min(revs)
        if first == nullrev:
            for i in self:
                yield i
            return

        seen = set(revs)
        for i in self.revs(start=first + 1):
            for x in self.parentrevs(i):
                if x != nullrev and x in seen:
                    seen.add(i)
                    yield i
                    break

    def findcommonmissing(self, common=None, heads=None):
        """Return a tuple of the ancestors of common and the ancestors of heads
        that are not ancestors of common. In revset terminology, we return the
        tuple:

          ::common, (::heads) - (::common)

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of node IDs. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullid."""
        if common is None:
            common = [nullid]
        if heads is None:
            heads = self.heads()

        common = [self.rev(n) for n in common]
        heads = [self.rev(n) for n in heads]

        # we want the ancestors, but inclusive
        class lazyset(object):
            def __init__(self, lazyvalues):
                self.addedvalues = set()
                self.lazyvalues = lazyvalues

            def __contains__(self, value):
                return value in self.addedvalues or value in self.lazyvalues

            def __iter__(self):
                added = self.addedvalues
                for r in added:
                    yield r
                for r in self.lazyvalues:
                    if not r in added:
                        yield r

            def add(self, value):
                self.addedvalues.add(value)

            def update(self, values):
                self.addedvalues.update(values)

        has = lazyset(self.ancestors(common))
        has.add(nullrev)
        has.update(common)

        # take all ancestors from heads that aren't in has
        missing = set()
        visit = collections.deque(r for r in heads if r not in has)
        while visit:
            r = visit.popleft()
            if r in missing:
                continue
            else:
                missing.add(r)
                for p in self.parentrevs(r):
                    if p not in has:
                        visit.append(p)
        missing = list(missing)
        missing.sort()
-        return has, [self.node(r) for r in missing]
+        return has, [self.node(miss) for miss in missing]

    def incrementalmissingrevs(self, common=None):
        """Return an object that can be used to incrementally compute the
        revision numbers of the ancestors of arbitrary sets that are not
        ancestors of common. This is an ancestor.incrementalmissingancestors
        object.

        'common' is a list of revision numbers. If common is not supplied, uses
        nullrev.
        """
        if common is None:
            common = [nullrev]

        return ancestor.incrementalmissingancestors(self.parentrevs, common)

    def findmissingrevs(self, common=None, heads=None):
        """Return the revision numbers of the ancestors of heads that
        are not ancestors of common.

        More specifically, return a list of revision numbers corresponding to
        nodes N such that every N satisfies the following constraints:

          1. N is an ancestor of some node in 'heads'
          2. N is not an ancestor of any node in 'common'

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of revision numbers. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullid."""
        if common is None:
            common = [nullrev]
        if heads is None:
            heads = self.headrevs()

        inc = self.incrementalmissingrevs(common=common)
        return inc.missingancestors(heads)

    def findmissing(self, common=None, heads=None):
        """Return the ancestors of heads that are not ancestors of common.

        More specifically, return a list of nodes N such that every N
        satisfies the following constraints:

          1. N is an ancestor of some node in 'heads'
          2. N is not an ancestor of any node in 'common'

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of node IDs. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullid."""
        if common is None:
            common = [nullid]
        if heads is None:
            heads = self.heads()

        common = [self.rev(n) for n in common]
        heads = [self.rev(n) for n in heads]

        inc = self.incrementalmissingrevs(common=common)
        return [self.node(r) for r in inc.missingancestors(heads)]

    def nodesbetween(self, roots=None, heads=None):
        """Return a topological path from 'roots' to 'heads'.

        Return a tuple (nodes, outroots, outheads) where 'nodes' is a
        topologically sorted list of all nodes N that satisfy both of
        these constraints:

          1. N is a descendant of some node in 'roots'
          2. N is an ancestor of some node in 'heads'

        Every node is considered to be both a descendant and an ancestor
        of itself, so every reachable node in 'roots' and 'heads' will be
        included in 'nodes'.

        'outroots' is the list of reachable nodes in 'roots', i.e., the
        subset of 'roots' that is returned in 'nodes'. Likewise,
        'outheads' is the subset of 'heads' that is also in 'nodes'.

        'roots' and 'heads' are both lists of node IDs. If 'roots' is
        unspecified, uses nullid as the only root. If 'heads' is
        unspecified, uses list of all of the revlog's heads."""
        nonodes = ([], [], [])
        if roots is not None:
            roots = list(roots)
            if not roots:
                return nonodes
            lowestrev = min([self.rev(n) for n in roots])
        else:
            roots = [nullid] # Everybody's a descendant of nullid
            lowestrev = nullrev
        if (lowestrev == nullrev) and (heads is None):
            # We want _all_ the nodes!
            return ([self.node(r) for r in self], [nullid], list(self.heads()))
        if heads is None:
            # All nodes are ancestors, so the latest ancestor is the last
            # node.
            highestrev = len(self) - 1
            # Set ancestors to None to signal that every node is an ancestor.
            ancestors = None
            # Set heads to an empty dictionary for later discovery of heads
            heads = {}
        else:
            heads = list(heads)
            if not heads:
                return nonodes
            ancestors = set()
            # Turn heads into a dictionary so we can remove 'fake' heads.
            # Also, later we will be using it to filter out the heads we can't
            # find from roots.
            heads = dict.fromkeys(heads, False)
            # Start at the top and keep marking parents until we're done.
            nodestotag = set(heads)
            # Remember where the top was so we can use it as a limit later.
            highestrev = max([self.rev(n) for n in nodestotag])
            while nodestotag:
                # grab a node to tag
                n = nodestotag.pop()
                # Never tag nullid
                if n == nullid:
                    continue
                # A node's revision number represents its place in a
                # topologically sorted list of nodes.
                r = self.rev(n)
                if r >= lowestrev:
                    if n not in ancestors:
                        # If we are possibly a descendant of one of the roots
                        # and we haven't already been marked as an ancestor
                        ancestors.add(n) # Mark as ancestor
                        # Add non-nullid parents to list of nodes to tag.
                        nodestotag.update([p for p in self.parents(n) if
                                           p != nullid])
                    elif n in heads: # We've seen it before, is it a fake head?
                        # So it is, real heads should not be the ancestors of
                        # any other heads.
                        heads.pop(n)
            if not ancestors:
                return nonodes
            # Now that we have our set of ancestors, we want to remove any
            # roots that are not ancestors.

            # If one of the roots was nullid, everything is included anyway.
            if lowestrev > nullrev:
                # But, since we weren't, let's recompute the lowest rev to not
                # include roots that aren't ancestors.

                # Filter out roots that aren't ancestors of heads
-                roots = [n for n in roots if n in ancestors]
+                roots = [root for root in roots if root in ancestors]
                # Recompute the lowest revision
                if roots:
-                    lowestrev = min([self.rev(n) for n in roots])
+                    lowestrev = min([self.rev(root) for root in roots])
                else:
                    # No more roots?  Return empty list
                    return nonodes
            else:
                # We are descending from nullid, and don't need to care about
                # any other roots.
                lowestrev = nullrev
                roots = [nullid]
        # Transform our roots list into a set.
        descendants = set(roots)
        # Also, keep the original roots so we can filter out roots that aren't
        # 'real' roots (i.e. are descended from other roots).
        roots = descendants.copy()
        # Our topologically sorted list of output nodes.
        orderedout = []
        # Don't start at nullid since we don't want nullid in our output list,
        # and if nullid shows up in descendants, empty parents will look like
        # they're descendants.
        for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
            n = self.node(r)
            isdescendant = False
            if lowestrev == nullrev: # Everybody is a descendant of nullid
                isdescendant = True
            elif n in descendants:
                # n is already a descendant
                isdescendant = True
                # This check only needs to be done here because all the roots
                # will start being marked is descendants before the loop.
                if n in roots:
                    # If n was a root, check if it's a 'real' root.
                    p = tuple(self.parents(n))
                    # If any of its parents are descendants, it's not a root.
                    if (p[0] in descendants) or (p[1] in descendants):
                        roots.remove(n)
            else:
                p = tuple(self.parents(n))
                # A node is a descendant if either of its parents are
                # descendants. (We seeded the dependents list with the roots
                # up there, remember?)
                if (p[0] in descendants) or (p[1] in descendants):
                    descendants.add(n)
                    isdescendant = True
            if isdescendant and ((ancestors is None) or (n in ancestors)):
                # Only include nodes that are both descendants and ancestors.
                orderedout.append(n)
                if (ancestors is not None) and (n in heads):
                    # We're trying to figure out which heads are reachable
                    # from roots.
                    # Mark this head as having been reached
                    heads[n] = True
                elif ancestors is None:
                    # Otherwise, we're trying to discover the heads.
                    # Assume this is a head because if it isn't, the next step
                    # will eventually remove it.
                    heads[n] = True
                    # But, obviously its parents aren't.
                    for p in self.parents(n):
                        heads.pop(p, None)
-        heads = [n for n, flag in heads.iteritems() if flag]
+        heads = [head for head, flag in heads.iteritems() if flag]
        roots = list(roots)
        assert orderedout
        assert roots
        assert heads
        return (orderedout, roots, heads)

    def headrevs(self):
        try:
            return self.index.headrevs()
        except AttributeError:
            return self._headrevs()

    def computephases(self, roots):
        return self.index.computephasesmapsets(roots)

    def _headrevs(self):
        count = len(self)
        if not count:
            return [nullrev]
        # we won't iter over filtered rev so nobody is a head at start
        ishead = [0] * (count + 1)
        index = self.index
        for r in self:
            ishead[r] = 1 # I may be an head
            e = index[r]
            ishead[e[5]] = ishead[e[6]] = 0 # my parent are not
        return [r for r, val in enumerate(ishead) if val]

    def heads(self, start=None, stop=None):
        """return the list of all nodes that have no children

        if start is specified, only heads that are descendants of
        start will be returned
        if stop is specified, it will consider all the revs from stop
        as if they had no children
        """
        if start is None and stop is None:
            if not len(self):
                return [nullid]
            return [self.node(r) for r in self.headrevs()]

        if start is None:
            start = nullid
        if stop is None:
            stop = []
        stoprevs = set([self.rev(n) for n in stop])
        startrev = self.rev(start)
        reachable = set((startrev,))
        heads = set((startrev,))

        parentrevs = self.parentrevs
        for r in self.revs(start=startrev + 1):
            for p in parentrevs(r):
                if p in reachable:
                    if r not in stoprevs:
                        reachable.add(r)
                        heads.add(r)
                    if p in heads and p not in stoprevs:
                        heads.remove(p)

        return [self.node(r) for r in heads]

876
876
877 def children(self, node):
877 def children(self, node):
878 """find the children of a given node"""
878 """find the children of a given node"""
879 c = []
879 c = []
880 p = self.rev(node)
880 p = self.rev(node)
881 for r in self.revs(start=p + 1):
881 for r in self.revs(start=p + 1):
882 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
882 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
883 if prevs:
883 if prevs:
884 for pr in prevs:
884 for pr in prevs:
885 if pr == p:
885 if pr == p:
886 c.append(self.node(r))
886 c.append(self.node(r))
887 elif p == nullrev:
887 elif p == nullrev:
888 c.append(self.node(r))
888 c.append(self.node(r))
889 return c
889 return c
890
890
891 def descendant(self, start, end):
891 def descendant(self, start, end):
892 if start == nullrev:
892 if start == nullrev:
893 return True
893 return True
894 for i in self.descendants([start]):
894 for i in self.descendants([start]):
895 if i == end:
895 if i == end:
896 return True
896 return True
897 elif i > end:
897 elif i > end:
898 break
898 break
899 return False
899 return False
900
900
901 def commonancestorsheads(self, a, b):
901 def commonancestorsheads(self, a, b):
902 """calculate all the heads of the common ancestors of nodes a and b"""
902 """calculate all the heads of the common ancestors of nodes a and b"""
903 a, b = self.rev(a), self.rev(b)
903 a, b = self.rev(a), self.rev(b)
904 try:
904 try:
905 ancs = self.index.commonancestorsheads(a, b)
905 ancs = self.index.commonancestorsheads(a, b)
906 except (AttributeError, OverflowError): # C implementation failed
906 except (AttributeError, OverflowError): # C implementation failed
907 ancs = ancestor.commonancestorsheads(self.parentrevs, a, b)
907 ancs = ancestor.commonancestorsheads(self.parentrevs, a, b)
908 return map(self.node, ancs)
908 return map(self.node, ancs)
909
909
910 def isancestor(self, a, b):
910 def isancestor(self, a, b):
911 """return True if node a is an ancestor of node b
911 """return True if node a is an ancestor of node b
912
912
913 The implementation of this is trivial but the use of
913 The implementation of this is trivial but the use of
914 commonancestorsheads is not."""
914 commonancestorsheads is not."""
915 return a in self.commonancestorsheads(a, b)
915 return a in self.commonancestorsheads(a, b)
916
916
    def ancestor(self, a, b):
        """calculate the "best" common ancestor of nodes a and b"""

        a, b = self.rev(a), self.rev(b)
        try:
            ancs = self.index.ancestors(a, b)
        except (AttributeError, OverflowError):
            ancs = ancestor.ancestors(self.parentrevs, a, b)
        if ancs:
            # choose a consistent winner when there's a tie
            return min(map(self.node, ancs))
        return nullid

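    # Illustrative note (added sketch, not in the original file): when a
    # merge has several equally good ancestors, min() over the binary
    # nodeids picks a deterministic winner, so repeated runs agree:
    #
    #   best = rl.ancestor(a, b)          # ``rl`` is a hypothetical revlog
    #   assert best == rl.ancestor(a, b)  # stable across calls
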
    def _match(self, id):
        if isinstance(id, int):
            # rev
            return self.node(id)
        if len(id) == 20:
            # possibly a binary node
            # odds of a binary node being all hex in ASCII are 1 in 10**25
            try:
                node = id
                self.rev(node) # quick search the index
                return node
            except LookupError:
                pass # may be partial hex id
        try:
            # str(rev)
            rev = int(id)
            if str(rev) != id:
                raise ValueError
            if rev < 0:
                rev = len(self) + rev
            if rev < 0 or rev >= len(self):
                raise ValueError
            return self.node(rev)
        except (ValueError, OverflowError):
            pass
        if len(id) == 40:
            try:
                # a full hex nodeid?
                node = bin(id)
                self.rev(node)
                return node
            except (TypeError, LookupError):
                pass

    def _partialmatch(self, id):
        try:
            partial = self.index.partialmatch(id)
            if partial and self.hasnode(partial):
                return partial
            return None
        except RevlogError:
            # parsers.c radix tree lookup gave multiple matches
            # fast path: for unfiltered changelog, radix tree is accurate
            if not getattr(self, 'filteredrevs', None):
                raise LookupError(id, self.indexfile,
                                  _('ambiguous identifier'))
            # fall through to slow path that filters hidden revisions
        except (AttributeError, ValueError):
            # we are pure python, or key was too short to search radix tree
            pass

        if id in self._pcache:
            return self._pcache[id]

        if len(id) < 40:
            try:
                # hex(node)[:...]
                l = len(id) // 2  # grab an even number of digits
                prefix = bin(id[:l * 2])
                nl = [e[7] for e in self.index if e[7].startswith(prefix)]
                nl = [n for n in nl if hex(n).startswith(id) and
                      self.hasnode(n)]
                if len(nl) > 0:
                    if len(nl) == 1:
                        self._pcache[id] = nl[0]
                        return nl[0]
                    raise LookupError(id, self.indexfile,
                                      _('ambiguous identifier'))
                return None
            except TypeError:
                pass

    def lookup(self, id):
        """locate a node based on:
            - revision number or str(revision number)
            - nodeid or subset of hex nodeid
        """
        n = self._match(id)
        if n is not None:
            return n
        n = self._partialmatch(id)
        if n:
            return n

        raise LookupError(id, self.indexfile, _('no match found'))

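    # Lookup sketch (illustrative addition): lookup() accepts several key
    # shapes and resolves them in _match()/_partialmatch() order --
    # integer rev, str(rev), 20-byte binary node, 40-char hex node, or a
    # unique hex prefix.  With a hypothetical instance ``rl``:
    #
    #   rl.lookup(0)                      # rev number
    #   rl.lookup('0')                    # str(rev)
    #   rl.lookup(hex(rl.node(0)))        # full hex nodeid
    #   rl.lookup(hex(rl.node(0))[:8])    # unique prefix, else LookupError
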
    def cmp(self, node, text):
        """compare text with a given file revision

        returns True if text is different from what is stored.
        """
        p1, p2 = self.parents(node)
        return hash(text, p1, p2) != node

    def _addchunk(self, offset, data):
        """Add a segment to the revlog cache.

        Accepts an absolute offset and the data that is at that location.
        """
        o, d = self._chunkcache
        # try to add to existing cache
        if o + len(d) == offset and len(d) + len(data) < _chunksize:
            self._chunkcache = o, d + data
        else:
            self._chunkcache = offset, data

    def _loadchunk(self, offset, length, df=None):
        """Load a segment of raw data from the revlog.

        Accepts an absolute offset, length to read, and an optional existing
        file handle to read from.

        If an existing file handle is passed, it will be seeked and the
        original seek position will NOT be restored.

        Returns a str or buffer of raw byte data.
        """
        if df is not None:
            closehandle = False
        else:
            if self._inline:
                df = self.opener(self.indexfile)
            else:
                df = self.opener(self.datafile)
            closehandle = True

        # Cache data both forward and backward around the requested
        # data, in a fixed size window. This helps speed up operations
        # involving reading the revlog backwards.
        cachesize = self._chunkcachesize
        realoffset = offset & ~(cachesize - 1)
        reallength = (((offset + length + cachesize) & ~(cachesize - 1))
                      - realoffset)
        df.seek(realoffset)
        d = df.read(reallength)
        if closehandle:
            df.close()
        self._addchunk(realoffset, d)
        if offset != realoffset or reallength != length:
            return util.buffer(d, offset - realoffset, length)
        return d

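    # Worked example of the cache-window arithmetic above (added for
    # illustration; the numbers are made up).  With _chunkcachesize =
    # 65536, a request for offset=70000, length=100 is widened to a
    # window aligned on the cache size:
    #
    #   realoffset = 70000 & ~65535                           -> 65536
    #   reallength = ((70000 + 100 + 65536) & ~65535) - 65536 -> 65536
    #
    # so one 64 KiB read at 65536 covers bytes [70000, 70100) and the
    # surrounding data is kept in _chunkcache for nearby requests.
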
    def _getchunk(self, offset, length, df=None):
        """Obtain a segment of raw data from the revlog.

        Accepts an absolute offset, length of bytes to obtain, and an
        optional file handle to the already-opened revlog. If the file
        handle is used, its original seek position will not be preserved.

        Requests for data may be returned from a cache.

        Returns a str or a buffer instance of raw byte data.
        """
        o, d = self._chunkcache
        l = len(d)

        # is it in the cache?
        cachestart = offset - o
        cacheend = cachestart + length
        if cachestart >= 0 and cacheend <= l:
            if cachestart == 0 and cacheend == l:
                return d # avoid a copy
            return util.buffer(d, cachestart, cacheend - cachestart)

        return self._loadchunk(offset, length, df=df)

    def _chunkraw(self, startrev, endrev, df=None):
        """Obtain a segment of raw data corresponding to a range of revisions.

        Accepts the start and end revisions and an optional already-open
        file handle to be used for reading. If the file handle is read, its
        seek position will not be preserved.

        Requests for data may be satisfied by a cache.

        Returns a 2-tuple of (offset, data) for the requested range of
        revisions. Offset is the integer offset from the beginning of the
        revlog and data is a str or buffer of the raw byte data.

        Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
        to determine where each revision's data begins and ends.
        """
        # Inlined self.start(startrev) & self.end(endrev) for perf reasons
        # (functions are expensive).
        index = self.index
        istart = index[startrev]
        start = int(istart[0] >> 16)
        if startrev == endrev:
            end = start + istart[1]
        else:
            iend = index[endrev]
            end = int(iend[0] >> 16) + iend[1]

        if self._inline:
            start += (startrev + 1) * self._io.size
            end += (endrev + 1) * self._io.size
        length = end - start

        return start, self._getchunk(start, length, df=df)

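    # Inline-offset sketch (illustrative addition): in an inline revlog the
    # index entries and data chunks are interleaved in one file, so the
    # logical data offset of rev r is shifted by the (r + 1) index entries
    # that precede it.  For a hypothetical entry size of 64 bytes:
    #
    #   physical = logical_start + (rev + 1) * 64
    #
    # which matches the ``start += (startrev + 1) * self._io.size``
    # adjustment above.
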
    def _chunk(self, rev, df=None):
        """Obtain a single decompressed chunk for a revision.

        Accepts an integer revision and an optional already-open file handle
        to be used for reading. If used, the seek position of the file will not
        be preserved.

        Returns a str holding uncompressed data for the requested revision.
        """
        return decompress(self._chunkraw(rev, rev, df=df)[1])

    def _chunks(self, revs, df=None):
        """Obtain decompressed chunks for the specified revisions.

        Accepts an iterable of numeric revisions that are assumed to be in
        ascending order. Also accepts an optional already-open file handle
        to be used for reading. If used, the seek position of the file will
        not be preserved.

        This function is similar to calling ``self._chunk()`` multiple times,
        but is faster.

        Returns a list with decompressed data for each requested revision.
        """
        if not revs:
            return []
        start = self.start
        length = self.length
        inline = self._inline
        iosize = self._io.size
        buffer = util.buffer

        l = []
        ladd = l.append

        try:
            offset, data = self._chunkraw(revs[0], revs[-1], df=df)
        except OverflowError:
            # issue4215 - we can't cache a run of chunks greater than
            # 2G on Windows
            return [self._chunk(rev, df=df) for rev in revs]

        for rev in revs:
            chunkstart = start(rev)
            if inline:
                chunkstart += (rev + 1) * iosize
            chunklength = length(rev)
            ladd(decompress(buffer(data, chunkstart - offset, chunklength)))

        return l

    def _chunkclear(self):
        """Clear the raw chunk cache."""
        self._chunkcache = (0, '')

    def deltaparent(self, rev):
        """return deltaparent of the given revision"""
        base = self.index[rev][3]
        if base == rev:
            return nullrev
        elif self._generaldelta:
            return base
        else:
            return rev - 1

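    # Delta-parent sketch (illustrative addition): in a generaldelta revlog
    # the index records the actual base revision, while in the older format
    # a delta is always against the previous revision.  Roughly:
    #
    #   base = rl.deltaparent(rev)        # ``rl`` is a hypothetical revlog
    #   # generaldelta:     base may be a parent or another ancestor
    #   # non-generaldelta: base is rev - 1 (or nullrev for a full text)
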
    def revdiff(self, rev1, rev2):
        """return or calculate a delta between two revisions"""
        if rev1 != nullrev and self.deltaparent(rev2) == rev1:
            return str(self._chunk(rev2))

        return mdiff.textdiff(self.revision(rev1),
                              self.revision(rev2))

    def revision(self, nodeorrev, _df=None):
        """return an uncompressed revision of a given node or revision
        number.

        _df is an existing file handle to read from. It is meant to only be
        used internally.
        """
        if isinstance(nodeorrev, int):
            rev = nodeorrev
            node = self.node(rev)
        else:
            node = nodeorrev
            rev = None

        cachedrev = None
        if node == nullid:
            return ""
        if self._cache:
            if self._cache[0] == node:
                return self._cache[2]
            cachedrev = self._cache[1]

        # look up what we need to read
        text = None
        if rev is None:
            rev = self.rev(node)

        # check rev flags
        if self.flags(rev) & ~REVIDX_KNOWN_FLAGS:
            raise RevlogError(_('incompatible revision flag %x') %
                              (self.flags(rev) & ~REVIDX_KNOWN_FLAGS))

        chain, stopped = self._deltachain(rev, stoprev=cachedrev)
        if stopped:
            text = self._cache[2]

        # drop cache to save memory
        self._cache = None

        bins = self._chunks(chain, df=_df)
        if text is None:
            text = str(bins[0])
            bins = bins[1:]

        text = mdiff.patches(text, bins)

        text = self._checkhash(text, node, rev)

        self._cache = (node, rev, text)
        return text

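    # Reconstruction sketch (illustrative addition): a revision is rebuilt
    # by taking the full text at the base of its delta chain and applying
    # each delta in order, conceptually:
    #
    #   chain, stopped = rl._deltachain(rev)     # hypothetical ``rl``
    #   bins = rl._chunks(chain)
    #   text = mdiff.patches(str(bins[0]), bins[1:])
    #
    # the cache short-circuits this when the chain can stop at a revision
    # whose fulltext is already in memory.
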
    def hash(self, text, p1, p2):
        """Compute a node hash.

        Available as a function so that subclasses can replace the hash
        as needed.
        """
        return hash(text, p1, p2)

    def _checkhash(self, text, node, rev):
        p1, p2 = self.parents(node)
        self.checkhash(text, p1, p2, node, rev)
        return text

    def checkhash(self, text, p1, p2, node, rev=None):
        if node != self.hash(text, p1, p2):
            revornode = rev
            if revornode is None:
                revornode = templatefilters.short(hex(node))
            raise RevlogError(_("integrity check failed on %s:%s")
                              % (self.indexfile, revornode))

    def checkinlinesize(self, tr, fp=None):
        """Check if the revlog is too big for inline and convert if so.

        This should be called after revisions are added to the revlog. If the
        revlog has grown too large to be an inline revlog, it will convert it
        to use multiple index and data files.
        """
        if not self._inline or (self.start(-2) + self.length(-2)) < _maxinline:
            return

        trinfo = tr.find(self.indexfile)
        if trinfo is None:
            raise RevlogError(_("%s not found in the transaction")
                              % self.indexfile)

        trindex = trinfo[2]
        if trindex is not None:
            dataoff = self.start(trindex)
        else:
            # revlog was stripped at start of transaction, use all leftover data
            trindex = len(self) - 1
            dataoff = self.end(-2)

        tr.add(self.datafile, dataoff)

        if fp:
            fp.flush()
            fp.close()

        df = self.opener(self.datafile, 'w')
        try:
            for r in self:
                df.write(self._chunkraw(r, r)[1])
        finally:
            df.close()

        fp = self.opener(self.indexfile, 'w', atomictemp=True,
                         checkambig=self._checkambig)
        self.version &= ~(REVLOGNGINLINEDATA)
        self._inline = False
        for i in self:
            e = self._io.packentry(self.index[i], self.node, self.version, i)
            fp.write(e)

        # if we don't call close, the temp file will never replace the
        # real index
        fp.close()

        tr.replace(self.indexfile, trindex * self._io.size)
        self._chunkclear()

    def addrevision(self, text, transaction, link, p1, p2, cachedelta=None,
                    node=None):
        """add a revision to the log

        text - the revision data to add
        transaction - the transaction object used for rollback
        link - the linkrev data to add
        p1, p2 - the parent nodeids of the revision
        cachedelta - an optional precomputed delta
        node - nodeid of revision; typically node is not specified, and it is
            computed by default as hash(text, p1, p2); however, subclasses
            might use a different hashing method (and override checkhash()
            in that case)
        """
        if link == nullrev:
            raise RevlogError(_("attempted to add linkrev -1 to %s")
                              % self.indexfile)

        if len(text) > _maxentrysize:
            raise RevlogError(
                _("%s: size of %d bytes exceeds maximum revlog storage of 2GiB")
                % (self.indexfile, len(text)))

        node = node or self.hash(text, p1, p2)
        if node in self.nodemap:
            return node

        dfh = None
        if not self._inline:
            dfh = self.opener(self.datafile, "a+")
        ifh = self.opener(self.indexfile, "a+", checkambig=self._checkambig)
        try:
            return self._addrevision(node, text, transaction, link, p1, p2,
                                     REVIDX_DEFAULT_FLAGS, cachedelta, ifh, dfh)
        finally:
            if dfh:
                dfh.close()
            ifh.close()

    def compress(self, text):
        """ generate a possibly-compressed representation of text """
        if not text:
            return ("", text)
        l = len(text)
        bin = None
        if l < 44:
            pass
        elif l > 1000000:
            # zlib makes an internal copy, thus doubling memory usage for
            # large files, so let's do this in pieces
            z = zlib.compressobj()
            p = []
            pos = 0
            while pos < l:
                pos2 = pos + 2**20
                p.append(z.compress(text[pos:pos2]))
                pos = pos2
            p.append(z.flush())
            if sum(map(len, p)) < l:
                bin = "".join(p)
        else:
            bin = _compress(text)
        if bin is None or len(bin) > l:
            if text[0] == '\0':
                return ("", text)
            return ('u', text)
        return ("", bin)

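    # Header sketch (illustrative addition): the first element of the
    # returned tuple tells readers how to treat the payload --
    #
    #   ("", data)   zlib-compressed, or empty/NUL-leading plain text
    #   ('u', data)  stored uncompressed because compression did not pay off
    #
    # e.g. a 10-byte string (under the 44-byte floor) is never worth
    # compressing, so compress('x' * 10) comes back as ('u', 'x' * 10).
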
    def _isgooddelta(self, d, textlen):
        """Returns True if the given delta is good. Good means that it is within
        the disk span, disk size, and chain length bounds that we know to be
        performant."""
        if d is None:
            return False

        # - 'dist' is the distance from the base revision -- bounding it limits
        #   the amount of I/O we need to do.
        # - 'compresseddeltalen' is the sum of the total size of deltas we need
        #   to apply -- bounding it limits the amount of CPU we consume.
        dist, l, data, base, chainbase, chainlen, compresseddeltalen = d
        if (dist > textlen * 4 or l > textlen or
            compresseddeltalen > textlen * 2 or
            (self._maxchainlen and chainlen > self._maxchainlen)):
            return False

        return True

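    # Worked bounds example (illustrative addition): for a revision whose
    # fulltext is textlen = 1000 bytes, a candidate delta is rejected when
    #
    #   dist               > 4000   (too much file span to read)
    #   l                  > 1000   (delta bigger than the fulltext)
    #   compresseddeltalen > 2000   (chain too expensive to apply)
    #
    # or when the chain would exceed the configured _maxchainlen.
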
    def _addrevision(self, node, text, transaction, link, p1, p2, flags,
                     cachedelta, ifh, dfh, alwayscache=False):
        """internal function to add revisions to the log

        see addrevision for argument descriptions.
        invariants:
        - text is optional (can be None); if not set, cachedelta must be set.
          if both are set, they must correspond to each other.
        """
        btext = [text]
        def buildtext():
            if btext[0] is not None:
                return btext[0]
            baserev = cachedelta[0]
            delta = cachedelta[1]
            # special case deltas which replace entire base; no need to decode
            # base revision. this neatly avoids censored bases, which throw
            # when they're decoded.
            hlen = struct.calcsize(">lll")
            if delta[:hlen] == mdiff.replacediffheader(self.rawsize(baserev),
                                                       len(delta) - hlen):
                btext[0] = delta[hlen:]
            else:
                if self._inline:
                    fh = ifh
                else:
                    fh = dfh
                basetext = self.revision(self.node(baserev), _df=fh)
                btext[0] = mdiff.patch(basetext, delta)
            try:
                self.checkhash(btext[0], p1, p2, node)
                if flags & REVIDX_ISCENSORED:
                    raise RevlogError(_('node %s is not censored') % node)
            except CensoredNodeError:
                # must pass the censored index flag to add censored revisions
                if not flags & REVIDX_ISCENSORED:
                    raise
            return btext[0]

        def builddelta(rev):
            # can we use the cached delta?
            if cachedelta and cachedelta[0] == rev:
                delta = cachedelta[1]
            else:
                t = buildtext()
                if self.iscensored(rev):
                    # deltas based on a censored revision must replace the
                    # full content in one patch, so delta works everywhere
                    header = mdiff.replacediffheader(self.rawsize(rev), len(t))
                    delta = header + t
                else:
                    if self._inline:
                        fh = ifh
                    else:
                        fh = dfh
                    ptext = self.revision(self.node(rev), _df=fh)
                    delta = mdiff.textdiff(ptext, t)
            header, data = self.compress(delta)
            deltalen = len(header) + len(data)
            chainbase = self.chainbase(rev)
            dist = deltalen + offset - self.start(chainbase)
            if self._generaldelta:
                base = rev
            else:
                base = chainbase
            chainlen, compresseddeltalen = self._chaininfo(rev)
            chainlen += 1
            compresseddeltalen += deltalen
            return (dist, deltalen, (header, data), base,
                    chainbase, chainlen, compresseddeltalen)

        curr = len(self)
        prev = curr - 1
        offset = self.end(prev)
        delta = None
        p1r, p2r = self.rev(p1), self.rev(p2)

        # full versions are inserted when the needed deltas
        # become comparable to the uncompressed text
        if text is None:
            textlen = mdiff.patchedsize(self.rawsize(cachedelta[0]),
                                        cachedelta[1])
        else:
            textlen = len(text)

        # should we try to build a delta?
        if prev != nullrev and self.storedeltachains:
            tested = set()
            # This condition is true most of the time when processing
            # changegroup data into a generaldelta repo. The only time it
            # isn't true is if this is the first revision in a delta chain
            # or if ``format.generaldelta=true`` disabled ``lazydeltabase``.
            if cachedelta and self._generaldelta and self._lazydeltabase:
                # Assume what we received from the server is a good choice;
                # builddelta() will reuse the cache.
                candidatedelta = builddelta(cachedelta[0])
                tested.add(cachedelta[0])
                if self._isgooddelta(candidatedelta, textlen):
                    delta = candidatedelta
            if delta is None and self._generaldelta:
                # exclude already lazily tested base if any
                parents = [p for p in (p1r, p2r)
                           if p != nullrev and p not in tested]
                if parents and not self._aggressivemergedeltas:
                    # Pick whichever parent is closer to us (to minimize the
                    # chance of having to build a fulltext).
                    parents = [max(parents)]
                tested.update(parents)
                pdeltas = []
                for p in parents:
                    pd = builddelta(p)
                    if self._isgooddelta(pd, textlen):
                        pdeltas.append(pd)
                if pdeltas:
                    delta = min(pdeltas, key=lambda x: x[1])
            if delta is None and prev not in tested:
                # other approaches failed; try against prev to hopefully save
                # us a fulltext.
                candidatedelta = builddelta(prev)
                if self._isgooddelta(candidatedelta, textlen):
                    delta = candidatedelta
        if delta is not None:
            dist, l, data, base, chainbase, chainlen, compresseddeltalen = delta
        else:
            text = buildtext()
            data = self.compress(text)
            l = len(data[1]) + len(data[0])
            base = chainbase = curr

        e = (offset_type(offset, flags), l, textlen,
             base, link, p1r, p2r, node)
        self.index.insert(-1, e)
        self.nodemap[node] = curr

        entry = self._io.packentry(e, self.node, self.version, curr)
        self._writeentry(transaction, ifh, dfh, entry, data, link, offset)

        if alwayscache and text is None:
            text = buildtext()

        if type(text) == str: # only accept immutable objects
            self._cache = (node, curr, text)
        self._chainbasecache[curr] = chainbase
        return node

    def _writeentry(self, transaction, ifh, dfh, entry, data, link, offset):
        # Files opened in a+ mode have inconsistent behavior on various
        # platforms. Windows requires that a file positioning call be made
        # when the file handle transitions between reads and writes. See
        # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
        # platforms, Python or the platform itself can be buggy. Some versions
        # of Solaris have been observed to not append at the end of the file
        # if the file was seeked to before the end. See issue4943 for more.
        #
        # We work around this issue by inserting a seek() before writing.
        # Note: This is likely not necessary on Python 3.
        ifh.seek(0, os.SEEK_END)
        if dfh:
            dfh.seek(0, os.SEEK_END)

        curr = len(self) - 1
        if not self._inline:
            transaction.add(self.datafile, offset)
            transaction.add(self.indexfile, curr * len(entry))
            if data[0]:
                dfh.write(data[0])
            dfh.write(data[1])
            ifh.write(entry)
        else:
            offset += curr * self._io.size
            transaction.add(self.indexfile, offset, curr)
            ifh.write(entry)
            ifh.write(data[0])
            ifh.write(data[1])
            self.checkinlinesize(transaction, ifh)

    def addgroup(self, cg, linkmapper, transaction, addrevisioncb=None):
        """
        add a delta group

        given a set of deltas, add them to the revision log. the
        first delta is against its parent, which should be in our
        log, the rest are against the previous delta.

        If ``addrevisioncb`` is defined, it will be called with arguments of
        this revlog and the node that was added.
        """

        # track the base of the current delta log
        content = []
        node = None

        r = len(self)
        end = 0
        if r:
            end = self.end(r - 1)
        ifh = self.opener(self.indexfile, "a+", checkambig=self._checkambig)
        isize = r * self._io.size
        if self._inline:
            transaction.add(self.indexfile, end + isize, r)
            dfh = None
        else:
            transaction.add(self.indexfile, isize, r)
            transaction.add(self.datafile, end)
            dfh = self.opener(self.datafile, "a+")
        def flush():
            if dfh:
                dfh.flush()
            ifh.flush()
        try:
            # loop through our set of deltas
            chain = None
            for chunkdata in iter(lambda: cg.deltachunk(chain), {}):
                node = chunkdata['node']
                p1 = chunkdata['p1']
                p2 = chunkdata['p2']
                cs = chunkdata['cs']
                deltabase = chunkdata['deltabase']
                delta = chunkdata['delta']
                flags = chunkdata['flags'] or REVIDX_DEFAULT_FLAGS

                content.append(node)

                link = linkmapper(cs)
                if node in self.nodemap:
                    # this can happen if two branches make the same change
                    chain = node
                    continue

                for p in (p1, p2):
                    if p not in self.nodemap:
                        raise LookupError(p, self.indexfile,
                                          _('unknown parent'))

                if deltabase not in self.nodemap:
                    raise LookupError(deltabase, self.indexfile,
                                      _('unknown delta base'))

                baserev = self.rev(deltabase)

                if baserev != nullrev and self.iscensored(baserev):
                    # if base is censored, delta must be full replacement in a
                    # single patch operation
                    hlen = struct.calcsize(">lll")
                    oldlen = self.rawsize(baserev)
                    newlen = len(delta) - hlen
                    if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
                        raise error.CensoredBaseError(self.indexfile,
                                                      self.node(baserev))

                if not flags and self._peek_iscensored(baserev, delta, flush):
                    flags |= REVIDX_ISCENSORED

                # We assume consumers of addrevisioncb will want to retrieve
                # the added revision, which will require a call to
                # revision(). revision() will fast path if there is a cache
                # hit. So, we tell _addrevision() to always cache in this case.
                chain = self._addrevision(node, None, transaction, link,
                                          p1, p2, flags, (baserev, delta),
                                          ifh, dfh,
                                          alwayscache=bool(addrevisioncb))

                if addrevisioncb:
                    addrevisioncb(self, chain)

                if not dfh and not self._inline:
                    # addrevision switched from inline to conventional
                    # reopen the index
                    ifh.close()
                    dfh = self.opener(self.datafile, "a+")
                    ifh = self.opener(self.indexfile, "a+",
                                      checkambig=self._checkambig)
        finally:
            if dfh:
                dfh.close()
            ifh.close()

        return content

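    # Callback sketch (illustrative addition): a caller that wants to see
    # each node as it lands can pass ``addrevisioncb``; the names below are
    # hypothetical:
    #
    #   added = []
    #   def onadd(rl, node):
    #       added.append(node)
    #   rl.addgroup(cg, linkmapper, transaction, addrevisioncb=onadd)
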
    def iscensored(self, rev):
        """Check if a file revision is censored."""
        return False

    def _peek_iscensored(self, baserev, delta, flush):
        """Quickly check if a delta produces a censored revision."""
        return False

    def getstrippoint(self, minlink):
        """find the minimum rev that must be stripped to strip the linkrev

        Returns a tuple containing the minimum rev and a set of all revs that
        have linkrevs that will be broken by this strip.
        """
        brokenrevs = set()
        strippoint = len(self)

        heads = {}
        futurelargelinkrevs = set()
        for head in self.headrevs():
            headlinkrev = self.linkrev(head)
            heads[head] = headlinkrev
            if headlinkrev >= minlink:
                futurelargelinkrevs.add(headlinkrev)

        # This algorithm involves walking down the rev graph, starting at the
        # heads. Since the revs are topologically sorted according to linkrev,
        # once all head linkrevs are below the minlink, we know there are
        # no more revs that could have a linkrev greater than minlink.
        # So we can stop walking.
        while futurelargelinkrevs:
            strippoint -= 1
            linkrev = heads.pop(strippoint)

            if linkrev < minlink:
                brokenrevs.add(strippoint)
            else:
                futurelargelinkrevs.remove(linkrev)

            for p in self.parentrevs(strippoint):
                if p != nullrev:
                    plinkrev = self.linkrev(p)
                    heads[p] = plinkrev
                    if plinkrev >= minlink:
                        futurelargelinkrevs.add(plinkrev)

        return strippoint, brokenrevs

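    # Worked example (illustrative addition, made-up numbers): take three
    # revisions where rev 1 and rev 2 are both children of rev 0, with
    # linkrevs [0, 2, 1].  Stripping minlink = 2 must truncate at rev 1,
    # and rev 2 (linkrev 1 < 2) is reported as broken because it sits
    # above the strip point and is discarded with it:
    #
    #   strippoint, broken = rl.getstrippoint(2)   # hypothetical ``rl``
    #   # strippoint == 1, broken == {2}
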
    def strip(self, minlink, transaction):
        """truncate the revlog on the first revision with a linkrev >= minlink

        This function is called when we're stripping revision minlink and
        its descendants from the repository.

        We have to remove all revisions with linkrev >= minlink, because
        the equivalent changelog revisions will be renumbered after the
        strip.

        So we truncate the revlog on the first of these revisions, and
        trust that the caller has saved the revisions that shouldn't be
        removed and that it'll re-add them after this truncation.
        """
        if len(self) == 0:
            return

        rev, _ = self.getstrippoint(minlink)
        if rev == len(self):
            return

        # first truncate the files on disk
        end = self.start(rev)
        if not self._inline:
            transaction.add(self.datafile, end)
            end = rev * self._io.size
        else:
            end += rev * self._io.size

        transaction.add(self.indexfile, end)

        # then reset internal state in memory to forget those revisions
        self._cache = None
        self._chaininfocache = {}
        self._chunkclear()
        for x in xrange(rev, len(self)):
            del self.nodemap[self.node(x)]

        del self.index[rev:-1]

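    # Truncation-offset sketch (illustrative addition): keeping the first
    # ``rev`` revisions means keeping start(rev) data bytes and rev index
    # entries.  With a hypothetical 64-byte entry and start(rev) = 5000:
    #
    #   non-inline: datafile cut at 5000, indexfile cut at rev * 64
    #   inline:     single file cut at 5000 + rev * 64
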
    def checksize(self):
        expected = 0
        if len(self):
            expected = max(0, self.end(len(self) - 1))

        try:
            f = self.opener(self.datafile)
            f.seek(0, 2)
            actual = f.tell()
            f.close()
            dd = actual - expected
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise
            dd = 0

        try:
            f = self.opener(self.indexfile)
            f.seek(0, 2)
            actual = f.tell()
            f.close()
            s = self._io.size
            i = max(0, actual // s)
            di = actual - (i * s)
            if self._inline:
                databytes = 0
                for r in self:
                    databytes += max(0, self.length(r))
                dd = 0
                di = actual - len(self) * s - databytes
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise
            di = 0

        return (dd, di)

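    # Interpretation sketch (illustrative addition): checksize() reports
    # how far the on-disk files diverge from what the index promises --
    #
    #   dd  excess (or missing, if negative) bytes in the data file
    #   di  excess bytes in the index file beyond whole entries
    #
    # a healthy revlog returns (0, 0); verification-style checks can treat
    # anything else as a sign of corruption.
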
    def files(self):
        res = [self.indexfile]
        if not self._inline:
            res.append(self.datafile)
        return res