revlog: pass revlog flags to addrevision...
Remi Chaintron
r30744:e12c0fa1 default
@@ -1,1832 +1,1833 @@
# revlog.py - storage back-end for mercurial
#
# Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.

"""Storage back-end for Mercurial.

This provides efficient delta storage with O(1) retrieve and append
and O(changes) merge between branches.
"""

from __future__ import absolute_import

import collections
import errno
import hashlib
import os
import struct
import zlib

# import stuff from node for others to import from revlog
from .node import (
    bin,
    hex,
    nullid,
    nullrev,
)
from .i18n import _
from . import (
    ancestor,
    error,
    mdiff,
    parsers,
    templatefilters,
    util,
)

_pack = struct.pack
_unpack = struct.unpack
_compress = zlib.compress
_decompress = zlib.decompress

# revlog header flags
REVLOGV0 = 0
REVLOGNG = 1
REVLOGNGINLINEDATA = (1 << 16)
REVLOGGENERALDELTA = (1 << 17)
REVLOG_DEFAULT_FLAGS = REVLOGNGINLINEDATA
REVLOG_DEFAULT_FORMAT = REVLOGNG
REVLOG_DEFAULT_VERSION = REVLOG_DEFAULT_FORMAT | REVLOG_DEFAULT_FLAGS
REVLOGNG_FLAGS = REVLOGNGINLINEDATA | REVLOGGENERALDELTA

# revlog index flags
REVIDX_ISCENSORED = (1 << 15) # revision has censor metadata, must be verified
REVIDX_DEFAULT_FLAGS = 0
REVIDX_KNOWN_FLAGS = REVIDX_ISCENSORED

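# Note: the REVLOGNG* header flags above live in the high bits of the
# 4-byte version word at the start of the index file, while the REVIDX_*
# flags are stored per revision, in the low 16 bits of each index
# entry's first field (see offset_type() below).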
# max size of revlog with inline data
_maxinline = 131072
_chunksize = 1048576

RevlogError = error.RevlogError
LookupError = error.LookupError
CensoredNodeError = error.CensoredNodeError

def getoffset(q):
    return int(q >> 16)

def gettype(q):
    return int(q & 0xFFFF)

def offset_type(offset, type):
    if (type & ~REVIDX_KNOWN_FLAGS) != 0:
        raise ValueError('unknown revlog index flags')
    return long(long(offset) << 16 | type)

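# Example of the packing done by offset_type(): the data-file offset is
# shifted into the high bits and the per-revision flags occupy the low
# 16 bits, so offset_type(1024, REVIDX_ISCENSORED) == (1024 << 16) | 0x8000,
# which getoffset() and gettype() above decompose again.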
_nullhash = hashlib.sha1(nullid)

def hash(text, p1, p2):
    """generate a hash from the given text and its parent hashes

    This hash combines both the current file contents and its history
    in a manner that makes it easy to distinguish nodes with the same
    content in the revision graph.
    """
    # As of now, if one of the parent nodes is null, p2 is null
    if p2 == nullid:
        # deep copy of a hash is faster than creating one
        s = _nullhash.copy()
        s.update(p1)
    else:
        # none of the parent nodes are nullid
        l = [p1, p2]
        l.sort()
        s = hashlib.sha1(l[0])
        s.update(l[1])
    s.update(text)
    return s.digest()

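# In other words, hash() computes sha1(min(p1, p2) + max(p1, p2) + text):
# the parents are sorted first, so the resulting nodeid is independent
# of parent order.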
def decompress(bin):
    """ decompress the given input """
    if not bin:
        return bin
    t = bin[0]
    if t == '\0':
        return bin
    if t == 'x':
        try:
            return _decompress(bin)
        except zlib.error as e:
            raise RevlogError(_("revlog decompress error: %s") % str(e))
    if t == 'u':
        return util.buffer(bin, 1)
    raise RevlogError(_("unknown compression type %r") % t)

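# The first byte of a stored chunk doubles as a type tag: '\0' means the
# chunk is raw data that happens to start with NUL, 'x' is the leading
# byte of a zlib stream, and 'u' marks data stored uncompressed (the
# marker itself is skipped via util.buffer(bin, 1)).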
# index v0:
#  4 bytes: offset
#  4 bytes: compressed length
#  4 bytes: base rev
#  4 bytes: link rev
# 20 bytes: parent 1 nodeid
# 20 bytes: parent 2 nodeid
# 20 bytes: nodeid
indexformatv0 = ">4l20s20s20s"

class revlogoldio(object):
    def __init__(self):
        self.size = struct.calcsize(indexformatv0)

    def parseindex(self, data, inline):
        s = self.size
        index = []
        nodemap = {nullid: nullrev}
        n = off = 0
        l = len(data)
        while off + s <= l:
            cur = data[off:off + s]
            off += s
            e = _unpack(indexformatv0, cur)
            # transform to revlogv1 format
            e2 = (offset_type(e[0], 0), e[1], -1, e[2], e[3],
                  nodemap.get(e[4], nullrev), nodemap.get(e[5], nullrev), e[6])
            index.append(e2)
            nodemap[e[6]] = n
            n += 1

        # add the magic null revision at -1
        index.append((0, 0, 0, -1, -1, -1, -1, nullid))

        return index, nodemap, None

    def packentry(self, entry, node, version, rev):
        if gettype(entry[0]):
            raise RevlogError(_("index entry flags need RevlogNG"))
        e2 = (getoffset(entry[0]), entry[1], entry[3], entry[4],
              node(entry[5]), node(entry[6]), entry[7])
        return _pack(indexformatv0, *e2)

# index ng:
#  6 bytes: offset
#  2 bytes: flags
#  4 bytes: compressed length
#  4 bytes: uncompressed length
#  4 bytes: base rev
#  4 bytes: link rev
#  4 bytes: parent 1 rev
#  4 bytes: parent 2 rev
# 32 bytes: nodeid
indexformatng = ">Qiiiiii20s12x"
versionformat = ">I"

# corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
# signed integer)
_maxentrysize = 0x7fffffff

class revlogio(object):
    def __init__(self):
        self.size = struct.calcsize(indexformatng)

    def parseindex(self, data, inline):
        # call the C implementation to parse the index data
        index, cache = parsers.parse_index2(data, inline)
        return index, getattr(index, 'nodemap', None), cache

    def packentry(self, entry, node, version, rev):
        p = _pack(indexformatng, *entry)
        if rev == 0:
            p = _pack(versionformat, version) + p[4:]
        return p

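# In packentry() above, revision 0 always has data offset zero, so its
# first four bytes are free to carry the version header written with
# versionformat; the revlog constructor reads the version back from
# those same bytes when opening the index file.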
class revlog(object):
    """
    the underlying revision storage object

    A revlog consists of two parts, an index and the revision data.

    The index is a file with a fixed record size containing
    information on each revision, including its nodeid (hash), the
    nodeids of its parents, the position and offset of its data within
    the data file, and the revision it's based on. Finally, each entry
    contains a linkrev entry that can serve as a pointer to external
    data.

    The revision data itself is a linear collection of data chunks.
    Each chunk represents a revision and is usually represented as a
    delta against the previous chunk. To bound lookup time, runs of
    deltas are limited to about 2 times the length of the original
    version data. This makes retrieval of a version proportional to
    its size, or O(1) relative to the number of revisions.

    Both pieces of the revlog are written to in an append-only
    fashion, which means we never need to rewrite a file to insert or
    remove data, and can use some simple techniques to avoid the need
    for locking while reading.

    If checkambig, indexfile is opened with checkambig=True at
    writing, to avoid file stat ambiguity.
    """
    def __init__(self, opener, indexfile, checkambig=False):
        """
        create a revlog object

        opener is a function that abstracts the file opening operation
        and can be used to implement COW semantics or the like.
        """
        self.indexfile = indexfile
        self.datafile = indexfile[:-2] + ".d"
        self.opener = opener
        # When True, indexfile is opened with checkambig=True at writing, to
        # avoid file stat ambiguity.
        self._checkambig = checkambig
        # 3-tuple of (node, rev, text) for a raw revision.
        self._cache = None
        # Maps rev to chain base rev.
        self._chainbasecache = util.lrucachedict(100)
        # 2-tuple of (offset, data) of raw data from the revlog at an offset.
        self._chunkcache = (0, '')
        # How much data to read and cache into the raw revlog data cache.
        self._chunkcachesize = 65536
        self._maxchainlen = None
        self._aggressivemergedeltas = False
        self.index = []
        # Mapping of partial identifiers to full nodes.
        self._pcache = {}
        # Mapping of revision integer to full node.
        self._nodecache = {nullid: nullrev}
        self._nodepos = None

        v = REVLOG_DEFAULT_VERSION
        opts = getattr(opener, 'options', None)
        if opts is not None:
            if 'revlogv1' in opts:
                if 'generaldelta' in opts:
                    v |= REVLOGGENERALDELTA
            else:
                v = 0
            if 'chunkcachesize' in opts:
                self._chunkcachesize = opts['chunkcachesize']
            if 'maxchainlen' in opts:
                self._maxchainlen = opts['maxchainlen']
            if 'aggressivemergedeltas' in opts:
                self._aggressivemergedeltas = opts['aggressivemergedeltas']
            self._lazydeltabase = bool(opts.get('lazydeltabase', False))

        if self._chunkcachesize <= 0:
            raise RevlogError(_('revlog chunk cache size %r is not greater '
                                'than 0') % self._chunkcachesize)
        elif self._chunkcachesize & (self._chunkcachesize - 1):
            raise RevlogError(_('revlog chunk cache size %r is not a power '
                                'of 2') % self._chunkcachesize)

        indexdata = ''
        self._initempty = True
        try:
            f = self.opener(self.indexfile)
            indexdata = f.read()
            f.close()
            if len(indexdata) > 0:
                v = struct.unpack(versionformat, indexdata[:4])[0]
                self._initempty = False
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise

        self.version = v
        self._inline = v & REVLOGNGINLINEDATA
        self._generaldelta = v & REVLOGGENERALDELTA
        flags = v & ~0xFFFF
        fmt = v & 0xFFFF
        if fmt == REVLOGV0 and flags:
            raise RevlogError(_("index %s unknown flags %#04x for format v0")
                              % (self.indexfile, flags >> 16))
        elif fmt == REVLOGNG and flags & ~REVLOGNG_FLAGS:
            raise RevlogError(_("index %s unknown flags %#04x for revlogng")
                              % (self.indexfile, flags >> 16))
        elif fmt > REVLOGNG:
            raise RevlogError(_("index %s unknown format %d")
                              % (self.indexfile, fmt))

        self.storedeltachains = True

        self._io = revlogio()
        if self.version == REVLOGV0:
            self._io = revlogoldio()
        try:
            d = self._io.parseindex(indexdata, self._inline)
        except (ValueError, IndexError):
            raise RevlogError(_("index %s is corrupted") % (self.indexfile))
        self.index, nodemap, self._chunkcache = d
        if nodemap is not None:
            self.nodemap = self._nodecache = nodemap
        if not self._chunkcache:
            self._chunkclear()
        # revnum -> (chain-length, sum-delta-length)
        self._chaininfocache = {}

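    # Usage sketch (illustrative names, not from this file): a revlog is
    # normally opened through a vfs-style opener, e.g.
    #     rl = revlog(repo.svfs, "data/somefile.txt.i")
    # __init__ derives the matching "data/somefile.txt.d" data file name
    # and reads any existing index to pick up the on-disk version flags.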
    def tip(self):
        return self.node(len(self.index) - 2)
    def __contains__(self, rev):
        return 0 <= rev < len(self)
    def __len__(self):
        return len(self.index) - 1
    def __iter__(self):
        return iter(xrange(len(self)))
    def revs(self, start=0, stop=None):
        """iterate over all revs in this revlog (from start to stop)"""
        step = 1
        if stop is not None:
            if start > stop:
                step = -1
            stop += step
        else:
            stop = len(self)
        return xrange(start, stop, step)

    @util.propertycache
    def nodemap(self):
        self.rev(self.node(0))
        return self._nodecache

    def hasnode(self, node):
        try:
            self.rev(node)
            return True
        except KeyError:
            return False

    def clearcaches(self):
        self._cache = None
        self._chainbasecache.clear()
        self._chunkcache = (0, '')
        self._pcache = {}

        try:
            self._nodecache.clearcaches()
        except AttributeError:
            self._nodecache = {nullid: nullrev}
            self._nodepos = None

    def rev(self, node):
        try:
            return self._nodecache[node]
        except TypeError:
            raise
        except RevlogError:
            # parsers.c radix tree lookup failed
            raise LookupError(node, self.indexfile, _('no node'))
        except KeyError:
            # pure python cache lookup failed
            n = self._nodecache
            i = self.index
            p = self._nodepos
            if p is None:
                p = len(i) - 2
            for r in xrange(p, -1, -1):
                v = i[r][7]
                n[v] = r
                if v == node:
                    self._nodepos = r - 1
                    return r
            raise LookupError(node, self.indexfile, _('no node'))

    # Accessors for index entries.

    # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
    # are flags.
    def start(self, rev):
        return int(self.index[rev][0] >> 16)

    def flags(self, rev):
        return self.index[rev][0] & 0xFFFF

    def length(self, rev):
        return self.index[rev][1]

    def rawsize(self, rev):
        """return the length of the uncompressed text for a given revision"""
        l = self.index[rev][2]
        if l >= 0:
            return l

        t = self.revision(self.node(rev))
        return len(t)
    size = rawsize

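    # The index stores the uncompressed length in slot 2; old-style (v0)
    # indexes are parsed with -1 there (see revlogoldio.parseindex), in
    # which case rawsize() falls back to rebuilding the revision text and
    # measuring it.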
    def chainbase(self, rev):
        base = self._chainbasecache.get(rev)
        if base is not None:
            return base

        index = self.index
        base = index[rev][3]
        while base != rev:
            rev = base
            base = index[rev][3]

        self._chainbasecache[rev] = base
        return base

    def linkrev(self, rev):
        return self.index[rev][4]

    def parentrevs(self, rev):
        return self.index[rev][5:7]

    def node(self, rev):
        return self.index[rev][7]

    # Derived from index values.

    def end(self, rev):
        return self.start(rev) + self.length(rev)

    def parents(self, node):
        i = self.index
        d = i[self.rev(node)]
        return i[d[5]][7], i[d[6]][7] # map revisions to nodes inline

    def chainlen(self, rev):
        return self._chaininfo(rev)[0]

    def _chaininfo(self, rev):
        chaininfocache = self._chaininfocache
        if rev in chaininfocache:
            return chaininfocache[rev]
        index = self.index
        generaldelta = self._generaldelta
        iterrev = rev
        e = index[iterrev]
        clen = 0
        compresseddeltalen = 0
        while iterrev != e[3]:
            clen += 1
            compresseddeltalen += e[1]
            if generaldelta:
                iterrev = e[3]
            else:
                iterrev -= 1
            if iterrev in chaininfocache:
                t = chaininfocache[iterrev]
                clen += t[0]
                compresseddeltalen += t[1]
                break
            e = index[iterrev]
        else:
            # Add text length of base since decompressing that also takes
            # work. For cache hits the length is already included.
            compresseddeltalen += e[1]
        r = (clen, compresseddeltalen)
        chaininfocache[rev] = r
        return r

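    # With generaldelta, index[rev][3] records the revision a delta was
    # computed against, so chains can jump around the revlog; without it,
    # a delta always applies on top of rev - 1. Both the walk above and
    # _deltachain() below branch on this flag.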
    def _deltachain(self, rev, stoprev=None):
        """Obtain the delta chain for a revision.

        ``stoprev`` specifies a revision to stop at. If not specified, we
        stop at the base of the chain.

        Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
        revs in ascending order and ``stopped`` is a bool indicating whether
        ``stoprev`` was hit.
        """
        chain = []

        # Alias to prevent attribute lookup in tight loop.
        index = self.index
        generaldelta = self._generaldelta

        iterrev = rev
        e = index[iterrev]
        while iterrev != e[3] and iterrev != stoprev:
            chain.append(iterrev)
            if generaldelta:
                iterrev = e[3]
            else:
                iterrev -= 1
            e = index[iterrev]

        if iterrev == stoprev:
            stopped = True
        else:
            chain.append(iterrev)
            stopped = False

        chain.reverse()
        return chain, stopped

    def ancestors(self, revs, stoprev=0, inclusive=False):
        """Generate the ancestors of 'revs' in reverse topological order.
        Does not generate revs lower than stoprev.

        See the documentation for ancestor.lazyancestors for more details."""

        return ancestor.lazyancestors(self.parentrevs, revs, stoprev=stoprev,
                                      inclusive=inclusive)

    def descendants(self, revs):
        """Generate the descendants of 'revs' in revision order.

        Yield a sequence of revision numbers starting with a child of
        some rev in revs, i.e., each revision is *not* considered a
        descendant of itself. Results are ordered by revision number (a
        topological sort)."""
        first = min(revs)
        if first == nullrev:
            for i in self:
                yield i
            return

        seen = set(revs)
        for i in self.revs(start=first + 1):
            for x in self.parentrevs(i):
                if x != nullrev and x in seen:
                    seen.add(i)
                    yield i
                    break

    def findcommonmissing(self, common=None, heads=None):
        """Return a tuple of the ancestors of common and the ancestors of heads
        that are not ancestors of common. In revset terminology, we return the
        tuple:

          ::common, (::heads) - (::common)

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of node IDs. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullid."""
        if common is None:
            common = [nullid]
        if heads is None:
            heads = self.heads()

        common = [self.rev(n) for n in common]
        heads = [self.rev(n) for n in heads]

        # we want the ancestors, but inclusive
        class lazyset(object):
            def __init__(self, lazyvalues):
                self.addedvalues = set()
                self.lazyvalues = lazyvalues

            def __contains__(self, value):
                return value in self.addedvalues or value in self.lazyvalues

            def __iter__(self):
                added = self.addedvalues
                for r in added:
                    yield r
                for r in self.lazyvalues:
                    if r not in added:
                        yield r

            def add(self, value):
                self.addedvalues.add(value)

            def update(self, values):
                self.addedvalues.update(values)

        has = lazyset(self.ancestors(common))
        has.add(nullrev)
        has.update(common)

        # take all ancestors from heads that aren't in has
        missing = set()
        visit = collections.deque(r for r in heads if r not in has)
        while visit:
            r = visit.popleft()
            if r in missing:
                continue
            else:
                missing.add(r)
                for p in self.parentrevs(r):
                    if p not in has:
                        visit.append(p)
        missing = list(missing)
        missing.sort()
        return has, [self.node(miss) for miss in missing]

    def incrementalmissingrevs(self, common=None):
        """Return an object that can be used to incrementally compute the
        revision numbers of the ancestors of arbitrary sets that are not
        ancestors of common. This is an ancestor.incrementalmissingancestors
        object.

        'common' is a list of revision numbers. If common is not supplied, uses
        nullrev.
        """
        if common is None:
            common = [nullrev]

        return ancestor.incrementalmissingancestors(self.parentrevs, common)

    def findmissingrevs(self, common=None, heads=None):
        """Return the revision numbers of the ancestors of heads that
        are not ancestors of common.

        More specifically, return a list of revision numbers corresponding to
        nodes N such that every N satisfies the following constraints:

          1. N is an ancestor of some node in 'heads'
          2. N is not an ancestor of any node in 'common'

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of revision numbers. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullid."""
        if common is None:
            common = [nullrev]
        if heads is None:
            heads = self.headrevs()

        inc = self.incrementalmissingrevs(common=common)
        return inc.missingancestors(heads)

    def findmissing(self, common=None, heads=None):
        """Return the ancestors of heads that are not ancestors of common.

        More specifically, return a list of nodes N such that every N
        satisfies the following constraints:

          1. N is an ancestor of some node in 'heads'
          2. N is not an ancestor of any node in 'common'

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of node IDs. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullid."""
        if common is None:
            common = [nullid]
        if heads is None:
            heads = self.heads()

        common = [self.rev(n) for n in common]
        heads = [self.rev(n) for n in heads]

        inc = self.incrementalmissingrevs(common=common)
        return [self.node(r) for r in inc.missingancestors(heads)]

    def nodesbetween(self, roots=None, heads=None):
        """Return a topological path from 'roots' to 'heads'.

        Return a tuple (nodes, outroots, outheads) where 'nodes' is a
        topologically sorted list of all nodes N that satisfy both of
        these constraints:

          1. N is a descendant of some node in 'roots'
          2. N is an ancestor of some node in 'heads'

        Every node is considered to be both a descendant and an ancestor
        of itself, so every reachable node in 'roots' and 'heads' will be
        included in 'nodes'.

        'outroots' is the list of reachable nodes in 'roots', i.e., the
        subset of 'roots' that is returned in 'nodes'. Likewise,
        'outheads' is the subset of 'heads' that is also in 'nodes'.

        'roots' and 'heads' are both lists of node IDs. If 'roots' is
        unspecified, uses nullid as the only root. If 'heads' is
        unspecified, uses list of all of the revlog's heads."""
        nonodes = ([], [], [])
        if roots is not None:
            roots = list(roots)
            if not roots:
                return nonodes
            lowestrev = min([self.rev(n) for n in roots])
        else:
            roots = [nullid] # Everybody's a descendant of nullid
            lowestrev = nullrev
        if (lowestrev == nullrev) and (heads is None):
            # We want _all_ the nodes!
            return ([self.node(r) for r in self], [nullid], list(self.heads()))
        if heads is None:
            # All nodes are ancestors, so the latest ancestor is the last
            # node.
            highestrev = len(self) - 1
            # Set ancestors to None to signal that every node is an ancestor.
            ancestors = None
            # Set heads to an empty dictionary for later discovery of heads
            heads = {}
        else:
            heads = list(heads)
            if not heads:
                return nonodes
            ancestors = set()
            # Turn heads into a dictionary so we can remove 'fake' heads.
            # Also, later we will be using it to filter out the heads we can't
            # find from roots.
            heads = dict.fromkeys(heads, False)
            # Start at the top and keep marking parents until we're done.
            nodestotag = set(heads)
            # Remember where the top was so we can use it as a limit later.
            highestrev = max([self.rev(n) for n in nodestotag])
            while nodestotag:
                # grab a node to tag
                n = nodestotag.pop()
                # Never tag nullid
                if n == nullid:
                    continue
                # A node's revision number represents its place in a
                # topologically sorted list of nodes.
                r = self.rev(n)
                if r >= lowestrev:
                    if n not in ancestors:
                        # If we are possibly a descendant of one of the roots
                        # and we haven't already been marked as an ancestor
                        ancestors.add(n) # Mark as ancestor
                        # Add non-nullid parents to list of nodes to tag.
                        nodestotag.update([p for p in self.parents(n) if
                                           p != nullid])
                    elif n in heads: # We've seen it before, is it a fake head?
                        # So it is, real heads should not be the ancestors of
                        # any other heads.
                        heads.pop(n)
            if not ancestors:
                return nonodes
            # Now that we have our set of ancestors, we want to remove any
            # roots that are not ancestors.

            # If one of the roots was nullid, everything is included anyway.
            if lowestrev > nullrev:
                # But, since we weren't, let's recompute the lowest rev to not
                # include roots that aren't ancestors.

                # Filter out roots that aren't ancestors of heads
                roots = [root for root in roots if root in ancestors]
                # Recompute the lowest revision
                if roots:
                    lowestrev = min([self.rev(root) for root in roots])
                else:
                    # No more roots? Return empty list
                    return nonodes
            else:
                # We are descending from nullid, and don't need to care about
                # any other roots.
                lowestrev = nullrev
                roots = [nullid]
        # Transform our roots list into a set.
        descendants = set(roots)
        # Also, keep the original roots so we can filter out roots that aren't
        # 'real' roots (i.e. are descended from other roots).
        roots = descendants.copy()
        # Our topologically sorted list of output nodes.
        orderedout = []
        # Don't start at nullid since we don't want nullid in our output list,
        # and if nullid shows up in descendants, empty parents will look like
        # they're descendants.
        for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
            n = self.node(r)
            isdescendant = False
            if lowestrev == nullrev: # Everybody is a descendant of nullid
                isdescendant = True
            elif n in descendants:
                # n is already a descendant
                isdescendant = True
                # This check only needs to be done here because all the roots
                # will start being marked as descendants before the loop.
                if n in roots:
                    # If n was a root, check if it's a 'real' root.
                    p = tuple(self.parents(n))
                    # If any of its parents are descendants, it's not a root.
                    if (p[0] in descendants) or (p[1] in descendants):
                        roots.remove(n)
            else:
                p = tuple(self.parents(n))
                # A node is a descendant if either of its parents are
                # descendants. (We seeded the dependents list with the roots
                # up there, remember?)
                if (p[0] in descendants) or (p[1] in descendants):
                    descendants.add(n)
                    isdescendant = True
            if isdescendant and ((ancestors is None) or (n in ancestors)):
                # Only include nodes that are both descendants and ancestors.
                orderedout.append(n)
                if (ancestors is not None) and (n in heads):
                    # We're trying to figure out which heads are reachable
                    # from roots.
                    # Mark this head as having been reached
                    heads[n] = True
                elif ancestors is None:
                    # Otherwise, we're trying to discover the heads.
                    # Assume this is a head because if it isn't, the next step
                    # will eventually remove it.
                    heads[n] = True
                    # But, obviously its parents aren't.
                    for p in self.parents(n):
                        heads.pop(p, None)
        heads = [head for head, flag in heads.iteritems() if flag]
        roots = list(roots)
        assert orderedout
        assert roots
        assert heads
        return (orderedout, roots, heads)

    def headrevs(self):
        try:
            return self.index.headrevs()
        except AttributeError:
            return self._headrevs()

    def computephases(self, roots):
        return self.index.computephasesmapsets(roots)

    def _headrevs(self):
        count = len(self)
        if not count:
            return [nullrev]
        # we won't iter over filtered rev so nobody is a head at start
        ishead = [0] * (count + 1)
        index = self.index
        for r in self:
            ishead[r] = 1 # I may be a head
            e = index[r]
            ishead[e[5]] = ishead[e[6]] = 0 # my parents are not
        return [r for r, val in enumerate(ishead) if val]

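    # Note on _headrevs() above: ishead has count + 1 slots so that a
    # write to ishead[nullrev] (index -1) lands in the spare final slot
    # instead of clobbering the last real revision; that slot is only
    # ever cleared, so it can never be reported as a head.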
845 def heads(self, start=None, stop=None):
845 def heads(self, start=None, stop=None):
846 """return the list of all nodes that have no children
846 """return the list of all nodes that have no children
847
847
848 if start is specified, only heads that are descendants of
848 if start is specified, only heads that are descendants of
849 start will be returned
849 start will be returned
850 if stop is specified, it will consider all the revs from stop
850 if stop is specified, it will consider all the revs from stop
851 as if they had no children
851 as if they had no children
852 """
852 """
853 if start is None and stop is None:
853 if start is None and stop is None:
854 if not len(self):
854 if not len(self):
855 return [nullid]
855 return [nullid]
856 return [self.node(r) for r in self.headrevs()]
856 return [self.node(r) for r in self.headrevs()]
857
857
858 if start is None:
858 if start is None:
859 start = nullid
859 start = nullid
860 if stop is None:
860 if stop is None:
861 stop = []
861 stop = []
862 stoprevs = set([self.rev(n) for n in stop])
862 stoprevs = set([self.rev(n) for n in stop])
863 startrev = self.rev(start)
863 startrev = self.rev(start)
864 reachable = set((startrev,))
864 reachable = set((startrev,))
865 heads = set((startrev,))
865 heads = set((startrev,))
866
866
867 parentrevs = self.parentrevs
867 parentrevs = self.parentrevs
868 for r in self.revs(start=startrev + 1):
868 for r in self.revs(start=startrev + 1):
869 for p in parentrevs(r):
869 for p in parentrevs(r):
870 if p in reachable:
870 if p in reachable:
871 if r not in stoprevs:
871 if r not in stoprevs:
872 reachable.add(r)
872 reachable.add(r)
873 heads.add(r)
873 heads.add(r)
874 if p in heads and p not in stoprevs:
874 if p in heads and p not in stoprevs:
875 heads.remove(p)
875 heads.remove(p)
876
876
877 return [self.node(r) for r in heads]
877 return [self.node(r) for r in heads]
878
878
    def children(self, node):
        """find the children of a given node"""
        c = []
        p = self.rev(node)
        for r in self.revs(start=p + 1):
            prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
            if prevs:
                for pr in prevs:
                    if pr == p:
                        c.append(self.node(r))
            elif p == nullrev:
                c.append(self.node(r))
        return c

    def descendant(self, start, end):
        if start == nullrev:
            return True
        for i in self.descendants([start]):
            if i == end:
                return True
            elif i > end:
                break
        return False

    def commonancestorsheads(self, a, b):
        """calculate all the heads of the common ancestors of nodes a and b"""
        a, b = self.rev(a), self.rev(b)
        try:
            ancs = self.index.commonancestorsheads(a, b)
        except (AttributeError, OverflowError): # C implementation failed
            ancs = ancestor.commonancestorsheads(self.parentrevs, a, b)
        return map(self.node, ancs)

    def isancestor(self, a, b):
        """return True if node a is an ancestor of node b

        The implementation of this is trivial but the use of
        commonancestorsheads is not."""
        return a in self.commonancestorsheads(a, b)

    def ancestor(self, a, b):
        """calculate the "best" common ancestor of nodes a and b"""

        a, b = self.rev(a), self.rev(b)
        try:
            ancs = self.index.ancestors(a, b)
        except (AttributeError, OverflowError):
            ancs = ancestor.ancestors(self.parentrevs, a, b)
        if ancs:
            # choose a consistent winner when there's a tie
            return min(map(self.node, ancs))
        return nullid

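    # The two ancestor helpers above share a pattern (an observation, not
    # original commentary): try the C index implementation first and fall
    # back to the pure-Python ancestor module when the C extension is
    # missing (AttributeError) or the inputs overflow its native integers
    # (OverflowError).
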
    def _match(self, id):
        if isinstance(id, int):
            # rev
            return self.node(id)
        if len(id) == 20:
            # possibly a binary node
            # odds of a binary node being all hex in ASCII are 1 in 10**25
            try:
                node = id
                self.rev(node) # quick search the index
                return node
            except LookupError:
                pass # may be partial hex id
        try:
            # str(rev)
            rev = int(id)
            if str(rev) != id:
                raise ValueError
            if rev < 0:
                rev = len(self) + rev
            if rev < 0 or rev >= len(self):
                raise ValueError
            return self.node(rev)
        except (ValueError, OverflowError):
            pass
        if len(id) == 40:
            try:
                # a full hex nodeid?
                node = bin(id)
                self.rev(node)
                return node
            except (TypeError, LookupError):
                pass

    def _partialmatch(self, id):
        try:
            partial = self.index.partialmatch(id)
            if partial and self.hasnode(partial):
                return partial
            return None
        except RevlogError:
            # parsers.c radix tree lookup gave multiple matches
            # fast path: for unfiltered changelog, radix tree is accurate
            if not getattr(self, 'filteredrevs', None):
                raise LookupError(id, self.indexfile,
                                  _('ambiguous identifier'))
            # fall through to slow path that filters hidden revisions
        except (AttributeError, ValueError):
            # we are pure python, or key was too short to search radix tree
            pass

        if id in self._pcache:
            return self._pcache[id]

        if len(id) < 40:
            try:
                # hex(node)[:...]
                l = len(id) // 2 # grab an even number of digits
                prefix = bin(id[:l * 2])
                nl = [e[7] for e in self.index if e[7].startswith(prefix)]
                nl = [n for n in nl if hex(n).startswith(id) and
                      self.hasnode(n)]
                if len(nl) > 0:
                    if len(nl) == 1:
                        self._pcache[id] = nl[0]
                        return nl[0]
                    raise LookupError(id, self.indexfile,
                                      _('ambiguous identifier'))
                return None
            except TypeError:
                pass

    def lookup(self, id):
        """locate a node based on:
            - revision number or str(revision number)
            - nodeid or subset of hex nodeid
        """
        n = self._match(id)
        if n is not None:
            return n
        n = self._partialmatch(id)
        if n:
            return n

        raise LookupError(id, self.indexfile, _('no match found'))

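    # Illustrative sketch (not part of the original file): for a revlog rl
    # whose revision 0 has binary nodeid n, each of these resolves to n:
    #   rl.lookup(0)            # revision number
    #   rl.lookup('0')          # str(revision number), via _match
    #   rl.lookup(n)            # 20-byte binary nodeid
    #   rl.lookup(hex(n))       # full 40-char hex nodeid
    #   rl.lookup(hex(n)[:12])  # unambiguous hex prefix, via _partialmatch
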
    def cmp(self, node, text):
        """compare text with a given file revision

        returns True if text is different than what is stored.
        """
        p1, p2 = self.parents(node)
        return hash(text, p1, p2) != node

    def _addchunk(self, offset, data):
        """Add a segment to the revlog cache.

        Accepts an absolute offset and the data that is at that location.
        """
        o, d = self._chunkcache
        # try to add to existing cache
        if o + len(d) == offset and len(d) + len(data) < _chunksize:
            self._chunkcache = o, d + data
        else:
            self._chunkcache = offset, data

    def _loadchunk(self, offset, length, df=None):
        """Load a segment of raw data from the revlog.

        Accepts an absolute offset, length to read, and an optional existing
        file handle to read from.

        If an existing file handle is passed, it will be seeked and the
        original seek position will NOT be restored.

        Returns a str or buffer of raw byte data.
        """
        if df is not None:
            closehandle = False
        else:
            if self._inline:
                df = self.opener(self.indexfile)
            else:
                df = self.opener(self.datafile)
            closehandle = True

        # Cache data both forward and backward around the requested
        # data, in a fixed size window. This helps speed up operations
        # involving reading the revlog backwards.
        cachesize = self._chunkcachesize
        realoffset = offset & ~(cachesize - 1)
        reallength = (((offset + length + cachesize) & ~(cachesize - 1))
                      - realoffset)
        df.seek(realoffset)
        d = df.read(reallength)
        if closehandle:
            df.close()
        self._addchunk(realoffset, d)
        if offset != realoffset or reallength != length:
            return util.buffer(d, offset - realoffset, length)
        return d

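    # Worked example (a sketch; 65536 is just a plausible value for
    # self._chunkcachesize, which must be a power of two): for offset=100000
    # and length=1000, realoffset rounds down to 65536 and reallength comes
    # out to 65536, so bytes [65536, 131072) are read and cached while only
    # the [100000, 101000) slice is handed back through util.buffer.
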
    def _getchunk(self, offset, length, df=None):
        """Obtain a segment of raw data from the revlog.

        Accepts an absolute offset, length of bytes to obtain, and an
        optional file handle to the already-opened revlog. If the file
        handle is used, its original seek position will not be preserved.

        Requests for data may be returned from a cache.

        Returns a str or a buffer instance of raw byte data.
        """
        o, d = self._chunkcache
        l = len(d)

        # is it in the cache?
        cachestart = offset - o
        cacheend = cachestart + length
        if cachestart >= 0 and cacheend <= l:
            if cachestart == 0 and cacheend == l:
                return d # avoid a copy
            return util.buffer(d, cachestart, cacheend - cachestart)

        return self._loadchunk(offset, length, df=df)

    def _chunkraw(self, startrev, endrev, df=None):
        """Obtain a segment of raw data corresponding to a range of revisions.

        Accepts the start and end revisions and an optional already-open
        file handle to be used for reading. If the file handle is read, its
        seek position will not be preserved.

        Requests for data may be satisfied by a cache.

        Returns a 2-tuple of (offset, data) for the requested range of
        revisions. Offset is the integer offset from the beginning of the
        revlog and data is a str or buffer of the raw byte data.

        Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
        to determine where each revision's data begins and ends.
        """
        # Inlined self.start(startrev) & self.end(endrev) for perf reasons
        # (functions are expensive).
        index = self.index
        istart = index[startrev]
        start = int(istart[0] >> 16)
        if startrev == endrev:
            end = start + istart[1]
        else:
            iend = index[endrev]
            end = int(iend[0] >> 16) + iend[1]

        if self._inline:
            start += (startrev + 1) * self._io.size
            end += (endrev + 1) * self._io.size
        length = end - start

        return start, self._getchunk(start, length, df=df)

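    # Note on the inline adjustment above (an observation, not original
    # commentary): in an inline revlog the data chunks are interleaved with
    # the index entries in the .i file, so a revision's data is shifted by
    # one index-entry size per preceding revision, hence the
    # (rev + 1) * self._io.size correction to both ends of the range.
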
    def _chunk(self, rev, df=None):
        """Obtain a single decompressed chunk for a revision.

        Accepts an integer revision and an optional already-open file handle
        to be used for reading. If used, the seek position of the file will not
        be preserved.

        Returns a str holding uncompressed data for the requested revision.
        """
        return decompress(self._chunkraw(rev, rev, df=df)[1])

    def _chunks(self, revs, df=None):
        """Obtain decompressed chunks for the specified revisions.

        Accepts an iterable of numeric revisions that are assumed to be in
        ascending order. Also accepts an optional already-open file handle
        to be used for reading. If used, the seek position of the file will
        not be preserved.

        This function is similar to calling ``self._chunk()`` multiple times,
        but is faster.

        Returns a list with decompressed data for each requested revision.
        """
        if not revs:
            return []
        start = self.start
        length = self.length
        inline = self._inline
        iosize = self._io.size
        buffer = util.buffer

        l = []
        ladd = l.append

        try:
            offset, data = self._chunkraw(revs[0], revs[-1], df=df)
        except OverflowError:
            # issue4215 - we can't cache a run of chunks greater than
            # 2G on Windows
            return [self._chunk(rev, df=df) for rev in revs]

        for rev in revs:
            chunkstart = start(rev)
            if inline:
                chunkstart += (rev + 1) * iosize
            chunklength = length(rev)
            ladd(decompress(buffer(data, chunkstart - offset, chunklength)))

        return l

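    # The batching above (an observation, not original commentary): a single
    # _chunkraw() call fetches the byte range spanning revs[0] through
    # revs[-1], and each revision is then sliced out with a zero-copy
    # util.buffer view before decompression, instead of issuing one read
    # per revision as repeated _chunk() calls would.
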
    def _chunkclear(self):
        """Clear the raw chunk cache."""
        self._chunkcache = (0, '')

    def deltaparent(self, rev):
        """return deltaparent of the given revision"""
        base = self.index[rev][3]
        if base == rev:
            return nullrev
        elif self._generaldelta:
            return base
        else:
            return rev - 1

    def revdiff(self, rev1, rev2):
        """return or calculate a delta between two revisions"""
        if rev1 != nullrev and self.deltaparent(rev2) == rev1:
            return str(self._chunk(rev2))

        return mdiff.textdiff(self.revision(rev1),
                              self.revision(rev2))

    def revision(self, nodeorrev, _df=None, raw=False):
        """return an uncompressed revision of a given node or revision
        number.

        _df - an existing file handle to read from. (internal-only)
        raw - an optional argument specifying if the revision data is to be
        treated as raw data when applying flag transforms. 'raw' should be set
        to True when generating changegroups or in debug commands.
        """
        if isinstance(nodeorrev, int):
            rev = nodeorrev
            node = self.node(rev)
        else:
            node = nodeorrev
            rev = None

        cachedrev = None
        if node == nullid:
            return ""
        if self._cache:
            if self._cache[0] == node:
                return self._cache[2]
            cachedrev = self._cache[1]

        # look up what we need to read
        text = None
        if rev is None:
            rev = self.rev(node)

        # check rev flags
        if self.flags(rev) & ~REVIDX_KNOWN_FLAGS:
            raise RevlogError(_('incompatible revision flag %x') %
                              (self.flags(rev) & ~REVIDX_KNOWN_FLAGS))

        chain, stopped = self._deltachain(rev, stoprev=cachedrev)
        if stopped:
            text = self._cache[2]

        # drop cache to save memory
        self._cache = None

        bins = self._chunks(chain, df=_df)
        if text is None:
            text = str(bins[0])
            bins = bins[1:]

        text = mdiff.patches(text, bins)
        self.checkhash(text, node, rev=rev)
        self._cache = (node, rev, text)
        return text

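    # Reconstruction sketch (not part of the original file): for a delta
    # chain [base, d1, d2, rev], _deltachain() lists the revisions to read,
    # _chunks() decompresses them in one pass, and mdiff.patches() folds the
    # deltas onto the base fulltext. When the cached fulltext lies on the
    # chain, stoprev=cachedrev truncates the walk, 'stopped' comes back True
    # and the cached text seeds the patching instead of bins[0].
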
    def hash(self, text, p1, p2):
        """Compute a node hash.

        Available as a function so that subclasses can replace the hash
        as needed.
        """
        return hash(text, p1, p2)

    def checkhash(self, text, node, p1=None, p2=None, rev=None):
        """Check node hash integrity.

        Available as a function so that subclasses can extend hash mismatch
        behaviors as needed.
        """
        if p1 is None and p2 is None:
            p1, p2 = self.parents(node)
        if node != self.hash(text, p1, p2):
            revornode = rev
            if revornode is None:
                revornode = templatefilters.short(hex(node))
            raise RevlogError(_("integrity check failed on %s:%s")
                              % (self.indexfile, revornode))

    def checkinlinesize(self, tr, fp=None):
        """Check if the revlog is too big for inline and convert if so.

        This should be called after revisions are added to the revlog. If the
        revlog has grown too large to be an inline revlog, it will convert it
        to use multiple index and data files.
        """
        if not self._inline or (self.start(-2) + self.length(-2)) < _maxinline:
            return

        trinfo = tr.find(self.indexfile)
        if trinfo is None:
            raise RevlogError(_("%s not found in the transaction")
                              % self.indexfile)

        trindex = trinfo[2]
        if trindex is not None:
            dataoff = self.start(trindex)
        else:
            # revlog was stripped at start of transaction, use all leftover data
            trindex = len(self) - 1
            dataoff = self.end(-2)

        tr.add(self.datafile, dataoff)

        if fp:
            fp.flush()
            fp.close()

        df = self.opener(self.datafile, 'w')
        try:
            for r in self:
                df.write(self._chunkraw(r, r)[1])
        finally:
            df.close()

        fp = self.opener(self.indexfile, 'w', atomictemp=True,
                         checkambig=self._checkambig)
        self.version &= ~(REVLOGNGINLINEDATA)
        self._inline = False
        for i in self:
            e = self._io.packentry(self.index[i], self.node, self.version, i)
            fp.write(e)

        # if we don't call close, the temp file will never replace the
        # real index
        fp.close()

        tr.replace(self.indexfile, trindex * self._io.size)
        self._chunkclear()

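    # Conversion sketch (an observation, not original commentary): the method
    # above migrates an oversized inline revlog by streaming every raw chunk
    # into a fresh .d file, then atomically rewriting the .i file with the
    # REVLOGNGINLINEDATA flag cleared, so readers switch to the two-file
    # layout.
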
    def addrevision(self, text, transaction, link, p1, p2, cachedelta=None,
                    node=None, flags=REVIDX_DEFAULT_FLAGS):
        """add a revision to the log

        text - the revision data to add
        transaction - the transaction object used for rollback
        link - the linkrev data to add
        p1, p2 - the parent nodeids of the revision
        cachedelta - an optional precomputed delta
        node - nodeid of revision; typically node is not specified, and it is
            computed by default as hash(text, p1, p2), however subclasses might
            use different hashing method (and override checkhash() in such case)
        flags - the known flags to set on the revision
        """
        if link == nullrev:
            raise RevlogError(_("attempted to add linkrev -1 to %s")
                              % self.indexfile)

        if len(text) > _maxentrysize:
            raise RevlogError(
                _("%s: size of %d bytes exceeds maximum revlog storage of 2GiB")
                % (self.indexfile, len(text)))

        node = node or self.hash(text, p1, p2)
        if node in self.nodemap:
            return node

        dfh = None
        if not self._inline:
            dfh = self.opener(self.datafile, "a+")
        ifh = self.opener(self.indexfile, "a+", checkambig=self._checkambig)
        try:
            return self._addrevision(node, text, transaction, link, p1, p2,
                                     flags, cachedelta, ifh, dfh)
        finally:
            if dfh:
                dfh.close()
            ifh.close()

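    # Illustrative call (a sketch, not part of the original file): with the
    # new 'flags' parameter a caller can pass index flags straight through,
    # e.g. to store a censor tombstone ('tr', parents and tombstone text are
    # assumed to exist):
    #   rl.addrevision(tombstone, tr, linkrev, p1, p2,
    #                  flags=REVIDX_ISCENSORED)
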
    def compress(self, text):
        """ generate a possibly-compressed representation of text """
        if not text:
            return ("", text)
        l = len(text)
        bin = None
        if l < 44:
            pass
        elif l > 1000000:
            # zlib makes an internal copy, thus doubling memory usage for
            # large files, so let's do this in pieces
            z = zlib.compressobj()
            p = []
            pos = 0
            while pos < l:
                pos2 = pos + 2**20
                p.append(z.compress(text[pos:pos2]))
                pos = pos2
            p.append(z.flush())
            if sum(map(len, p)) < l:
                bin = "".join(p)
        else:
            bin = _compress(text)
        if bin is None or len(bin) > l:
            if text[0] == '\0':
                return ("", text)
            return ('u', text)
        return ("", bin)

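    # Return convention above (an observation, not original commentary):
    # ('', data) marks data that is either zlib-compressed or a literal text
    # starting with '\0', while ('u', text) flags text stored uncompressed
    # because compression did not pay off; texts shorter than 44 bytes skip
    # compression entirely and take the same uncompressed path.
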
    def _isgooddelta(self, d, textlen):
        """Returns True if the given delta is good. Good means that it is within
        the disk span, disk size, and chain length bounds that we know to be
        performant."""
        if d is None:
            return False

        # - 'dist' is the distance from the base revision -- bounding it limits
        #   the amount of I/O we need to do.
        # - 'compresseddeltalen' is the sum of the total size of deltas we need
        #   to apply -- bounding it limits the amount of CPU we consume.
        dist, l, data, base, chainbase, chainlen, compresseddeltalen = d
        if (dist > textlen * 4 or l > textlen or
            compresseddeltalen > textlen * 2 or
            (self._maxchainlen and chainlen > self._maxchainlen)):
            return False

        return True

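    # Worked example (a sketch, not part of the original file): for a
    # 1000-byte fulltext, a candidate delta is rejected when the span to
    # read exceeds 4000 bytes (dist), the delta alone exceeds 1000 bytes
    # (l), the chain's accumulated deltas exceed 2000 bytes, or the chain
    # grows past the configured _maxchainlen.
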
    def _addrevision(self, node, text, transaction, link, p1, p2, flags,
                     cachedelta, ifh, dfh, alwayscache=False, raw=False):
        """internal function to add revisions to the log

        see addrevision for argument descriptions.
        invariants:
        - text is optional (can be None); if not set, cachedelta must be set.
          if both are set, they must correspond to each other.
        - raw is optional; if set to True, it indicates the revision data is to
          be treated by _processflags() as raw. It is usually set by changegroup
          generation and debug commands.
        """
        btext = [text]
        def buildtext():
            if btext[0] is not None:
                return btext[0]
            baserev = cachedelta[0]
            delta = cachedelta[1]
            # special case deltas which replace entire base; no need to decode
            # base revision. this neatly avoids censored bases, which throw when
            # they're decoded.
            hlen = struct.calcsize(">lll")
            if delta[:hlen] == mdiff.replacediffheader(self.rawsize(baserev),
                                                       len(delta) - hlen):
                btext[0] = delta[hlen:]
            else:
                if self._inline:
                    fh = ifh
                else:
                    fh = dfh
                basetext = self.revision(self.node(baserev), _df=fh, raw=raw)
                btext[0] = mdiff.patch(basetext, delta)

            try:
                self.checkhash(btext[0], node, p1=p1, p2=p2)
                if flags & REVIDX_ISCENSORED:
                    raise RevlogError(_('node %s is not censored') % node)
            except CensoredNodeError:
                # must pass the censored index flag to add censored revisions
                if not flags & REVIDX_ISCENSORED:
                    raise
            return btext[0]

        def builddelta(rev):
            # can we use the cached delta?
            if cachedelta and cachedelta[0] == rev:
                delta = cachedelta[1]
            else:
                t = buildtext()
                if self.iscensored(rev):
                    # deltas based on a censored revision must replace the
                    # full content in one patch, so delta works everywhere
                    header = mdiff.replacediffheader(self.rawsize(rev), len(t))
                    delta = header + t
                else:
                    if self._inline:
                        fh = ifh
                    else:
                        fh = dfh
                    ptext = self.revision(self.node(rev), _df=fh)
                    delta = mdiff.textdiff(ptext, t)
            header, data = self.compress(delta)
            deltalen = len(header) + len(data)
            chainbase = self.chainbase(rev)
            dist = deltalen + offset - self.start(chainbase)
            if self._generaldelta:
                base = rev
            else:
                base = chainbase
            chainlen, compresseddeltalen = self._chaininfo(rev)
            chainlen += 1
            compresseddeltalen += deltalen
            return (dist, deltalen, (header, data), base,
                    chainbase, chainlen, compresseddeltalen)

        curr = len(self)
        prev = curr - 1
        offset = self.end(prev)
        delta = None
        p1r, p2r = self.rev(p1), self.rev(p2)

        # full versions are inserted when the needed deltas
        # become comparable to the uncompressed text
        if text is None:
            textlen = mdiff.patchedsize(self.rawsize(cachedelta[0]),
                                        cachedelta[1])
        else:
            textlen = len(text)

        # should we try to build a delta?
        if prev != nullrev and self.storedeltachains:
            tested = set()
            # This condition is true most of the time when processing
            # changegroup data into a generaldelta repo. The only time it
            # isn't true is if this is the first revision in a delta chain
            # or if ``format.generaldelta=true`` disabled ``lazydeltabase``.
            if cachedelta and self._generaldelta and self._lazydeltabase:
                # Assume what we received from the server is a good choice
                # build delta will reuse the cache
                candidatedelta = builddelta(cachedelta[0])
                tested.add(cachedelta[0])
                if self._isgooddelta(candidatedelta, textlen):
                    delta = candidatedelta
            if delta is None and self._generaldelta:
                # exclude already lazy tested base if any
                parents = [p for p in (p1r, p2r)
                           if p != nullrev and p not in tested]
                if parents and not self._aggressivemergedeltas:
                    # Pick whichever parent is closer to us (to minimize the
                    # chance of having to build a fulltext).
                    parents = [max(parents)]
                tested.update(parents)
                pdeltas = []
                for p in parents:
                    pd = builddelta(p)
                    if self._isgooddelta(pd, textlen):
                        pdeltas.append(pd)
                if pdeltas:
                    delta = min(pdeltas, key=lambda x: x[1])
            if delta is None and prev not in tested:
                # other approaches failed; try against prev to hopefully
                # save us a fulltext.
                candidatedelta = builddelta(prev)
                if self._isgooddelta(candidatedelta, textlen):
                    delta = candidatedelta
        if delta is not None:
            dist, l, data, base, chainbase, chainlen, compresseddeltalen = delta
        else:
            text = buildtext()
            data = self.compress(text)
            l = len(data[1]) + len(data[0])
            base = chainbase = curr

        e = (offset_type(offset, flags), l, textlen,
             base, link, p1r, p2r, node)
        self.index.insert(-1, e)
        self.nodemap[node] = curr

        entry = self._io.packentry(e, self.node, self.version, curr)
        self._writeentry(transaction, ifh, dfh, entry, data, link, offset)

        if alwayscache and text is None:
            text = buildtext()

        if type(text) == str: # only accept immutable objects
            self._cache = (node, curr, text)
        self._chainbasecache[curr] = chainbase
        return node

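    # Delta selection order above (an observation, not original commentary):
    # first reuse the peer-supplied base when lazydeltabase applies, then try
    # the parents (both under _aggressivemergedeltas, otherwise only the
    # closer one), then prev, and finally fall back to storing a full
    # compressed snapshot when no candidate passes _isgooddelta().
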
    def _writeentry(self, transaction, ifh, dfh, entry, data, link, offset):
        # Files opened in a+ mode have inconsistent behavior on various
        # platforms. Windows requires that a file positioning call be made
        # when the file handle transitions between reads and writes. See
        # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
        # platforms, Python or the platform itself can be buggy. Some versions
        # of Solaris have been observed to not append at the end of the file
        # if the file was seeked to before the end. See issue4943 for more.
        #
        # We work around this issue by inserting a seek() before writing.
        # Note: This is likely not necessary on Python 3.
        ifh.seek(0, os.SEEK_END)
        if dfh:
            dfh.seek(0, os.SEEK_END)

        curr = len(self) - 1
        if not self._inline:
            transaction.add(self.datafile, offset)
            transaction.add(self.indexfile, curr * len(entry))
            if data[0]:
                dfh.write(data[0])
            dfh.write(data[1])
            ifh.write(entry)
        else:
            offset += curr * self._io.size
            transaction.add(self.indexfile, offset, curr)
            ifh.write(entry)
            ifh.write(data[0])
            ifh.write(data[1])
            self.checkinlinesize(transaction, ifh)

    def addgroup(self, cg, linkmapper, transaction, addrevisioncb=None):
        """
        add a delta group

        given a set of deltas, add them to the revision log. the
        first delta is against its parent, which should be in our
        log, the rest are against the previous delta.

        If ``addrevisioncb`` is defined, it will be called with arguments of
        this revlog and the node that was added.
        """

        # track the base of the current delta log
        content = []
        node = None

        r = len(self)
        end = 0
        if r:
            end = self.end(r - 1)
        ifh = self.opener(self.indexfile, "a+", checkambig=self._checkambig)
        isize = r * self._io.size
        if self._inline:
            transaction.add(self.indexfile, end + isize, r)
            dfh = None
        else:
            transaction.add(self.indexfile, isize, r)
            transaction.add(self.datafile, end)
            dfh = self.opener(self.datafile, "a+")
        def flush():
            if dfh:
                dfh.flush()
            ifh.flush()
        try:
            # loop through our set of deltas
            chain = None
            for chunkdata in iter(lambda: cg.deltachunk(chain), {}):
                node = chunkdata['node']
                p1 = chunkdata['p1']
                p2 = chunkdata['p2']
                cs = chunkdata['cs']
                deltabase = chunkdata['deltabase']
                delta = chunkdata['delta']
                flags = chunkdata['flags'] or REVIDX_DEFAULT_FLAGS

                content.append(node)

                link = linkmapper(cs)
                if node in self.nodemap:
                    # this can happen if two branches make the same change
                    chain = node
                    continue

                for p in (p1, p2):
                    if p not in self.nodemap:
                        raise LookupError(p, self.indexfile,
                                          _('unknown parent'))

                if deltabase not in self.nodemap:
                    raise LookupError(deltabase, self.indexfile,
                                      _('unknown delta base'))

                baserev = self.rev(deltabase)

                if baserev != nullrev and self.iscensored(baserev):
                    # if base is censored, delta must be full replacement in a
                    # single patch operation
                    hlen = struct.calcsize(">lll")
                    oldlen = self.rawsize(baserev)
                    newlen = len(delta) - hlen
                    if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
                        raise error.CensoredBaseError(self.indexfile,
                                                      self.node(baserev))

                if not flags and self._peek_iscensored(baserev, delta, flush):
                    flags |= REVIDX_ISCENSORED

                # We assume consumers of addrevisioncb will want to retrieve
                # the added revision, which will require a call to
                # revision(). revision() will fast path if there is a cache
                # hit. So, we tell _addrevision() to always cache in this case.
                # We're only using addgroup() in the context of changegroup
                # generation so the revision data can always be handled as raw
                # by the flagprocessor.
                chain = self._addrevision(node, None, transaction, link,
                                          p1, p2, flags, (baserev, delta),
                                          ifh, dfh,
                                          alwayscache=bool(addrevisioncb),
                                          raw=True)

                if addrevisioncb:
                    addrevisioncb(self, chain)

                if not dfh and not self._inline:
                    # addrevision switched from inline to conventional
                    # reopen the index
                    ifh.close()
                    dfh = self.opener(self.datafile, "a+")
                    ifh = self.opener(self.indexfile, "a+",
                                      checkambig=self._checkambig)
        finally:
            if dfh:
                dfh.close()
            ifh.close()

        return content

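    # Usage sketch (not part of the original file; the linkmapper shown is
    # an assumption): changegroup application drives this loop, passing a
    # callable that maps a changeset node to its future changelog rev, e.g.:
    #   rl.addgroup(cg, cl.rev, tr, addrevisioncb=onadd)
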
    def iscensored(self, rev):
        """Check if a file revision is censored."""
        return False

    def _peek_iscensored(self, baserev, delta, flush):
        """Quickly check if a delta produces a censored revision."""
        return False

    def getstrippoint(self, minlink):
        """find the minimum rev that must be stripped to strip the linkrev

        Returns a tuple containing the minimum rev and a set of all revs that
        have linkrevs that will be broken by this strip.
        """
        brokenrevs = set()
        strippoint = len(self)

        heads = {}
        futurelargelinkrevs = set()
        for head in self.headrevs():
            headlinkrev = self.linkrev(head)
            heads[head] = headlinkrev
            if headlinkrev >= minlink:
                futurelargelinkrevs.add(headlinkrev)

        # This algorithm involves walking down the rev graph, starting at the
        # heads. Since the revs are topologically sorted according to linkrev,
        # once all head linkrevs are below the minlink, we know there are
        # no more revs that could have a linkrev greater than minlink.
        # So we can stop walking.
        while futurelargelinkrevs:
            strippoint -= 1
            linkrev = heads.pop(strippoint)

            if linkrev < minlink:
                brokenrevs.add(strippoint)
            else:
                futurelargelinkrevs.remove(linkrev)

            for p in self.parentrevs(strippoint):
                if p != nullrev:
                    plinkrev = self.linkrev(p)
                    heads[p] = plinkrev
                    if plinkrev >= minlink:
                        futurelargelinkrevs.add(plinkrev)

        return strippoint, brokenrevs

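    # Worked example (a sketch, not part of the original file): in a linear
    # revlog where revs 0..4 carry linkrevs 0..4, getstrippoint(3) walks back
    # from the head and returns (3, set()): everything from rev 3 upward must
    # go, and no surviving rev is left with a broken linkrev.
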
    def strip(self, minlink, transaction):
        """truncate the revlog on the first revision with a linkrev >= minlink

        This function is called when we're stripping revision minlink and
        its descendants from the repository.

        We have to remove all revisions with linkrev >= minlink, because
        the equivalent changelog revisions will be renumbered after the
        strip.

        So we truncate the revlog on the first of these revisions, and
        trust that the caller has saved the revisions that shouldn't be
        removed and that it'll re-add them after this truncation.
        """
        if len(self) == 0:
            return

        rev, _ = self.getstrippoint(minlink)
        if rev == len(self):
            return

        # first truncate the files on disk
        end = self.start(rev)
        if not self._inline:
            transaction.add(self.datafile, end)
            end = rev * self._io.size
        else:
            end += rev * self._io.size

        transaction.add(self.indexfile, end)

        # then reset internal state in memory to forget those revisions
        self._cache = None
        self._chaininfocache = {}
        self._chunkclear()
        for x in xrange(rev, len(self)):
            del self.nodemap[self.node(x)]

        del self.index[rev:-1]

    def checksize(self):
        expected = 0
        if len(self):
            expected = max(0, self.end(len(self) - 1))

        try:
            f = self.opener(self.datafile)
            f.seek(0, 2)
            actual = f.tell()
            f.close()
            dd = actual - expected
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise
            dd = 0

        try:
            f = self.opener(self.indexfile)
            f.seek(0, 2)
            actual = f.tell()
            f.close()
            s = self._io.size
            i = max(0, actual // s)
            di = actual - (i * s)
            if self._inline:
                databytes = 0
                for r in self:
                    databytes += max(0, self.length(r))
                dd = 0
                di = actual - len(self) * s - databytes
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise
            di = 0

        return (dd, di)

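    # Interpretation (an observation, not original commentary): the (dd, di)
    # pair reports how far the data file and index file diverge from the
    # sizes implied by the index; nonzero values indicate truncation or
    # trailing garbage, which is how verification code consumes them.
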
    def files(self):
        res = [self.indexfile]
        if not self._inline:
            res.append(self.datafile)
        return res