revlog: ensure that flags do not overflow 2 bytes...

Cotizo Sima
r30543:03fae904 default
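
The change adds two lines to offset_type() below. Each RevlogNG index entry
packs a 48-bit data-file offset and 16 bits of per-revision flags into a
single integer; getoffset() and gettype() split that word back apart. A flag
value wider than 2 bytes would silently shift into the offset half of the
word, so offset_type() now rejects any bit outside REVIDX_KNOWN_FLAGS. A
minimal sketch of the failure mode being guarded against (illustrative only,
not part of the changeset; pack_offset_type is a stand-in for offset_type,
using plain ints):

    REVIDX_ISCENSORED = 1 << 15
    REVIDX_KNOWN_FLAGS = REVIDX_ISCENSORED

    def pack_offset_type(offset, type):
        # mirrors offset_type() plus the new guard
        if (type & ~REVIDX_KNOWN_FLAGS) != 0:
            raise ValueError('unknown revlog index flags')
        return offset << 16 | type

    packed = pack_offset_type(1024, REVIDX_ISCENSORED)
    assert packed >> 16 == 1024        # getoffset(): the offset half
    assert packed & 0xFFFF == 0x8000   # gettype(): the 16-bit flags half

    try:
        pack_offset_type(1024, 1 << 16)  # bit 16 would corrupt the offset
    except ValueError:
        pass                             # rejected by the new check
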
@@ -1,1820 +1,1822 @@
# revlog.py - storage back-end for mercurial
#
# Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.

"""Storage back-end for Mercurial.

This provides efficient delta storage with O(1) retrieve and append
and O(changes) merge between branches.
"""

from __future__ import absolute_import

import collections
import errno
import hashlib
import os
import struct
import zlib

# import stuff from node for others to import from revlog
from .node import (
    bin,
    hex,
    nullid,
    nullrev,
)
from .i18n import _
from . import (
    ancestor,
    error,
    mdiff,
    parsers,
    templatefilters,
    util,
)

_pack = struct.pack
_unpack = struct.unpack
_compress = zlib.compress
_decompress = zlib.decompress

# revlog header flags
REVLOGV0 = 0
REVLOGNG = 1
REVLOGNGINLINEDATA = (1 << 16)
REVLOGGENERALDELTA = (1 << 17)
REVLOG_DEFAULT_FLAGS = REVLOGNGINLINEDATA
REVLOG_DEFAULT_FORMAT = REVLOGNG
REVLOG_DEFAULT_VERSION = REVLOG_DEFAULT_FORMAT | REVLOG_DEFAULT_FLAGS
REVLOGNG_FLAGS = REVLOGNGINLINEDATA | REVLOGGENERALDELTA

# revlog index flags
REVIDX_ISCENSORED = (1 << 15) # revision has censor metadata, must be verified
REVIDX_DEFAULT_FLAGS = 0
REVIDX_KNOWN_FLAGS = REVIDX_ISCENSORED

# max size of revlog with inline data
_maxinline = 131072
_chunksize = 1048576

RevlogError = error.RevlogError
LookupError = error.LookupError
CensoredNodeError = error.CensoredNodeError

def getoffset(q):
    return int(q >> 16)

def gettype(q):
    return int(q & 0xFFFF)

def offset_type(offset, type):
    if (type & ~REVIDX_KNOWN_FLAGS) != 0:
        raise ValueError('unknown revlog index flags')
    return long(long(offset) << 16 | type)

_nullhash = hashlib.sha1(nullid)

def hash(text, p1, p2):
    """generate a hash from the given text and its parent hashes

    This hash combines both the current file contents and its history
    in a manner that makes it easy to distinguish nodes with the same
    content in the revision graph.
    """
    # As of now, if one of the parent node is null, p2 is null
    if p2 == nullid:
        # deep copy of a hash is faster than creating one
        s = _nullhash.copy()
        s.update(p1)
    else:
        # none of the parent nodes are nullid
        l = [p1, p2]
        l.sort()
        s = hashlib.sha1(l[0])
        s.update(l[1])
    s.update(text)
    return s.digest()
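
# (editorial note, not part of the original file: equivalently,
# hash(text, p1, p2) == hashlib.sha1(min(p1, p2) + max(p1, p2) + text).digest();
# sorting the parents makes the result independent of their order, and the
# p2 == nullid fast path agrees because nullid, being twenty zero bytes,
# always sorts first.)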

def decompress(bin):
    """ decompress the given input """
    if not bin:
        return bin
    t = bin[0]
    if t == '\0':
        return bin
    if t == 'x':
        try:
            return _decompress(bin)
        except zlib.error as e:
            raise RevlogError(_("revlog decompress error: %s") % str(e))
    if t == 'u':
        return util.buffer(bin, 1)
    raise RevlogError(_("unknown compression type %r") % t)

# index v0:
#  4 bytes: offset
#  4 bytes: compressed length
#  4 bytes: base rev
#  4 bytes: link rev
# 20 bytes: parent 1 nodeid
# 20 bytes: parent 2 nodeid
# 20 bytes: nodeid
indexformatv0 = ">4l20s20s20s"

class revlogoldio(object):
    def __init__(self):
        self.size = struct.calcsize(indexformatv0)

    def parseindex(self, data, inline):
        s = self.size
        index = []
        nodemap = {nullid: nullrev}
        n = off = 0
        l = len(data)
        while off + s <= l:
            cur = data[off:off + s]
            off += s
            e = _unpack(indexformatv0, cur)
            # transform to revlogv1 format
            e2 = (offset_type(e[0], 0), e[1], -1, e[2], e[3],
                  nodemap.get(e[4], nullrev), nodemap.get(e[5], nullrev), e[6])
            index.append(e2)
            nodemap[e[6]] = n
            n += 1

        # add the magic null revision at -1
        index.append((0, 0, 0, -1, -1, -1, -1, nullid))

        return index, nodemap, None

    def packentry(self, entry, node, version, rev):
        if gettype(entry[0]):
            raise RevlogError(_("index entry flags need RevlogNG"))
        e2 = (getoffset(entry[0]), entry[1], entry[3], entry[4],
              node(entry[5]), node(entry[6]), entry[7])
        return _pack(indexformatv0, *e2)

# index ng:
#  6 bytes: offset
#  2 bytes: flags
#  4 bytes: compressed length
#  4 bytes: uncompressed length
#  4 bytes: base rev
#  4 bytes: link rev
#  4 bytes: parent 1 rev
#  4 bytes: parent 2 rev
# 32 bytes: nodeid
indexformatng = ">Qiiiiii20s12x"
versionformat = ">I"
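
# (editorial note, not part of the original file: with these formats,
# struct.calcsize(indexformatv0) is 76 and struct.calcsize(indexformatng)
# is 64; the 20s plus the 12x pad make up the 32 bytes reserved for the
# nodeid above, of which current SHA-1 nodeids use only 20.)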
171
173
172 # corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
174 # corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
173 # signed integer)
175 # signed integer)
174 _maxentrysize = 0x7fffffff
176 _maxentrysize = 0x7fffffff
175
177
176 class revlogio(object):
178 class revlogio(object):
177 def __init__(self):
179 def __init__(self):
178 self.size = struct.calcsize(indexformatng)
180 self.size = struct.calcsize(indexformatng)
179
181
180 def parseindex(self, data, inline):
182 def parseindex(self, data, inline):
181 # call the C implementation to parse the index data
183 # call the C implementation to parse the index data
182 index, cache = parsers.parse_index2(data, inline)
184 index, cache = parsers.parse_index2(data, inline)
183 return index, getattr(index, 'nodemap', None), cache
185 return index, getattr(index, 'nodemap', None), cache
184
186
185 def packentry(self, entry, node, version, rev):
187 def packentry(self, entry, node, version, rev):
186 p = _pack(indexformatng, *entry)
188 p = _pack(indexformatng, *entry)
187 if rev == 0:
189 if rev == 0:
188 p = _pack(versionformat, version) + p[4:]
190 p = _pack(versionformat, version) + p[4:]
189 return p
191 return p
190
192
191 class revlog(object):
193 class revlog(object):
192 """
194 """
193 the underlying revision storage object
195 the underlying revision storage object
194
196
195 A revlog consists of two parts, an index and the revision data.
197 A revlog consists of two parts, an index and the revision data.
196
198
197 The index is a file with a fixed record size containing
199 The index is a file with a fixed record size containing
198 information on each revision, including its nodeid (hash), the
200 information on each revision, including its nodeid (hash), the
199 nodeids of its parents, the position and offset of its data within
201 nodeids of its parents, the position and offset of its data within
200 the data file, and the revision it's based on. Finally, each entry
202 the data file, and the revision it's based on. Finally, each entry
201 contains a linkrev entry that can serve as a pointer to external
203 contains a linkrev entry that can serve as a pointer to external
202 data.
204 data.
203
205
204 The revision data itself is a linear collection of data chunks.
206 The revision data itself is a linear collection of data chunks.
205 Each chunk represents a revision and is usually represented as a
207 Each chunk represents a revision and is usually represented as a
206 delta against the previous chunk. To bound lookup time, runs of
208 delta against the previous chunk. To bound lookup time, runs of
207 deltas are limited to about 2 times the length of the original
209 deltas are limited to about 2 times the length of the original
208 version data. This makes retrieval of a version proportional to
210 version data. This makes retrieval of a version proportional to
209 its size, or O(1) relative to the number of revisions.
211 its size, or O(1) relative to the number of revisions.
210
212
211 Both pieces of the revlog are written to in an append-only
213 Both pieces of the revlog are written to in an append-only
212 fashion, which means we never need to rewrite a file to insert or
214 fashion, which means we never need to rewrite a file to insert or
213 remove data, and can use some simple techniques to avoid the need
215 remove data, and can use some simple techniques to avoid the need
214 for locking while reading.
216 for locking while reading.
215
217
216 If checkambig, indexfile is opened with checkambig=True at
218 If checkambig, indexfile is opened with checkambig=True at
217 writing, to avoid file stat ambiguity.
219 writing, to avoid file stat ambiguity.
218 """
220 """
219 def __init__(self, opener, indexfile, checkambig=False):
221 def __init__(self, opener, indexfile, checkambig=False):
220 """
222 """
221 create a revlog object
223 create a revlog object
222
224
223 opener is a function that abstracts the file opening operation
225 opener is a function that abstracts the file opening operation
224 and can be used to implement COW semantics or the like.
226 and can be used to implement COW semantics or the like.
225 """
227 """
226 self.indexfile = indexfile
228 self.indexfile = indexfile
227 self.datafile = indexfile[:-2] + ".d"
229 self.datafile = indexfile[:-2] + ".d"
228 self.opener = opener
230 self.opener = opener
229 # When True, indexfile is opened with checkambig=True at writing, to
231 # When True, indexfile is opened with checkambig=True at writing, to
230 # avoid file stat ambiguity.
232 # avoid file stat ambiguity.
231 self._checkambig = checkambig
233 self._checkambig = checkambig
232 # 3-tuple of (node, rev, text) for a raw revision.
234 # 3-tuple of (node, rev, text) for a raw revision.
233 self._cache = None
235 self._cache = None
234 # Maps rev to chain base rev.
236 # Maps rev to chain base rev.
235 self._chainbasecache = util.lrucachedict(100)
237 self._chainbasecache = util.lrucachedict(100)
236 # 2-tuple of (offset, data) of raw data from the revlog at an offset.
238 # 2-tuple of (offset, data) of raw data from the revlog at an offset.
237 self._chunkcache = (0, '')
239 self._chunkcache = (0, '')
238 # How much data to read and cache into the raw revlog data cache.
240 # How much data to read and cache into the raw revlog data cache.
239 self._chunkcachesize = 65536
241 self._chunkcachesize = 65536
240 self._maxchainlen = None
242 self._maxchainlen = None
241 self._aggressivemergedeltas = False
243 self._aggressivemergedeltas = False
242 self.index = []
244 self.index = []
243 # Mapping of partial identifiers to full nodes.
245 # Mapping of partial identifiers to full nodes.
244 self._pcache = {}
246 self._pcache = {}
245 # Mapping of revision integer to full node.
247 # Mapping of revision integer to full node.
246 self._nodecache = {nullid: nullrev}
248 self._nodecache = {nullid: nullrev}
247 self._nodepos = None
249 self._nodepos = None
248
250
249 v = REVLOG_DEFAULT_VERSION
251 v = REVLOG_DEFAULT_VERSION
250 opts = getattr(opener, 'options', None)
252 opts = getattr(opener, 'options', None)
251 if opts is not None:
253 if opts is not None:
252 if 'revlogv1' in opts:
254 if 'revlogv1' in opts:
253 if 'generaldelta' in opts:
255 if 'generaldelta' in opts:
254 v |= REVLOGGENERALDELTA
256 v |= REVLOGGENERALDELTA
255 else:
257 else:
256 v = 0
258 v = 0
257 if 'chunkcachesize' in opts:
259 if 'chunkcachesize' in opts:
258 self._chunkcachesize = opts['chunkcachesize']
260 self._chunkcachesize = opts['chunkcachesize']
259 if 'maxchainlen' in opts:
261 if 'maxchainlen' in opts:
260 self._maxchainlen = opts['maxchainlen']
262 self._maxchainlen = opts['maxchainlen']
261 if 'aggressivemergedeltas' in opts:
263 if 'aggressivemergedeltas' in opts:
262 self._aggressivemergedeltas = opts['aggressivemergedeltas']
264 self._aggressivemergedeltas = opts['aggressivemergedeltas']
263 self._lazydeltabase = bool(opts.get('lazydeltabase', False))
265 self._lazydeltabase = bool(opts.get('lazydeltabase', False))
264
266
265 if self._chunkcachesize <= 0:
267 if self._chunkcachesize <= 0:
266 raise RevlogError(_('revlog chunk cache size %r is not greater '
268 raise RevlogError(_('revlog chunk cache size %r is not greater '
267 'than 0') % self._chunkcachesize)
269 'than 0') % self._chunkcachesize)
268 elif self._chunkcachesize & (self._chunkcachesize - 1):
270 elif self._chunkcachesize & (self._chunkcachesize - 1):
269 raise RevlogError(_('revlog chunk cache size %r is not a power '
271 raise RevlogError(_('revlog chunk cache size %r is not a power '
270 'of 2') % self._chunkcachesize)
272 'of 2') % self._chunkcachesize)
271
273
272 indexdata = ''
274 indexdata = ''
273 self._initempty = True
275 self._initempty = True
274 try:
276 try:
275 f = self.opener(self.indexfile)
277 f = self.opener(self.indexfile)
276 indexdata = f.read()
278 indexdata = f.read()
277 f.close()
279 f.close()
278 if len(indexdata) > 0:
280 if len(indexdata) > 0:
279 v = struct.unpack(versionformat, indexdata[:4])[0]
281 v = struct.unpack(versionformat, indexdata[:4])[0]
280 self._initempty = False
282 self._initempty = False
281 except IOError as inst:
283 except IOError as inst:
282 if inst.errno != errno.ENOENT:
284 if inst.errno != errno.ENOENT:
283 raise
285 raise
284
286
285 self.version = v
287 self.version = v
286 self._inline = v & REVLOGNGINLINEDATA
288 self._inline = v & REVLOGNGINLINEDATA
287 self._generaldelta = v & REVLOGGENERALDELTA
289 self._generaldelta = v & REVLOGGENERALDELTA
288 flags = v & ~0xFFFF
290 flags = v & ~0xFFFF
289 fmt = v & 0xFFFF
291 fmt = v & 0xFFFF
290 if fmt == REVLOGV0 and flags:
292 if fmt == REVLOGV0 and flags:
291 raise RevlogError(_("index %s unknown flags %#04x for format v0")
293 raise RevlogError(_("index %s unknown flags %#04x for format v0")
292 % (self.indexfile, flags >> 16))
294 % (self.indexfile, flags >> 16))
293 elif fmt == REVLOGNG and flags & ~REVLOGNG_FLAGS:
295 elif fmt == REVLOGNG and flags & ~REVLOGNG_FLAGS:
294 raise RevlogError(_("index %s unknown flags %#04x for revlogng")
296 raise RevlogError(_("index %s unknown flags %#04x for revlogng")
295 % (self.indexfile, flags >> 16))
297 % (self.indexfile, flags >> 16))
296 elif fmt > REVLOGNG:
298 elif fmt > REVLOGNG:
297 raise RevlogError(_("index %s unknown format %d")
299 raise RevlogError(_("index %s unknown format %d")
298 % (self.indexfile, fmt))
300 % (self.indexfile, fmt))
299
301
300 self.storedeltachains = True
302 self.storedeltachains = True
301
303
302 self._io = revlogio()
304 self._io = revlogio()
303 if self.version == REVLOGV0:
305 if self.version == REVLOGV0:
304 self._io = revlogoldio()
306 self._io = revlogoldio()
305 try:
307 try:
306 d = self._io.parseindex(indexdata, self._inline)
308 d = self._io.parseindex(indexdata, self._inline)
307 except (ValueError, IndexError):
309 except (ValueError, IndexError):
308 raise RevlogError(_("index %s is corrupted") % (self.indexfile))
310 raise RevlogError(_("index %s is corrupted") % (self.indexfile))
309 self.index, nodemap, self._chunkcache = d
311 self.index, nodemap, self._chunkcache = d
310 if nodemap is not None:
312 if nodemap is not None:
311 self.nodemap = self._nodecache = nodemap
313 self.nodemap = self._nodecache = nodemap
312 if not self._chunkcache:
314 if not self._chunkcache:
313 self._chunkclear()
315 self._chunkclear()
314 # revnum -> (chain-length, sum-delta-length)
316 # revnum -> (chain-length, sum-delta-length)
315 self._chaininfocache = {}
317 self._chaininfocache = {}
316
318
317 def tip(self):
319 def tip(self):
318 return self.node(len(self.index) - 2)
320 return self.node(len(self.index) - 2)
319 def __contains__(self, rev):
321 def __contains__(self, rev):
320 return 0 <= rev < len(self)
322 return 0 <= rev < len(self)
321 def __len__(self):
323 def __len__(self):
322 return len(self.index) - 1
324 return len(self.index) - 1
323 def __iter__(self):
325 def __iter__(self):
324 return iter(xrange(len(self)))
326 return iter(xrange(len(self)))
325 def revs(self, start=0, stop=None):
327 def revs(self, start=0, stop=None):
326 """iterate over all rev in this revlog (from start to stop)"""
328 """iterate over all rev in this revlog (from start to stop)"""
327 step = 1
329 step = 1
328 if stop is not None:
330 if stop is not None:
329 if start > stop:
331 if start > stop:
330 step = -1
332 step = -1
331 stop += step
333 stop += step
332 else:
334 else:
333 stop = len(self)
335 stop = len(self)
334 return xrange(start, stop, step)
336 return xrange(start, stop, step)
335
337
336 @util.propertycache
338 @util.propertycache
337 def nodemap(self):
339 def nodemap(self):
338 self.rev(self.node(0))
340 self.rev(self.node(0))
339 return self._nodecache
341 return self._nodecache
340
342
341 def hasnode(self, node):
343 def hasnode(self, node):
342 try:
344 try:
343 self.rev(node)
345 self.rev(node)
344 return True
346 return True
345 except KeyError:
347 except KeyError:
346 return False
348 return False
347
349
348 def clearcaches(self):
350 def clearcaches(self):
349 self._cache = None
351 self._cache = None
350 self._chainbasecache.clear()
352 self._chainbasecache.clear()
351 self._chunkcache = (0, '')
353 self._chunkcache = (0, '')
352 self._pcache = {}
354 self._pcache = {}
353
355
354 try:
356 try:
355 self._nodecache.clearcaches()
357 self._nodecache.clearcaches()
356 except AttributeError:
358 except AttributeError:
357 self._nodecache = {nullid: nullrev}
359 self._nodecache = {nullid: nullrev}
358 self._nodepos = None
360 self._nodepos = None
359
361
360 def rev(self, node):
362 def rev(self, node):
361 try:
363 try:
362 return self._nodecache[node]
364 return self._nodecache[node]
363 except TypeError:
365 except TypeError:
364 raise
366 raise
365 except RevlogError:
367 except RevlogError:
366 # parsers.c radix tree lookup failed
368 # parsers.c radix tree lookup failed
367 raise LookupError(node, self.indexfile, _('no node'))
369 raise LookupError(node, self.indexfile, _('no node'))
368 except KeyError:
370 except KeyError:
369 # pure python cache lookup failed
371 # pure python cache lookup failed
370 n = self._nodecache
372 n = self._nodecache
371 i = self.index
373 i = self.index
372 p = self._nodepos
374 p = self._nodepos
373 if p is None:
375 if p is None:
374 p = len(i) - 2
376 p = len(i) - 2
375 for r in xrange(p, -1, -1):
377 for r in xrange(p, -1, -1):
376 v = i[r][7]
378 v = i[r][7]
377 n[v] = r
379 n[v] = r
378 if v == node:
380 if v == node:
379 self._nodepos = r - 1
381 self._nodepos = r - 1
380 return r
382 return r
381 raise LookupError(node, self.indexfile, _('no node'))
383 raise LookupError(node, self.indexfile, _('no node'))
382
384
383 # Accessors for index entries.
385 # Accessors for index entries.
384
386
385 # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
387 # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
386 # are flags.
388 # are flags.
387 def start(self, rev):
389 def start(self, rev):
388 return int(self.index[rev][0] >> 16)
390 return int(self.index[rev][0] >> 16)
389
391
390 def flags(self, rev):
392 def flags(self, rev):
391 return self.index[rev][0] & 0xFFFF
393 return self.index[rev][0] & 0xFFFF
392
394
393 def length(self, rev):
395 def length(self, rev):
394 return self.index[rev][1]
396 return self.index[rev][1]
395
397
396 def rawsize(self, rev):
398 def rawsize(self, rev):
397 """return the length of the uncompressed text for a given revision"""
399 """return the length of the uncompressed text for a given revision"""
398 l = self.index[rev][2]
400 l = self.index[rev][2]
399 if l >= 0:
401 if l >= 0:
400 return l
402 return l
401
403
402 t = self.revision(self.node(rev))
404 t = self.revision(self.node(rev))
403 return len(t)
405 return len(t)
404 size = rawsize
406 size = rawsize
405
407
406 def chainbase(self, rev):
408 def chainbase(self, rev):
407 base = self._chainbasecache.get(rev)
409 base = self._chainbasecache.get(rev)
408 if base is not None:
410 if base is not None:
409 return base
411 return base
410
412
411 index = self.index
413 index = self.index
412 base = index[rev][3]
414 base = index[rev][3]
413 while base != rev:
415 while base != rev:
414 rev = base
416 rev = base
415 base = index[rev][3]
417 base = index[rev][3]
416
418
417 self._chainbasecache[rev] = base
419 self._chainbasecache[rev] = base
418 return base
420 return base
419
421
420 def linkrev(self, rev):
422 def linkrev(self, rev):
421 return self.index[rev][4]
423 return self.index[rev][4]
422
424
423 def parentrevs(self, rev):
425 def parentrevs(self, rev):
424 return self.index[rev][5:7]
426 return self.index[rev][5:7]
425
427
426 def node(self, rev):
428 def node(self, rev):
427 return self.index[rev][7]
429 return self.index[rev][7]
428
430
429 # Derived from index values.
431 # Derived from index values.
430
432
431 def end(self, rev):
433 def end(self, rev):
432 return self.start(rev) + self.length(rev)
434 return self.start(rev) + self.length(rev)
433
435
434 def parents(self, node):
436 def parents(self, node):
435 i = self.index
437 i = self.index
436 d = i[self.rev(node)]
438 d = i[self.rev(node)]
437 return i[d[5]][7], i[d[6]][7] # map revisions to nodes inline
439 return i[d[5]][7], i[d[6]][7] # map revisions to nodes inline
438
440
439 def chainlen(self, rev):
441 def chainlen(self, rev):
440 return self._chaininfo(rev)[0]
442 return self._chaininfo(rev)[0]
441
443
442 def _chaininfo(self, rev):
444 def _chaininfo(self, rev):
443 chaininfocache = self._chaininfocache
445 chaininfocache = self._chaininfocache
444 if rev in chaininfocache:
446 if rev in chaininfocache:
445 return chaininfocache[rev]
447 return chaininfocache[rev]
446 index = self.index
448 index = self.index
447 generaldelta = self._generaldelta
449 generaldelta = self._generaldelta
448 iterrev = rev
450 iterrev = rev
449 e = index[iterrev]
451 e = index[iterrev]
450 clen = 0
452 clen = 0
451 compresseddeltalen = 0
453 compresseddeltalen = 0
452 while iterrev != e[3]:
454 while iterrev != e[3]:
453 clen += 1
455 clen += 1
454 compresseddeltalen += e[1]
456 compresseddeltalen += e[1]
455 if generaldelta:
457 if generaldelta:
456 iterrev = e[3]
458 iterrev = e[3]
457 else:
459 else:
458 iterrev -= 1
460 iterrev -= 1
459 if iterrev in chaininfocache:
461 if iterrev in chaininfocache:
460 t = chaininfocache[iterrev]
462 t = chaininfocache[iterrev]
461 clen += t[0]
463 clen += t[0]
462 compresseddeltalen += t[1]
464 compresseddeltalen += t[1]
463 break
465 break
464 e = index[iterrev]
466 e = index[iterrev]
465 else:
467 else:
466 # Add text length of base since decompressing that also takes
468 # Add text length of base since decompressing that also takes
467 # work. For cache hits the length is already included.
469 # work. For cache hits the length is already included.
468 compresseddeltalen += e[1]
470 compresseddeltalen += e[1]
469 r = (clen, compresseddeltalen)
471 r = (clen, compresseddeltalen)
470 chaininfocache[rev] = r
472 chaininfocache[rev] = r
471 return r
473 return r
472
474
473 def _deltachain(self, rev, stoprev=None):
475 def _deltachain(self, rev, stoprev=None):
474 """Obtain the delta chain for a revision.
476 """Obtain the delta chain for a revision.
475
477
476 ``stoprev`` specifies a revision to stop at. If not specified, we
478 ``stoprev`` specifies a revision to stop at. If not specified, we
477 stop at the base of the chain.
479 stop at the base of the chain.
478
480
479 Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
481 Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
480 revs in ascending order and ``stopped`` is a bool indicating whether
482 revs in ascending order and ``stopped`` is a bool indicating whether
481 ``stoprev`` was hit.
483 ``stoprev`` was hit.
482 """
484 """
483 chain = []
485 chain = []
484
486
485 # Alias to prevent attribute lookup in tight loop.
487 # Alias to prevent attribute lookup in tight loop.
486 index = self.index
488 index = self.index
487 generaldelta = self._generaldelta
489 generaldelta = self._generaldelta
488
490
489 iterrev = rev
491 iterrev = rev
490 e = index[iterrev]
492 e = index[iterrev]
491 while iterrev != e[3] and iterrev != stoprev:
493 while iterrev != e[3] and iterrev != stoprev:
492 chain.append(iterrev)
494 chain.append(iterrev)
493 if generaldelta:
495 if generaldelta:
494 iterrev = e[3]
496 iterrev = e[3]
495 else:
497 else:
496 iterrev -= 1
498 iterrev -= 1
497 e = index[iterrev]
499 e = index[iterrev]
498
500
499 if iterrev == stoprev:
501 if iterrev == stoprev:
500 stopped = True
502 stopped = True
501 else:
503 else:
502 chain.append(iterrev)
504 chain.append(iterrev)
503 stopped = False
505 stopped = False
504
506
505 chain.reverse()
507 chain.reverse()
506 return chain, stopped
508 return chain, stopped
507
509
508 def ancestors(self, revs, stoprev=0, inclusive=False):
510 def ancestors(self, revs, stoprev=0, inclusive=False):
509 """Generate the ancestors of 'revs' in reverse topological order.
511 """Generate the ancestors of 'revs' in reverse topological order.
510 Does not generate revs lower than stoprev.
512 Does not generate revs lower than stoprev.
511
513
512 See the documentation for ancestor.lazyancestors for more details."""
514 See the documentation for ancestor.lazyancestors for more details."""
513
515
514 return ancestor.lazyancestors(self.parentrevs, revs, stoprev=stoprev,
516 return ancestor.lazyancestors(self.parentrevs, revs, stoprev=stoprev,
515 inclusive=inclusive)
517 inclusive=inclusive)
516
518
517 def descendants(self, revs):
519 def descendants(self, revs):
518 """Generate the descendants of 'revs' in revision order.
520 """Generate the descendants of 'revs' in revision order.
519
521
520 Yield a sequence of revision numbers starting with a child of
522 Yield a sequence of revision numbers starting with a child of
521 some rev in revs, i.e., each revision is *not* considered a
523 some rev in revs, i.e., each revision is *not* considered a
522 descendant of itself. Results are ordered by revision number (a
524 descendant of itself. Results are ordered by revision number (a
523 topological sort)."""
525 topological sort)."""
524 first = min(revs)
526 first = min(revs)
525 if first == nullrev:
527 if first == nullrev:
526 for i in self:
528 for i in self:
527 yield i
529 yield i
528 return
530 return
529
531
530 seen = set(revs)
532 seen = set(revs)
531 for i in self.revs(start=first + 1):
533 for i in self.revs(start=first + 1):
532 for x in self.parentrevs(i):
534 for x in self.parentrevs(i):
533 if x != nullrev and x in seen:
535 if x != nullrev and x in seen:
534 seen.add(i)
536 seen.add(i)
535 yield i
537 yield i
536 break
538 break
537
539
538 def findcommonmissing(self, common=None, heads=None):
540 def findcommonmissing(self, common=None, heads=None):
539 """Return a tuple of the ancestors of common and the ancestors of heads
541 """Return a tuple of the ancestors of common and the ancestors of heads
540 that are not ancestors of common. In revset terminology, we return the
542 that are not ancestors of common. In revset terminology, we return the
541 tuple:
543 tuple:
542
544
543 ::common, (::heads) - (::common)
545 ::common, (::heads) - (::common)
544
546
545 The list is sorted by revision number, meaning it is
547 The list is sorted by revision number, meaning it is
546 topologically sorted.
548 topologically sorted.
547
549
548 'heads' and 'common' are both lists of node IDs. If heads is
550 'heads' and 'common' are both lists of node IDs. If heads is
549 not supplied, uses all of the revlog's heads. If common is not
551 not supplied, uses all of the revlog's heads. If common is not
550 supplied, uses nullid."""
552 supplied, uses nullid."""
551 if common is None:
553 if common is None:
552 common = [nullid]
554 common = [nullid]
553 if heads is None:
555 if heads is None:
554 heads = self.heads()
556 heads = self.heads()
555
557
556 common = [self.rev(n) for n in common]
558 common = [self.rev(n) for n in common]
557 heads = [self.rev(n) for n in heads]
559 heads = [self.rev(n) for n in heads]
558
560
559 # we want the ancestors, but inclusive
561 # we want the ancestors, but inclusive
560 class lazyset(object):
562 class lazyset(object):
561 def __init__(self, lazyvalues):
563 def __init__(self, lazyvalues):
562 self.addedvalues = set()
564 self.addedvalues = set()
563 self.lazyvalues = lazyvalues
565 self.lazyvalues = lazyvalues
564
566
565 def __contains__(self, value):
567 def __contains__(self, value):
566 return value in self.addedvalues or value in self.lazyvalues
568 return value in self.addedvalues or value in self.lazyvalues
567
569
568 def __iter__(self):
570 def __iter__(self):
569 added = self.addedvalues
571 added = self.addedvalues
570 for r in added:
572 for r in added:
571 yield r
573 yield r
572 for r in self.lazyvalues:
574 for r in self.lazyvalues:
573 if not r in added:
575 if not r in added:
574 yield r
576 yield r
575
577
576 def add(self, value):
578 def add(self, value):
577 self.addedvalues.add(value)
579 self.addedvalues.add(value)
578
580
579 def update(self, values):
581 def update(self, values):
580 self.addedvalues.update(values)
582 self.addedvalues.update(values)
581
583
582 has = lazyset(self.ancestors(common))
584 has = lazyset(self.ancestors(common))
583 has.add(nullrev)
585 has.add(nullrev)
584 has.update(common)
586 has.update(common)
585
587
586 # take all ancestors from heads that aren't in has
588 # take all ancestors from heads that aren't in has
587 missing = set()
589 missing = set()
588 visit = collections.deque(r for r in heads if r not in has)
590 visit = collections.deque(r for r in heads if r not in has)
589 while visit:
591 while visit:
590 r = visit.popleft()
592 r = visit.popleft()
591 if r in missing:
593 if r in missing:
592 continue
594 continue
593 else:
595 else:
594 missing.add(r)
596 missing.add(r)
595 for p in self.parentrevs(r):
597 for p in self.parentrevs(r):
596 if p not in has:
598 if p not in has:
597 visit.append(p)
599 visit.append(p)
598 missing = list(missing)
600 missing = list(missing)
599 missing.sort()
601 missing.sort()
600 return has, [self.node(miss) for miss in missing]
602 return has, [self.node(miss) for miss in missing]
601
603
602 def incrementalmissingrevs(self, common=None):
604 def incrementalmissingrevs(self, common=None):
603 """Return an object that can be used to incrementally compute the
605 """Return an object that can be used to incrementally compute the
604 revision numbers of the ancestors of arbitrary sets that are not
606 revision numbers of the ancestors of arbitrary sets that are not
605 ancestors of common. This is an ancestor.incrementalmissingancestors
607 ancestors of common. This is an ancestor.incrementalmissingancestors
606 object.
608 object.
607
609
608 'common' is a list of revision numbers. If common is not supplied, uses
610 'common' is a list of revision numbers. If common is not supplied, uses
609 nullrev.
611 nullrev.
610 """
612 """
611 if common is None:
613 if common is None:
612 common = [nullrev]
614 common = [nullrev]
613
615
614 return ancestor.incrementalmissingancestors(self.parentrevs, common)
616 return ancestor.incrementalmissingancestors(self.parentrevs, common)
615
617
616 def findmissingrevs(self, common=None, heads=None):
618 def findmissingrevs(self, common=None, heads=None):
617 """Return the revision numbers of the ancestors of heads that
619 """Return the revision numbers of the ancestors of heads that
618 are not ancestors of common.
620 are not ancestors of common.
619
621
620 More specifically, return a list of revision numbers corresponding to
622 More specifically, return a list of revision numbers corresponding to
621 nodes N such that every N satisfies the following constraints:
623 nodes N such that every N satisfies the following constraints:
622
624
623 1. N is an ancestor of some node in 'heads'
625 1. N is an ancestor of some node in 'heads'
624 2. N is not an ancestor of any node in 'common'
626 2. N is not an ancestor of any node in 'common'
625
627
626 The list is sorted by revision number, meaning it is
628 The list is sorted by revision number, meaning it is
627 topologically sorted.
629 topologically sorted.
628
630
629 'heads' and 'common' are both lists of revision numbers. If heads is
631 'heads' and 'common' are both lists of revision numbers. If heads is
630 not supplied, uses all of the revlog's heads. If common is not
632 not supplied, uses all of the revlog's heads. If common is not
631 supplied, uses nullid."""
633 supplied, uses nullid."""
632 if common is None:
634 if common is None:
633 common = [nullrev]
635 common = [nullrev]
634 if heads is None:
636 if heads is None:
635 heads = self.headrevs()
637 heads = self.headrevs()
636
638
637 inc = self.incrementalmissingrevs(common=common)
639 inc = self.incrementalmissingrevs(common=common)
638 return inc.missingancestors(heads)
640 return inc.missingancestors(heads)
639
641
640 def findmissing(self, common=None, heads=None):
642 def findmissing(self, common=None, heads=None):
641 """Return the ancestors of heads that are not ancestors of common.
643 """Return the ancestors of heads that are not ancestors of common.
642
644
643 More specifically, return a list of nodes N such that every N
645 More specifically, return a list of nodes N such that every N
644 satisfies the following constraints:
646 satisfies the following constraints:
645
647
646 1. N is an ancestor of some node in 'heads'
648 1. N is an ancestor of some node in 'heads'
647 2. N is not an ancestor of any node in 'common'
649 2. N is not an ancestor of any node in 'common'
648
650
649 The list is sorted by revision number, meaning it is
651 The list is sorted by revision number, meaning it is
650 topologically sorted.
652 topologically sorted.
651
653
652 'heads' and 'common' are both lists of node IDs. If heads is
654 'heads' and 'common' are both lists of node IDs. If heads is
653 not supplied, uses all of the revlog's heads. If common is not
655 not supplied, uses all of the revlog's heads. If common is not
654 supplied, uses nullid."""
656 supplied, uses nullid."""
655 if common is None:
657 if common is None:
656 common = [nullid]
658 common = [nullid]
657 if heads is None:
659 if heads is None:
658 heads = self.heads()
660 heads = self.heads()
659
661
660 common = [self.rev(n) for n in common]
662 common = [self.rev(n) for n in common]
661 heads = [self.rev(n) for n in heads]
663 heads = [self.rev(n) for n in heads]
662
664
663 inc = self.incrementalmissingrevs(common=common)
665 inc = self.incrementalmissingrevs(common=common)
664 return [self.node(r) for r in inc.missingancestors(heads)]
666 return [self.node(r) for r in inc.missingancestors(heads)]
665
667
666 def nodesbetween(self, roots=None, heads=None):
668 def nodesbetween(self, roots=None, heads=None):
667 """Return a topological path from 'roots' to 'heads'.
669 """Return a topological path from 'roots' to 'heads'.
668
670
669 Return a tuple (nodes, outroots, outheads) where 'nodes' is a
671 Return a tuple (nodes, outroots, outheads) where 'nodes' is a
670 topologically sorted list of all nodes N that satisfy both of
672 topologically sorted list of all nodes N that satisfy both of
671 these constraints:
673 these constraints:
672
674
673 1. N is a descendant of some node in 'roots'
675 1. N is a descendant of some node in 'roots'
674 2. N is an ancestor of some node in 'heads'
676 2. N is an ancestor of some node in 'heads'
675
677
676 Every node is considered to be both a descendant and an ancestor
678 Every node is considered to be both a descendant and an ancestor
677 of itself, so every reachable node in 'roots' and 'heads' will be
679 of itself, so every reachable node in 'roots' and 'heads' will be
678 included in 'nodes'.
680 included in 'nodes'.
679
681
680 'outroots' is the list of reachable nodes in 'roots', i.e., the
682 'outroots' is the list of reachable nodes in 'roots', i.e., the
681 subset of 'roots' that is returned in 'nodes'. Likewise,
683 subset of 'roots' that is returned in 'nodes'. Likewise,
682 'outheads' is the subset of 'heads' that is also in 'nodes'.
684 'outheads' is the subset of 'heads' that is also in 'nodes'.
683
685
684 'roots' and 'heads' are both lists of node IDs. If 'roots' is
686 'roots' and 'heads' are both lists of node IDs. If 'roots' is
685 unspecified, uses nullid as the only root. If 'heads' is
687 unspecified, uses nullid as the only root. If 'heads' is
686 unspecified, uses list of all of the revlog's heads."""
688 unspecified, uses list of all of the revlog's heads."""
687 nonodes = ([], [], [])
689 nonodes = ([], [], [])
688 if roots is not None:
690 if roots is not None:
689 roots = list(roots)
691 roots = list(roots)
690 if not roots:
692 if not roots:
691 return nonodes
693 return nonodes
692 lowestrev = min([self.rev(n) for n in roots])
694 lowestrev = min([self.rev(n) for n in roots])
693 else:
695 else:
694 roots = [nullid] # Everybody's a descendant of nullid
696 roots = [nullid] # Everybody's a descendant of nullid
695 lowestrev = nullrev
697 lowestrev = nullrev
696 if (lowestrev == nullrev) and (heads is None):
698 if (lowestrev == nullrev) and (heads is None):
697 # We want _all_ the nodes!
699 # We want _all_ the nodes!
698 return ([self.node(r) for r in self], [nullid], list(self.heads()))
700 return ([self.node(r) for r in self], [nullid], list(self.heads()))
699 if heads is None:
701 if heads is None:
700 # All nodes are ancestors, so the latest ancestor is the last
702 # All nodes are ancestors, so the latest ancestor is the last
701 # node.
703 # node.
702 highestrev = len(self) - 1
704 highestrev = len(self) - 1
703 # Set ancestors to None to signal that every node is an ancestor.
705 # Set ancestors to None to signal that every node is an ancestor.
704 ancestors = None
706 ancestors = None
705 # Set heads to an empty dictionary for later discovery of heads
707 # Set heads to an empty dictionary for later discovery of heads
706 heads = {}
708 heads = {}
707 else:
709 else:
708 heads = list(heads)
710 heads = list(heads)
709 if not heads:
711 if not heads:
710 return nonodes
712 return nonodes
711 ancestors = set()
713 ancestors = set()
712 # Turn heads into a dictionary so we can remove 'fake' heads.
714 # Turn heads into a dictionary so we can remove 'fake' heads.
713 # Also, later we will be using it to filter out the heads we can't
715 # Also, later we will be using it to filter out the heads we can't
714 # find from roots.
716 # find from roots.
715 heads = dict.fromkeys(heads, False)
717 heads = dict.fromkeys(heads, False)
716 # Start at the top and keep marking parents until we're done.
718 # Start at the top and keep marking parents until we're done.
717 nodestotag = set(heads)
719 nodestotag = set(heads)
718 # Remember where the top was so we can use it as a limit later.
720 # Remember where the top was so we can use it as a limit later.
719 highestrev = max([self.rev(n) for n in nodestotag])
721 highestrev = max([self.rev(n) for n in nodestotag])
720 while nodestotag:
722 while nodestotag:
721 # grab a node to tag
723 # grab a node to tag
722 n = nodestotag.pop()
724 n = nodestotag.pop()
723 # Never tag nullid
725 # Never tag nullid
724 if n == nullid:
726 if n == nullid:
725 continue
727 continue
726 # A node's revision number represents its place in a
728 # A node's revision number represents its place in a
727 # topologically sorted list of nodes.
729 # topologically sorted list of nodes.
728 r = self.rev(n)
730 r = self.rev(n)
729 if r >= lowestrev:
731 if r >= lowestrev:
730 if n not in ancestors:
732 if n not in ancestors:
731 # If we are possibly a descendant of one of the roots
733 # If we are possibly a descendant of one of the roots
732 # and we haven't already been marked as an ancestor
734 # and we haven't already been marked as an ancestor
733 ancestors.add(n) # Mark as ancestor
735 ancestors.add(n) # Mark as ancestor
734 # Add non-nullid parents to list of nodes to tag.
736 # Add non-nullid parents to list of nodes to tag.
735 nodestotag.update([p for p in self.parents(n) if
737 nodestotag.update([p for p in self.parents(n) if
736 p != nullid])
738 p != nullid])
737 elif n in heads: # We've seen it before, is it a fake head?
739 elif n in heads: # We've seen it before, is it a fake head?
738 # So it is, real heads should not be the ancestors of
740 # So it is, real heads should not be the ancestors of
739 # any other heads.
741 # any other heads.
740 heads.pop(n)
742 heads.pop(n)
741 if not ancestors:
743 if not ancestors:
742 return nonodes
744 return nonodes
743 # Now that we have our set of ancestors, we want to remove any
745 # Now that we have our set of ancestors, we want to remove any
744 # roots that are not ancestors.
746 # roots that are not ancestors.
745
747
746 # If one of the roots was nullid, everything is included anyway.
748 # If one of the roots was nullid, everything is included anyway.
747 if lowestrev > nullrev:
749 if lowestrev > nullrev:
748 # But, since we weren't, let's recompute the lowest rev to not
750 # But, since we weren't, let's recompute the lowest rev to not
749 # include roots that aren't ancestors.
751 # include roots that aren't ancestors.
750
752
751 # Filter out roots that aren't ancestors of heads
753 # Filter out roots that aren't ancestors of heads
752 roots = [root for root in roots if root in ancestors]
754 roots = [root for root in roots if root in ancestors]
753 # Recompute the lowest revision
755 # Recompute the lowest revision
754 if roots:
756 if roots:
755 lowestrev = min([self.rev(root) for root in roots])
757 lowestrev = min([self.rev(root) for root in roots])
756 else:
758 else:
757 # No more roots? Return empty list
759 # No more roots? Return empty list
758 return nonodes
760 return nonodes
759 else:
761 else:
760 # We are descending from nullid, and don't need to care about
762 # We are descending from nullid, and don't need to care about
761 # any other roots.
763 # any other roots.
762 lowestrev = nullrev
764 lowestrev = nullrev
763 roots = [nullid]
765 roots = [nullid]
764 # Transform our roots list into a set.
766 # Transform our roots list into a set.
765 descendants = set(roots)
767 descendants = set(roots)
766 # Also, keep the original roots so we can filter out roots that aren't
768 # Also, keep the original roots so we can filter out roots that aren't
767 # 'real' roots (i.e. are descended from other roots).
769 # 'real' roots (i.e. are descended from other roots).
768 roots = descendants.copy()
770 roots = descendants.copy()
769 # Our topologically sorted list of output nodes.
771 # Our topologically sorted list of output nodes.
770 orderedout = []
772 orderedout = []
771 # Don't start at nullid since we don't want nullid in our output list,
773 # Don't start at nullid since we don't want nullid in our output list,
772 # and if nullid shows up in descendants, empty parents will look like
774 # and if nullid shows up in descendants, empty parents will look like
773 # they're descendants.
775 # they're descendants.
774 for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
776 for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
775 n = self.node(r)
777 n = self.node(r)
776 isdescendant = False
778 isdescendant = False
777 if lowestrev == nullrev: # Everybody is a descendant of nullid
779 if lowestrev == nullrev: # Everybody is a descendant of nullid
778 isdescendant = True
780 isdescendant = True
779 elif n in descendants:
781 elif n in descendants:
780 # n is already a descendant
782 # n is already a descendant
781 isdescendant = True
783 isdescendant = True
782 # This check only needs to be done here because all the roots
784 # This check only needs to be done here because all the roots
783 # will start being marked is descendants before the loop.
785 # will start being marked is descendants before the loop.
784 if n in roots:
786 if n in roots:
785 # If n was a root, check if it's a 'real' root.
787 # If n was a root, check if it's a 'real' root.
786 p = tuple(self.parents(n))
788 p = tuple(self.parents(n))
787 # If any of its parents are descendants, it's not a root.
789 # If any of its parents are descendants, it's not a root.
788 if (p[0] in descendants) or (p[1] in descendants):
790 if (p[0] in descendants) or (p[1] in descendants):
789 roots.remove(n)
791 roots.remove(n)
790 else:
792 else:
791 p = tuple(self.parents(n))
793 p = tuple(self.parents(n))
792 # A node is a descendant if either of its parents are
794 # A node is a descendant if either of its parents are
793 # descendants. (We seeded the dependents list with the roots
795 # descendants. (We seeded the dependents list with the roots
794 # up there, remember?)
796 # up there, remember?)
795 if (p[0] in descendants) or (p[1] in descendants):
797 if (p[0] in descendants) or (p[1] in descendants):
796 descendants.add(n)
798 descendants.add(n)
797 isdescendant = True
799 isdescendant = True
798 if isdescendant and ((ancestors is None) or (n in ancestors)):
800 if isdescendant and ((ancestors is None) or (n in ancestors)):
799 # Only include nodes that are both descendants and ancestors.
801 # Only include nodes that are both descendants and ancestors.
800 orderedout.append(n)
802 orderedout.append(n)
801 if (ancestors is not None) and (n in heads):
803 if (ancestors is not None) and (n in heads):
802 # We're trying to figure out which heads are reachable
804 # We're trying to figure out which heads are reachable
803 # from roots.
805 # from roots.
804 # Mark this head as having been reached
806 # Mark this head as having been reached
805 heads[n] = True
807 heads[n] = True
806 elif ancestors is None:
808 elif ancestors is None:
807 # Otherwise, we're trying to discover the heads.
809 # Otherwise, we're trying to discover the heads.
808 # Assume this is a head because if it isn't, the next step
810 # Assume this is a head because if it isn't, the next step
809 # will eventually remove it.
811 # will eventually remove it.
810 heads[n] = True
812 heads[n] = True
811 # But, obviously its parents aren't.
813 # But, obviously its parents aren't.
812 for p in self.parents(n):
814 for p in self.parents(n):
813 heads.pop(p, None)
815 heads.pop(p, None)
814 heads = [head for head, flag in heads.iteritems() if flag]
816 heads = [head for head, flag in heads.iteritems() if flag]
815 roots = list(roots)
817 roots = list(roots)
816 assert orderedout
818 assert orderedout
817 assert roots
819 assert roots
818 assert heads
820 assert heads
819 return (orderedout, roots, heads)
821 return (orderedout, roots, heads)
820
822
821 def headrevs(self):
823 def headrevs(self):
822 try:
824 try:
823 return self.index.headrevs()
825 return self.index.headrevs()
824 except AttributeError:
826 except AttributeError:
825 return self._headrevs()
827 return self._headrevs()
826
828
827 def computephases(self, roots):
829 def computephases(self, roots):
828 return self.index.computephasesmapsets(roots)
830 return self.index.computephasesmapsets(roots)
829
831
830 def _headrevs(self):
832 def _headrevs(self):
831 count = len(self)
833 count = len(self)
832 if not count:
834 if not count:
833 return [nullrev]
835 return [nullrev]
834 # we won't iter over filtered rev so nobody is a head at start
836 # we won't iter over filtered rev so nobody is a head at start
835 ishead = [0] * (count + 1)
837 ishead = [0] * (count + 1)
836 index = self.index
838 index = self.index
837 for r in self:
839 for r in self:
838 ishead[r] = 1 # I may be an head
840 ishead[r] = 1 # I may be an head
839 e = index[r]
841 e = index[r]
840 ishead[e[5]] = ishead[e[6]] = 0 # my parent are not
842 ishead[e[5]] = ishead[e[6]] = 0 # my parent are not
841 return [r for r, val in enumerate(ishead) if val]
843 return [r for r, val in enumerate(ishead) if val]
842
844
843 def heads(self, start=None, stop=None):
845 def heads(self, start=None, stop=None):
844 """return the list of all nodes that have no children
846 """return the list of all nodes that have no children
845
847
846 if start is specified, only heads that are descendants of
848 if start is specified, only heads that are descendants of
847 start will be returned
849 start will be returned
848 if stop is specified, it will consider all the revs from stop
850 if stop is specified, it will consider all the revs from stop
849 as if they had no children
851 as if they had no children
850 """
852 """
851 if start is None and stop is None:
853 if start is None and stop is None:
852 if not len(self):
854 if not len(self):
853 return [nullid]
855 return [nullid]
854 return [self.node(r) for r in self.headrevs()]
856 return [self.node(r) for r in self.headrevs()]
855
857
856 if start is None:
858 if start is None:
857 start = nullid
859 start = nullid
858 if stop is None:
860 if stop is None:
859 stop = []
861 stop = []
860 stoprevs = set([self.rev(n) for n in stop])
862 stoprevs = set([self.rev(n) for n in stop])
861 startrev = self.rev(start)
863 startrev = self.rev(start)
862 reachable = set((startrev,))
864 reachable = set((startrev,))
863 heads = set((startrev,))
865 heads = set((startrev,))
864
866
865 parentrevs = self.parentrevs
867 parentrevs = self.parentrevs
866 for r in self.revs(start=startrev + 1):
868 for r in self.revs(start=startrev + 1):
867 for p in parentrevs(r):
869 for p in parentrevs(r):
868 if p in reachable:
870 if p in reachable:
869 if r not in stoprevs:
871 if r not in stoprevs:
870 reachable.add(r)
872 reachable.add(r)
871 heads.add(r)
873 heads.add(r)
872 if p in heads and p not in stoprevs:
874 if p in heads and p not in stoprevs:
873 heads.remove(p)
875 heads.remove(p)
874
876
875 return [self.node(r) for r in heads]
877 return [self.node(r) for r in heads]
876
878
877 def children(self, node):
879 def children(self, node):
878 """find the children of a given node"""
880 """find the children of a given node"""
879 c = []
881 c = []
880 p = self.rev(node)
882 p = self.rev(node)
881 for r in self.revs(start=p + 1):
883 for r in self.revs(start=p + 1):
882 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
884 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
883 if prevs:
885 if prevs:
884 for pr in prevs:
886 for pr in prevs:
885 if pr == p:
887 if pr == p:
886 c.append(self.node(r))
888 c.append(self.node(r))
887 elif p == nullrev:
889 elif p == nullrev:
888 c.append(self.node(r))
890 c.append(self.node(r))
889 return c
891 return c
890
892
891 def descendant(self, start, end):
893 def descendant(self, start, end):
892 if start == nullrev:
894 if start == nullrev:
893 return True
895 return True
894 for i in self.descendants([start]):
896 for i in self.descendants([start]):
895 if i == end:
897 if i == end:
896 return True
898 return True
897 elif i > end:
899 elif i > end:
898 break
900 break
899 return False
901 return False
900
902
    def commonancestorsheads(self, a, b):
        """calculate all the heads of the common ancestors of nodes a and b"""
        a, b = self.rev(a), self.rev(b)
        try:
            ancs = self.index.commonancestorsheads(a, b)
        except (AttributeError, OverflowError): # C implementation failed
            ancs = ancestor.commonancestorsheads(self.parentrevs, a, b)
        return map(self.node, ancs)

    def isancestor(self, a, b):
        """return True if node a is an ancestor of node b

        The implementation of this is trivial but the use of
        commonancestorsheads is not."""
        return a in self.commonancestorsheads(a, b)

    def ancestor(self, a, b):
        """calculate the "best" common ancestor of nodes a and b"""

        a, b = self.rev(a), self.rev(b)
        try:
            ancs = self.index.ancestors(a, b)
        except (AttributeError, OverflowError):
            ancs = ancestor.ancestors(self.parentrevs, a, b)
        if ancs:
            # choose a consistent winner when there's a tie
            return min(map(self.node, ancs))
        return nullid

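    # Illustrative sketch (assumption): when several candidate ancestors are
    # equally good, ``min()`` over the 20-byte binary nodeids picks a
    # deterministic winner, so repeated calls agree across processes:
    #
    #     candidates = ['\x02' * 20, '\x01' * 20]   # hypothetical nodeids
    #     min(candidates) == '\x01' * 20            # always the same pick
    #
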
    def _match(self, id):
        if isinstance(id, int):
            # rev
            return self.node(id)
        if len(id) == 20:
            # possibly a binary node
            # odds of a binary node being all hex in ASCII are 1 in 10**25
            try:
                node = id
                self.rev(node) # quick search the index
                return node
            except LookupError:
                pass # may be partial hex id
        try:
            # str(rev)
            rev = int(id)
            if str(rev) != id:
                raise ValueError
            if rev < 0:
                rev = len(self) + rev
            if rev < 0 or rev >= len(self):
                raise ValueError
            return self.node(rev)
        except (ValueError, OverflowError):
            pass
        if len(id) == 40:
            try:
                # a full hex nodeid?
                node = bin(id)
                self.rev(node)
                return node
            except (TypeError, LookupError):
                pass

    def _partialmatch(self, id):
        try:
            partial = self.index.partialmatch(id)
            if partial and self.hasnode(partial):
                return partial
            return None
        except RevlogError:
            # parsers.c radix tree lookup gave multiple matches
            # fast path: for unfiltered changelog, radix tree is accurate
            if not getattr(self, 'filteredrevs', None):
                raise LookupError(id, self.indexfile,
                                  _('ambiguous identifier'))
            # fall through to slow path that filters hidden revisions
        except (AttributeError, ValueError):
            # we are pure python, or key was too short to search radix tree
            pass

        if id in self._pcache:
            return self._pcache[id]

        if len(id) < 40:
            try:
                # hex(node)[:...]
                l = len(id) // 2  # grab an even number of digits
                prefix = bin(id[:l * 2])
                nl = [e[7] for e in self.index if e[7].startswith(prefix)]
                nl = [n for n in nl if hex(n).startswith(id) and
                      self.hasnode(n)]
                if len(nl) > 0:
                    if len(nl) == 1:
                        self._pcache[id] = nl[0]
                        return nl[0]
                    raise LookupError(id, self.indexfile,
                                      _('ambiguous identifier'))
                return None
            except TypeError:
                pass

    def lookup(self, id):
        """locate a node based on:
            - revision number or str(revision number)
            - nodeid or subset of hex nodeid
        """
        n = self._match(id)
        if n is not None:
            return n
        n = self._partialmatch(id)
        if n:
            return n

        raise LookupError(id, self.indexfile, _('no match found'))

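    # Illustrative sketch (assumption): ``lookup`` accepts several spellings
    # of the same revision. With a hypothetical revlog ``rl`` and a prefix
    # that happens to be unambiguous:
    #
    #     rl.lookup(0)           # integer rev
    #     rl.lookup('0')         # str(rev)
    #     rl.lookup('1e4f')      # hypothetical unambiguous hex prefix
    #     rl.lookup(hex(node))   # full 40-char hex nodeid
    #
    # All return the 20-byte binary nodeid, or raise LookupError.
    #
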
    def cmp(self, node, text):
        """compare text with a given file revision

        returns True if text is different than what is stored.
        """
        p1, p2 = self.parents(node)
        return hash(text, p1, p2) != node

    def _addchunk(self, offset, data):
        """Add a segment to the revlog cache.

        Accepts an absolute offset and the data that is at that location.
        """
        o, d = self._chunkcache
        # try to add to existing cache
        if o + len(d) == offset and len(d) + len(data) < _chunksize:
            self._chunkcache = o, d + data
        else:
            self._chunkcache = offset, data

    def _loadchunk(self, offset, length, df=None):
        """Load a segment of raw data from the revlog.

        Accepts an absolute offset, length to read, and an optional existing
        file handle to read from.

        If an existing file handle is passed, it will be seeked and the
        original seek position will NOT be restored.

        Returns a str or buffer of raw byte data.
        """
        if df is not None:
            closehandle = False
        else:
            if self._inline:
                df = self.opener(self.indexfile)
            else:
                df = self.opener(self.datafile)
            closehandle = True

        # Cache data both forward and backward around the requested
        # data, in a fixed size window. This helps speed up operations
        # involving reading the revlog backwards.
        cachesize = self._chunkcachesize
        realoffset = offset & ~(cachesize - 1)
        reallength = (((offset + length + cachesize) & ~(cachesize - 1))
                      - realoffset)
        df.seek(realoffset)
        d = df.read(reallength)
        if closehandle:
            df.close()
        self._addchunk(realoffset, d)
        if offset != realoffset or reallength != length:
            return util.buffer(d, offset - realoffset, length)
        return d

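    # Worked example (illustrative, assuming the default 64KiB cache window):
    # with cachesize = 65536, offset = 70000, length = 1000:
    #
    #     realoffset = 70000 & ~65535             # -> 65536 (round down)
    #     reallength = ((70000 + 1000 + 65536)
    #                   & ~65535) - 65536         # -> 131072 - 65536 = 65536
    #
    # so one aligned 64KiB read covers the request plus its neighborhood,
    # and ``util.buffer`` slices the 1000 requested bytes back out.
    #
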
    def _getchunk(self, offset, length, df=None):
        """Obtain a segment of raw data from the revlog.

        Accepts an absolute offset, length of bytes to obtain, and an
        optional file handle to the already-opened revlog. If the file
        handle is used, its original seek position will not be preserved.

        Requests for data may be returned from a cache.

        Returns a str or a buffer instance of raw byte data.
        """
        o, d = self._chunkcache
        l = len(d)

        # is it in the cache?
        cachestart = offset - o
        cacheend = cachestart + length
        if cachestart >= 0 and cacheend <= l:
            if cachestart == 0 and cacheend == l:
                return d # avoid a copy
            return util.buffer(d, cachestart, cacheend - cachestart)

        return self._loadchunk(offset, length, df=df)

    def _chunkraw(self, startrev, endrev, df=None):
        """Obtain a segment of raw data corresponding to a range of revisions.

        Accepts the start and end revisions and an optional already-open
        file handle to be used for reading. If the file handle is read, its
        seek position will not be preserved.

        Requests for data may be satisfied by a cache.

        Returns a 2-tuple of (offset, data) for the requested range of
        revisions. Offset is the integer offset from the beginning of the
        revlog and data is a str or buffer of the raw byte data.

        Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
        to determine where each revision's data begins and ends.
        """
        # Inlined self.start(startrev) & self.end(endrev) for perf reasons
        # (functions are expensive).
        index = self.index
        istart = index[startrev]
        start = int(istart[0] >> 16)
        if startrev == endrev:
            end = start + istart[1]
        else:
            iend = index[endrev]
            end = int(iend[0] >> 16) + iend[1]

        if self._inline:
            start += (startrev + 1) * self._io.size
            end += (endrev + 1) * self._io.size
        length = end - start

        return start, self._getchunk(start, length, df=df)

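    # Illustrative note (assumption): each index entry packs the data-file
    # offset and the 16 bits of per-revision flags into a single integer, so
    # ``entry[0] >> 16`` recovers the offset and ``entry[0] & 0xFFFF`` the
    # flags. A hypothetical round trip:
    #
    #     packed = (offset << 16) | flags   # as offset_type() builds it
    #     assert packed >> 16 == offset
    #     assert packed & 0xFFFF == flags   # flags must fit in 2 bytes
    #
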
    def _chunk(self, rev, df=None):
        """Obtain a single decompressed chunk for a revision.

        Accepts an integer revision and an optional already-open file handle
        to be used for reading. If used, the seek position of the file will not
        be preserved.

        Returns a str holding uncompressed data for the requested revision.
        """
        return decompress(self._chunkraw(rev, rev, df=df)[1])

    def _chunks(self, revs, df=None):
        """Obtain decompressed chunks for the specified revisions.

        Accepts an iterable of numeric revisions that are assumed to be in
        ascending order. Also accepts an optional already-open file handle
        to be used for reading. If used, the seek position of the file will
        not be preserved.

        This function is similar to calling ``self._chunk()`` multiple times,
        but is faster.

        Returns a list with decompressed data for each requested revision.
        """
        if not revs:
            return []
        start = self.start
        length = self.length
        inline = self._inline
        iosize = self._io.size
        buffer = util.buffer

        l = []
        ladd = l.append

        try:
            offset, data = self._chunkraw(revs[0], revs[-1], df=df)
        except OverflowError:
            # issue4215 - we can't cache a run of chunks greater than
            # 2G on Windows
            return [self._chunk(rev, df=df) for rev in revs]

        for rev in revs:
            chunkstart = start(rev)
            if inline:
                chunkstart += (rev + 1) * iosize
            chunklength = length(rev)
            ladd(decompress(buffer(data, chunkstart - offset, chunklength)))

        return l

    def _chunkclear(self):
        """Clear the raw chunk cache."""
        self._chunkcache = (0, '')

    def deltaparent(self, rev):
        """return deltaparent of the given revision"""
        base = self.index[rev][3]
        if base == rev:
            return nullrev
        elif self._generaldelta:
            return base
        else:
            return rev - 1

    def revdiff(self, rev1, rev2):
        """return or calculate a delta between two revisions"""
        if rev1 != nullrev and self.deltaparent(rev2) == rev1:
            return str(self._chunk(rev2))

        return mdiff.textdiff(self.revision(rev1),
                              self.revision(rev2))

    def revision(self, nodeorrev, _df=None):
        """return an uncompressed revision of a given node or revision
        number.

        _df is an existing file handle to read from. It is meant to only be
        used internally.
        """
        if isinstance(nodeorrev, int):
            rev = nodeorrev
            node = self.node(rev)
        else:
            node = nodeorrev
            rev = None

        cachedrev = None
        if node == nullid:
            return ""
        if self._cache:
            if self._cache[0] == node:
                return self._cache[2]
            cachedrev = self._cache[1]

        # look up what we need to read
        text = None
        if rev is None:
            rev = self.rev(node)

        # check rev flags
        if self.flags(rev) & ~REVIDX_KNOWN_FLAGS:
            raise RevlogError(_('incompatible revision flag %x') %
                              (self.flags(rev) & ~REVIDX_KNOWN_FLAGS))

        chain, stopped = self._deltachain(rev, stoprev=cachedrev)
        if stopped:
            text = self._cache[2]

        # drop cache to save memory
        self._cache = None

        bins = self._chunks(chain, df=_df)
        if text is None:
            text = str(bins[0])
            bins = bins[1:]

        text = mdiff.patches(text, bins)

        text = self._checkhash(text, node, rev)

        self._cache = (node, rev, text)
        return text

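    # Illustrative note (assumption): the flag check above rejects any index
    # flag this version does not understand. With only REVIDX_ISCENSORED
    # (1 << 15) known:
    #
    #     flags = (1 << 14) | REVIDX_ISCENSORED
    #     flags & ~REVIDX_KNOWN_FLAGS   # -> 1 << 14, unknown -> RevlogError
    #
    # This keeps older clients from silently misreading data written with
    # newer flag semantics.
    #
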
    def hash(self, text, p1, p2):
        """Compute a node hash.

        Available as a function so that subclasses can replace the hash
        as needed.
        """
        return hash(text, p1, p2)

    def _checkhash(self, text, node, rev):
        p1, p2 = self.parents(node)
        self.checkhash(text, p1, p2, node, rev)
        return text

    def checkhash(self, text, p1, p2, node, rev=None):
        if node != self.hash(text, p1, p2):
            revornode = rev
            if revornode is None:
                revornode = templatefilters.short(hex(node))
            raise RevlogError(_("integrity check failed on %s:%s")
                              % (self.indexfile, revornode))

    def checkinlinesize(self, tr, fp=None):
        """Check if the revlog is too big for inline and convert if so.

        This should be called after revisions are added to the revlog. If the
        revlog has grown too large to be an inline revlog, it will convert it
        to use multiple index and data files.
        """
        if not self._inline or (self.start(-2) + self.length(-2)) < _maxinline:
            return

        trinfo = tr.find(self.indexfile)
        if trinfo is None:
            raise RevlogError(_("%s not found in the transaction")
                              % self.indexfile)

        trindex = trinfo[2]
        if trindex is not None:
            dataoff = self.start(trindex)
        else:
            # revlog was stripped at start of transaction, use all leftover data
            trindex = len(self) - 1
            dataoff = self.end(-2)

        tr.add(self.datafile, dataoff)

        if fp:
            fp.flush()
            fp.close()

        df = self.opener(self.datafile, 'w')
        try:
            for r in self:
                df.write(self._chunkraw(r, r)[1])
        finally:
            df.close()

        fp = self.opener(self.indexfile, 'w', atomictemp=True,
                         checkambig=self._checkambig)
        self.version &= ~(REVLOGNGINLINEDATA)
        self._inline = False
        for i in self:
            e = self._io.packentry(self.index[i], self.node, self.version, i)
            fp.write(e)

        # if we don't call close, the temp file will never replace the
        # real index
        fp.close()

        tr.replace(self.indexfile, trindex * self._io.size)
        self._chunkclear()

    def addrevision(self, text, transaction, link, p1, p2, cachedelta=None,
                    node=None):
        """add a revision to the log

        text - the revision data to add
        transaction - the transaction object used for rollback
        link - the linkrev data to add
        p1, p2 - the parent nodeids of the revision
        cachedelta - an optional precomputed delta
        node - nodeid of revision; typically node is not specified, and it is
            computed by default as hash(text, p1, p2), however subclasses might
            use different hashing method (and override checkhash() in such case)
        """
        if link == nullrev:
            raise RevlogError(_("attempted to add linkrev -1 to %s")
                              % self.indexfile)

        if len(text) > _maxentrysize:
            raise RevlogError(
                _("%s: size of %d bytes exceeds maximum revlog storage of 2GiB")
                % (self.indexfile, len(text)))

        node = node or self.hash(text, p1, p2)
        if node in self.nodemap:
            return node

        dfh = None
        if not self._inline:
            dfh = self.opener(self.datafile, "a+")
        ifh = self.opener(self.indexfile, "a+", checkambig=self._checkambig)
        try:
            return self._addrevision(node, text, transaction, link, p1, p2,
                                     REVIDX_DEFAULT_FLAGS, cachedelta, ifh, dfh)
        finally:
            if dfh:
                dfh.close()
            ifh.close()

    def compress(self, text):
        """ generate a possibly-compressed representation of text """
        if not text:
            return ("", text)
        l = len(text)
        bin = None
        if l < 44:
            pass
        elif l > 1000000:
            # zlib makes an internal copy, thus doubling memory usage for
            # large files, so let's do this in pieces
            z = zlib.compressobj()
            p = []
            pos = 0
            while pos < l:
                pos2 = pos + 2**20
                p.append(z.compress(text[pos:pos2]))
                pos = pos2
            p.append(z.flush())
            if sum(map(len, p)) < l:
                bin = "".join(p)
        else:
            bin = _compress(text)
        if bin is None or len(bin) > l:
            if text[0] == '\0':
                return ("", text)
            return ('u', text)
        return ("", bin)

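    # Illustrative note (assumption): the returned 2-tuple is (header, data).
    # An empty header means zlib-compressed data (or raw data whose first
    # byte is '\0', which cannot be confused with zlib's magic byte), while a
    # 'u' header marks text stored uncompressed because compression did not
    # pay off. For example:
    #
    #     compress('')             # -> ('', '')     empty stays empty
    #     compress('a' * 100000)   # -> ('', <zlib>) compresses well
    #     compress('xy')           # -> ('u', 'xy')  too short to bother
    #
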
    def _isgooddelta(self, d, textlen):
        """Returns True if the given delta is good. Good means that it is within
        the disk span, disk size, and chain length bounds that we know to be
        performant."""
        if d is None:
            return False

        # - 'dist' is the distance from the base revision -- bounding it limits
        #   the amount of I/O we need to do.
        # - 'compresseddeltalen' is the sum of the total size of deltas we need
        #   to apply -- bounding it limits the amount of CPU we consume.
        dist, l, data, base, chainbase, chainlen, compresseddeltalen = d
        if (dist > textlen * 4 or l > textlen or
            compresseddeltalen > textlen * 2 or
            (self._maxchainlen and chainlen > self._maxchainlen)):
            return False

        return True

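    # Worked example (illustrative): for a 1000-byte fulltext, a candidate
    # delta is rejected when the span of data to read exceeds 4000 bytes
    # (dist > textlen * 4), the delta itself exceeds 1000 bytes
    # (l > textlen), or the whole chain's deltas exceed 2000 bytes
    # (compresseddeltalen > textlen * 2) -- at that point storing a fresh
    # fulltext is cheaper than replaying the chain.
    #
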
    def _addrevision(self, node, text, transaction, link, p1, p2, flags,
                     cachedelta, ifh, dfh, alwayscache=False):
        """internal function to add revisions to the log

        see addrevision for argument descriptions.
        invariants:
        - text is optional (can be None); if not set, cachedelta must be set.
          if both are set, they must correspond to each other.
        """
        btext = [text]
        def buildtext():
            if btext[0] is not None:
                return btext[0]
            baserev = cachedelta[0]
            delta = cachedelta[1]
            # special case deltas which replace entire base; no need to decode
            # base revision. this neatly avoids censored bases, which throw when
            # they're decoded.
            hlen = struct.calcsize(">lll")
            if delta[:hlen] == mdiff.replacediffheader(self.rawsize(baserev),
                                                       len(delta) - hlen):
                btext[0] = delta[hlen:]
            else:
                if self._inline:
                    fh = ifh
                else:
                    fh = dfh
                basetext = self.revision(self.node(baserev), _df=fh)
                btext[0] = mdiff.patch(basetext, delta)
            try:
                self.checkhash(btext[0], p1, p2, node)
                if flags & REVIDX_ISCENSORED:
                    raise RevlogError(_('node %s is not censored') % node)
            except CensoredNodeError:
                # must pass the censored index flag to add censored revisions
                if not flags & REVIDX_ISCENSORED:
                    raise
            return btext[0]

        def builddelta(rev):
            # can we use the cached delta?
            if cachedelta and cachedelta[0] == rev:
                delta = cachedelta[1]
            else:
                t = buildtext()
                if self.iscensored(rev):
                    # deltas based on a censored revision must replace the
                    # full content in one patch, so delta works everywhere
                    header = mdiff.replacediffheader(self.rawsize(rev), len(t))
                    delta = header + t
                else:
                    if self._inline:
                        fh = ifh
                    else:
                        fh = dfh
                    ptext = self.revision(self.node(rev), _df=fh)
                    delta = mdiff.textdiff(ptext, t)
            header, data = self.compress(delta)
            deltalen = len(header) + len(data)
            chainbase = self.chainbase(rev)
            dist = deltalen + offset - self.start(chainbase)
            if self._generaldelta:
                base = rev
            else:
                base = chainbase
            chainlen, compresseddeltalen = self._chaininfo(rev)
            chainlen += 1
            compresseddeltalen += deltalen
            return (dist, deltalen, (header, data), base,
                    chainbase, chainlen, compresseddeltalen)

        curr = len(self)
        prev = curr - 1
        offset = self.end(prev)
        delta = None
        p1r, p2r = self.rev(p1), self.rev(p2)

        # full versions are inserted when the needed deltas
        # become comparable to the uncompressed text
        if text is None:
            textlen = mdiff.patchedsize(self.rawsize(cachedelta[0]),
                                        cachedelta[1])
        else:
            textlen = len(text)

        # should we try to build a delta?
        if prev != nullrev and self.storedeltachains:
            tested = set()
            # This condition is true most of the time when processing
            # changegroup data into a generaldelta repo. The only time it
            # isn't true is if this is the first revision in a delta chain
            # or if ``format.generaldelta=true`` disabled ``lazydeltabase``.
            if cachedelta and self._generaldelta and self._lazydeltabase:
                # Assume what we received from the server is a good choice;
                # builddelta will reuse the cache.
                candidatedelta = builddelta(cachedelta[0])
                tested.add(cachedelta[0])
                if self._isgooddelta(candidatedelta, textlen):
                    delta = candidatedelta
            if delta is None and self._generaldelta:
                # exclude any base already tested lazily above
                parents = [p for p in (p1r, p2r)
                           if p != nullrev and p not in tested]
                if parents and not self._aggressivemergedeltas:
                    # Pick whichever parent is closer to us (to minimize the
                    # chance of having to build a fulltext).
                    parents = [max(parents)]
                tested.update(parents)
                pdeltas = []
                for p in parents:
                    pd = builddelta(p)
                    if self._isgooddelta(pd, textlen):
                        pdeltas.append(pd)
                if pdeltas:
                    delta = min(pdeltas, key=lambda x: x[1])
            if delta is None and prev not in tested:
                # other approaches failed; try against prev to hopefully
                # save us a fulltext.
                candidatedelta = builddelta(prev)
                if self._isgooddelta(candidatedelta, textlen):
                    delta = candidatedelta
        if delta is not None:
            dist, l, data, base, chainbase, chainlen, compresseddeltalen = delta
        else:
            text = buildtext()
            data = self.compress(text)
            l = len(data[1]) + len(data[0])
            base = chainbase = curr

        e = (offset_type(offset, flags), l, textlen,
             base, link, p1r, p2r, node)
        self.index.insert(-1, e)
        self.nodemap[node] = curr

        entry = self._io.packentry(e, self.node, self.version, curr)
        self._writeentry(transaction, ifh, dfh, entry, data, link, offset)

        if alwayscache and text is None:
            text = buildtext()

        if type(text) == str: # only accept immutable objects
            self._cache = (node, curr, text)
        self._chainbasecache[curr] = chainbase
        return node

    def _writeentry(self, transaction, ifh, dfh, entry, data, link, offset):
        # Files opened in a+ mode have inconsistent behavior on various
        # platforms. Windows requires that a file positioning call be made
        # when the file handle transitions between reads and writes. See
        # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
        # platforms, Python or the platform itself can be buggy. Some versions
        # of Solaris have been observed to not append at the end of the file
        # if the file was seeked to before the end. See issue4943 for more.
        #
        # We work around this issue by inserting a seek() before writing.
        # Note: This is likely not necessary on Python 3.
        ifh.seek(0, os.SEEK_END)
        if dfh:
            dfh.seek(0, os.SEEK_END)

        curr = len(self) - 1
        if not self._inline:
            transaction.add(self.datafile, offset)
            transaction.add(self.indexfile, curr * len(entry))
            if data[0]:
                dfh.write(data[0])
            dfh.write(data[1])
            ifh.write(entry)
        else:
            offset += curr * self._io.size
            transaction.add(self.indexfile, offset, curr)
            ifh.write(entry)
            ifh.write(data[0])
            ifh.write(data[1])
        self.checkinlinesize(transaction, ifh)

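    # Illustrative sketch (assumption): the defensive pattern above, shown in
    # isolation -- always reposition an append-mode handle before writing,
    # because a prior read may have moved it on some platforms:
    #
    #     fh = open(path, 'a+b')    # hypothetical path
    #     fh.read(1)                # position is now implementation-defined
    #     fh.seek(0, os.SEEK_END)   # force it to EOF before appending
    #     fh.write(entrybytes)      # hypothetical payload
    #
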
    def addgroup(self, cg, linkmapper, transaction, addrevisioncb=None):
        """
        add a delta group

        given a set of deltas, add them to the revision log. the
        first delta is against its parent, which should be in our
        log, the rest are against the previous delta.

        If ``addrevisioncb`` is defined, it will be called with arguments of
        this revlog and the node that was added.
        """

        # track the base of the current delta log
        content = []
        node = None

        r = len(self)
        end = 0
        if r:
            end = self.end(r - 1)
        ifh = self.opener(self.indexfile, "a+", checkambig=self._checkambig)
        isize = r * self._io.size
        if self._inline:
            transaction.add(self.indexfile, end + isize, r)
            dfh = None
        else:
            transaction.add(self.indexfile, isize, r)
            transaction.add(self.datafile, end)
            dfh = self.opener(self.datafile, "a+")
        def flush():
            if dfh:
                dfh.flush()
            ifh.flush()
        try:
            # loop through our set of deltas
            chain = None
            for chunkdata in iter(lambda: cg.deltachunk(chain), {}):
                node = chunkdata['node']
                p1 = chunkdata['p1']
                p2 = chunkdata['p2']
                cs = chunkdata['cs']
                deltabase = chunkdata['deltabase']
                delta = chunkdata['delta']
                flags = chunkdata['flags'] or REVIDX_DEFAULT_FLAGS

                content.append(node)

                link = linkmapper(cs)
                if node in self.nodemap:
                    # this can happen if two branches make the same change
                    chain = node
                    continue

                for p in (p1, p2):
                    if p not in self.nodemap:
                        raise LookupError(p, self.indexfile,
                                          _('unknown parent'))

                if deltabase not in self.nodemap:
                    raise LookupError(deltabase, self.indexfile,
                                      _('unknown delta base'))

                baserev = self.rev(deltabase)

                if baserev != nullrev and self.iscensored(baserev):
                    # if base is censored, delta must be full replacement in a
                    # single patch operation
                    hlen = struct.calcsize(">lll")
                    oldlen = self.rawsize(baserev)
                    newlen = len(delta) - hlen
                    if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
                        raise error.CensoredBaseError(self.indexfile,
                                                      self.node(baserev))

                if not flags and self._peek_iscensored(baserev, delta, flush):
                    flags |= REVIDX_ISCENSORED

                # We assume consumers of addrevisioncb will want to retrieve
                # the added revision, which will require a call to
                # revision(). revision() will fast path if there is a cache
                # hit. So, we tell _addrevision() to always cache in this case.
                chain = self._addrevision(node, None, transaction, link,
                                          p1, p2, flags, (baserev, delta),
                                          ifh, dfh,
                                          alwayscache=bool(addrevisioncb))

                if addrevisioncb:
                    addrevisioncb(self, chain)

                if not dfh and not self._inline:
                    # addrevision switched from inline to conventional
                    # reopen the index
                    ifh.close()
                    dfh = self.opener(self.datafile, "a+")
                    ifh = self.opener(self.indexfile, "a+",
                                      checkambig=self._checkambig)
        finally:
            if dfh:
                dfh.close()
            ifh.close()

        return content

    def iscensored(self, rev):
        """Check if a file revision is censored."""
        return False

    def _peek_iscensored(self, baserev, delta, flush):
        """Quickly check if a delta produces a censored revision."""
        return False

    def getstrippoint(self, minlink):
        """find the minimum rev that must be stripped to strip the linkrev

        Returns a tuple containing the minimum rev and a set of all revs that
        have linkrevs that will be broken by this strip.
        """
        brokenrevs = set()
        strippoint = len(self)

        heads = {}
        futurelargelinkrevs = set()
        for head in self.headrevs():
            headlinkrev = self.linkrev(head)
            heads[head] = headlinkrev
            if headlinkrev >= minlink:
                futurelargelinkrevs.add(headlinkrev)

        # This algorithm involves walking down the rev graph, starting at the
        # heads. Since the revs are topologically sorted according to linkrev,
        # once all head linkrevs are below the minlink, we know there are
        # no more revs that could have a linkrev greater than minlink.
        # So we can stop walking.
        while futurelargelinkrevs:
            strippoint -= 1
            linkrev = heads.pop(strippoint)

            if linkrev < minlink:
                brokenrevs.add(strippoint)
            else:
                futurelargelinkrevs.remove(linkrev)

            for p in self.parentrevs(strippoint):
                if p != nullrev:
                    plinkrev = self.linkrev(p)
                    heads[p] = plinkrev
                    if plinkrev >= minlink:
                        futurelargelinkrevs.add(plinkrev)

        return strippoint, brokenrevs

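    # Worked example (illustrative): in a hypothetical linear filelog whose
    # revs 0..3 carry linkrevs [0, 2, 5, 7], getstrippoint(5) walks back from
    # the head until every remaining linkrev is < 5 and returns strippoint 2
    # with an empty ``brokenrevs`` set: revs 2 and 3 are truncated, and no
    # surviving rev points at a stripped changelog entry.
    #
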
    def strip(self, minlink, transaction):
        """truncate the revlog on the first revision with a linkrev >= minlink

        This function is called when we're stripping revision minlink and
        its descendants from the repository.

        We have to remove all revisions with linkrev >= minlink, because
        the equivalent changelog revisions will be renumbered after the
        strip.

        So we truncate the revlog on the first of these revisions, and
        trust that the caller has saved the revisions that shouldn't be
        removed and that it'll re-add them after this truncation.
        """
        if len(self) == 0:
            return

        rev, _ = self.getstrippoint(minlink)
        if rev == len(self):
            return

        # first truncate the files on disk
        end = self.start(rev)
        if not self._inline:
            transaction.add(self.datafile, end)
            end = rev * self._io.size
        else:
            end += rev * self._io.size

        transaction.add(self.indexfile, end)

        # then reset internal state in memory to forget those revisions
        self._cache = None
        self._chaininfocache = {}
        self._chunkclear()
        for x in xrange(rev, len(self)):
            del self.nodemap[self.node(x)]

        del self.index[rev:-1]

    def checksize(self):
        expected = 0
        if len(self):
            expected = max(0, self.end(len(self) - 1))

        try:
            f = self.opener(self.datafile)
            f.seek(0, 2)
            actual = f.tell()
            f.close()
            dd = actual - expected
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise
            dd = 0

        try:
            f = self.opener(self.indexfile)
            f.seek(0, 2)
            actual = f.tell()
            f.close()
            s = self._io.size
            i = max(0, actual // s)
            di = actual - (i * s)
            if self._inline:
                databytes = 0
                for r in self:
                    databytes += max(0, self.length(r))
                dd = 0
                di = actual - len(self) * s - databytes
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise
            di = 0

        return (dd, di)

    def files(self):
        res = [self.indexfile]
        if not self._inline:
            res.append(self.datafile)
        return res