revlog: refactor delta chain computation into own function...
Gregory Szorc
r27468:93ac15f0 default
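The change extracts the delta-chain walk into a dedicated helper, _deltachain(rev, stoprev=None), on the revlog class, so callers can obtain both the list of revisions to apply and whether the walk was cut short by ``stoprev``, without duplicating the index-walking loop. A minimal sketch of the resulting behavior follows; the revlog instance ``rl`` and the delta bases assumed below are illustrative, not taken from this diff:

    # Suppose rev 4 deltas against rev 2, which deltas against the
    # full-text rev 0 (index[r][3] is r's delta base under generaldelta).
    chain, stopped = rl._deltachain(4)
    # chain == [0, 2, 4], stopped == False: reconstruct rev 0's text,
    # then apply the deltas for revs 2 and 4 in order.
    chain, stopped = rl._deltachain(4, stoprev=2)
    # chain == [4], stopped == True: the walk stopped at ``stoprev``,
    # which is excluded, so the caller must already have rev 2's text.
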
@@ -1,1768 +1,1786 @@
# revlog.py - storage back-end for mercurial
#
# Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.

"""Storage back-end for Mercurial.

This provides efficient delta storage with O(1) retrieve and append
and O(changes) merge between branches.
"""

from __future__ import absolute_import

import collections
import errno
import os
import struct
import zlib

# import stuff from node for others to import from revlog
from .node import (
    bin,
    hex,
    nullid,
    nullrev,
)
from .i18n import _
from . import (
    ancestor,
    error,
    mdiff,
    parsers,
    templatefilters,
    util,
)

_pack = struct.pack
_unpack = struct.unpack
_compress = zlib.compress
_decompress = zlib.decompress
_sha = util.sha1

# revlog header flags
REVLOGV0 = 0
REVLOGNG = 1
REVLOGNGINLINEDATA = (1 << 16)
REVLOGGENERALDELTA = (1 << 17)
REVLOG_DEFAULT_FLAGS = REVLOGNGINLINEDATA
REVLOG_DEFAULT_FORMAT = REVLOGNG
REVLOG_DEFAULT_VERSION = REVLOG_DEFAULT_FORMAT | REVLOG_DEFAULT_FLAGS
REVLOGNG_FLAGS = REVLOGNGINLINEDATA | REVLOGGENERALDELTA

# revlog index flags
REVIDX_ISCENSORED = (1 << 15) # revision has censor metadata, must be verified
REVIDX_DEFAULT_FLAGS = 0
REVIDX_KNOWN_FLAGS = REVIDX_ISCENSORED

# max size of revlog with inline data
_maxinline = 131072
_chunksize = 1048576

RevlogError = error.RevlogError
LookupError = error.LookupError
CensoredNodeError = error.CensoredNodeError

def getoffset(q):
    return int(q >> 16)

def gettype(q):
    return int(q & 0xFFFF)

def offset_type(offset, type):
    return long(long(offset) << 16 | type)

_nullhash = _sha(nullid)

def hash(text, p1, p2):
    """generate a hash from the given text and its parent hashes

    This hash combines both the current file contents and its history
    in a manner that makes it easy to distinguish nodes with the same
    content in the revision graph.
    """
    # As of now, if one of the parent node is null, p2 is null
    if p2 == nullid:
        # deep copy of a hash is faster than creating one
        s = _nullhash.copy()
        s.update(p1)
    else:
        # none of the parent nodes are nullid
        l = [p1, p2]
        l.sort()
        s = _sha(l[0])
        s.update(l[1])
    s.update(text)
    return s.digest()

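# Illustrative note (added comment, not in the original source): both
# branches above produce the same digest. nullid is twenty '\0' bytes
# and always sorts first, so in every case the result is
#   sha1(min(p1, p2) + max(p1, p2) + text)
# and the null-parent branch is merely a fast path built on _nullhash.
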
def decompress(bin):
    """ decompress the given input """
    if not bin:
        return bin
    t = bin[0]
    if t == '\0':
        return bin
    if t == 'x':
        try:
            return _decompress(bin)
        except zlib.error as e:
            raise RevlogError(_("revlog decompress error: %s") % str(e))
    if t == 'u':
        return bin[1:]
    raise RevlogError(_("unknown compression type %r") % t)

# index v0:
#  4 bytes: offset
#  4 bytes: compressed length
#  4 bytes: base rev
#  4 bytes: link rev
# 20 bytes: parent 1 nodeid
# 20 bytes: parent 2 nodeid
# 20 bytes: nodeid
indexformatv0 = ">4l20s20s20s"

class revlogoldio(object):
    def __init__(self):
        self.size = struct.calcsize(indexformatv0)

    def parseindex(self, data, inline):
        s = self.size
        index = []
        nodemap = {nullid: nullrev}
        n = off = 0
        l = len(data)
        while off + s <= l:
            cur = data[off:off + s]
            off += s
            e = _unpack(indexformatv0, cur)
            # transform to revlogv1 format
            e2 = (offset_type(e[0], 0), e[1], -1, e[2], e[3],
                  nodemap.get(e[4], nullrev), nodemap.get(e[5], nullrev), e[6])
            index.append(e2)
            nodemap[e[6]] = n
            n += 1

        # add the magic null revision at -1
        index.append((0, 0, 0, -1, -1, -1, -1, nullid))

        return index, nodemap, None

    def packentry(self, entry, node, version, rev):
        if gettype(entry[0]):
            raise RevlogError(_("index entry flags need RevlogNG"))
        e2 = (getoffset(entry[0]), entry[1], entry[3], entry[4],
              node(entry[5]), node(entry[6]), entry[7])
        return _pack(indexformatv0, *e2)

# index ng:
#  6 bytes: offset
#  2 bytes: flags
#  4 bytes: compressed length
#  4 bytes: uncompressed length
#  4 bytes: base rev
#  4 bytes: link rev
#  4 bytes: parent 1 rev
#  4 bytes: parent 2 rev
# 32 bytes: nodeid
indexformatng = ">Qiiiiii20s12x"
versionformat = ">I"

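# Sizing note (added comment; the arithmetic follows from the struct
# format above): struct.calcsize(indexformatng) == 64, so every RevlogNG
# index entry occupies 64 bytes: 8 (offset and flags packed into a Q),
# 4 * 6 (six ints), 20 (the nodeid) and 12 pad bytes rounding the hash
# field up to 32.
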
# corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
# signed integer)
_maxentrysize = 0x7fffffff

class revlogio(object):
    def __init__(self):
        self.size = struct.calcsize(indexformatng)

    def parseindex(self, data, inline):
        # call the C implementation to parse the index data
        index, cache = parsers.parse_index2(data, inline)
        return index, getattr(index, 'nodemap', None), cache

    def packentry(self, entry, node, version, rev):
        p = _pack(indexformatng, *entry)
        if rev == 0:
            p = _pack(versionformat, version) + p[4:]
        return p

class revlog(object):
    """
    the underlying revision storage object

    A revlog consists of two parts, an index and the revision data.

    The index is a file with a fixed record size containing
    information on each revision, including its nodeid (hash), the
    nodeids of its parents, the position and offset of its data within
    the data file, and the revision it's based on. Finally, each entry
    contains a linkrev entry that can serve as a pointer to external
    data.

    The revision data itself is a linear collection of data chunks.
    Each chunk represents a revision and is usually represented as a
    delta against the previous chunk. To bound lookup time, runs of
    deltas are limited to about 2 times the length of the original
    version data. This makes retrieval of a version proportional to
    its size, or O(1) relative to the number of revisions.

    Both pieces of the revlog are written to in an append-only
    fashion, which means we never need to rewrite a file to insert or
    remove data, and can use some simple techniques to avoid the need
    for locking while reading.
    """
    def __init__(self, opener, indexfile):
        """
        create a revlog object

        opener is a function that abstracts the file opening operation
        and can be used to implement COW semantics or the like.
        """
        self.indexfile = indexfile
        self.datafile = indexfile[:-2] + ".d"
        self.opener = opener
        # 3-tuple of (node, rev, text) for a raw revision.
        self._cache = None
        # 2-tuple of (rev, baserev) defining the base revision the delta chain
        # begins at for a revision.
        self._basecache = None
        # 2-tuple of (offset, data) of raw data from the revlog at an offset.
        self._chunkcache = (0, '')
        # How much data to read and cache into the raw revlog data cache.
        self._chunkcachesize = 65536
        self._maxchainlen = None
        self._aggressivemergedeltas = False
        self.index = []
        # Mapping of partial identifiers to full nodes.
        self._pcache = {}
        # Mapping of revision integer to full node.
        self._nodecache = {nullid: nullrev}
        self._nodepos = None

        v = REVLOG_DEFAULT_VERSION
        opts = getattr(opener, 'options', None)
        if opts is not None:
            if 'revlogv1' in opts:
                if 'generaldelta' in opts:
                    v |= REVLOGGENERALDELTA
            else:
                v = 0
            if 'chunkcachesize' in opts:
                self._chunkcachesize = opts['chunkcachesize']
            if 'maxchainlen' in opts:
                self._maxchainlen = opts['maxchainlen']
            if 'aggressivemergedeltas' in opts:
                self._aggressivemergedeltas = opts['aggressivemergedeltas']
            self._lazydeltabase = bool(opts.get('lazydeltabase', False))

        if self._chunkcachesize <= 0:
            raise RevlogError(_('revlog chunk cache size %r is not greater '
                                'than 0') % self._chunkcachesize)
        elif self._chunkcachesize & (self._chunkcachesize - 1):
            raise RevlogError(_('revlog chunk cache size %r is not a power '
                                'of 2') % self._chunkcachesize)

        indexdata = ''
        self._initempty = True
        try:
            f = self.opener(self.indexfile)
            indexdata = f.read()
            f.close()
            if len(indexdata) > 0:
                v = struct.unpack(versionformat, indexdata[:4])[0]
                self._initempty = False
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise

        self.version = v
        self._inline = v & REVLOGNGINLINEDATA
        self._generaldelta = v & REVLOGGENERALDELTA
        flags = v & ~0xFFFF
        fmt = v & 0xFFFF
        if fmt == REVLOGV0 and flags:
            raise RevlogError(_("index %s unknown flags %#04x for format v0")
                              % (self.indexfile, flags >> 16))
        elif fmt == REVLOGNG and flags & ~REVLOGNG_FLAGS:
            raise RevlogError(_("index %s unknown flags %#04x for revlogng")
                              % (self.indexfile, flags >> 16))
        elif fmt > REVLOGNG:
            raise RevlogError(_("index %s unknown format %d")
                              % (self.indexfile, fmt))

        self._io = revlogio()
        if self.version == REVLOGV0:
            self._io = revlogoldio()
        try:
            d = self._io.parseindex(indexdata, self._inline)
        except (ValueError, IndexError):
            raise RevlogError(_("index %s is corrupted") % (self.indexfile))
        self.index, nodemap, self._chunkcache = d
        if nodemap is not None:
            self.nodemap = self._nodecache = nodemap
        if not self._chunkcache:
            self._chunkclear()
        # revnum -> (chain-length, sum-delta-length)
        self._chaininfocache = {}

    def tip(self):
        return self.node(len(self.index) - 2)
    def __contains__(self, rev):
        return 0 <= rev < len(self)
    def __len__(self):
        return len(self.index) - 1
    def __iter__(self):
        return iter(xrange(len(self)))
    def revs(self, start=0, stop=None):
        """iterate over all rev in this revlog (from start to stop)"""
        step = 1
        if stop is not None:
            if start > stop:
                step = -1
            stop += step
        else:
            stop = len(self)
        return xrange(start, stop, step)

    @util.propertycache
    def nodemap(self):
        self.rev(self.node(0))
        return self._nodecache

    def hasnode(self, node):
        try:
            self.rev(node)
            return True
        except KeyError:
            return False

    def clearcaches(self):
        self._cache = None
        self._basecache = None
        self._chunkcache = (0, '')
        self._pcache = {}

        try:
            self._nodecache.clearcaches()
        except AttributeError:
            self._nodecache = {nullid: nullrev}
            self._nodepos = None

    def rev(self, node):
        try:
            return self._nodecache[node]
        except TypeError:
            raise
        except RevlogError:
            # parsers.c radix tree lookup failed
            raise LookupError(node, self.indexfile, _('no node'))
        except KeyError:
            # pure python cache lookup failed
            n = self._nodecache
            i = self.index
            p = self._nodepos
            if p is None:
                p = len(i) - 2
            for r in xrange(p, -1, -1):
                v = i[r][7]
                n[v] = r
                if v == node:
                    self._nodepos = r - 1
                    return r
            raise LookupError(node, self.indexfile, _('no node'))

    def node(self, rev):
        return self.index[rev][7]
    def linkrev(self, rev):
        return self.index[rev][4]
    def parents(self, node):
        i = self.index
        d = i[self.rev(node)]
        return i[d[5]][7], i[d[6]][7] # map revisions to nodes inline
    def parentrevs(self, rev):
        return self.index[rev][5:7]
    def start(self, rev):
        return int(self.index[rev][0] >> 16)
    def end(self, rev):
        return self.start(rev) + self.length(rev)
    def length(self, rev):
        return self.index[rev][1]
    def chainbase(self, rev):
        index = self.index
        base = index[rev][3]
        while base != rev:
            rev = base
            base = index[rev][3]
        return base
    def chainlen(self, rev):
        return self._chaininfo(rev)[0]

    def _chaininfo(self, rev):
        chaininfocache = self._chaininfocache
        if rev in chaininfocache:
            return chaininfocache[rev]
        index = self.index
        generaldelta = self._generaldelta
        iterrev = rev
        e = index[iterrev]
        clen = 0
        compresseddeltalen = 0
        while iterrev != e[3]:
            clen += 1
            compresseddeltalen += e[1]
            if generaldelta:
                iterrev = e[3]
            else:
                iterrev -= 1
            if iterrev in chaininfocache:
                t = chaininfocache[iterrev]
                clen += t[0]
                compresseddeltalen += t[1]
                break
            e = index[iterrev]
        else:
            # Add text length of base since decompressing that also takes
            # work. For cache hits the length is already included.
            compresseddeltalen += e[1]
        r = (clen, compresseddeltalen)
        chaininfocache[rev] = r
        return r

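    # Illustrative note (added comment; revisions and sizes assumed):
    # for a generaldelta chain base -> d1 -> d2 with compressed lengths
    # 100, 20 and 30 bytes, _chaininfo(d2) returns (2, 150): two deltas
    # sit on top of the base, and 150 bytes in total must be decompressed
    # (the base text's compressed length is counted in).
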
+    def _deltachain(self, rev, stoprev=None):
+        """Obtain the delta chain for a revision.
+
+        ``stoprev`` specifies a revision to stop at. If not specified, we
+        stop at the base of the chain.
+
+        Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
+        revs in ascending order and ``stopped`` is a bool indicating whether
+        ``stoprev`` was hit.
+        """
+        chain = []
+
+        # Alias to prevent attribute lookup in tight loop.
+        index = self.index
+        generaldelta = self._generaldelta
+
+        iterrev = rev
+        e = index[iterrev]
+        while iterrev != e[3] and iterrev != stoprev:
+            chain.append(iterrev)
+            if generaldelta:
+                iterrev = e[3]
+            else:
+                iterrev -= 1
+            e = index[iterrev]
+
+        if iterrev == stoprev:
+            stopped = True
+        else:
+            chain.append(iterrev)
+            stopped = False
+
+        chain.reverse()
+        return chain, stopped
+
    def flags(self, rev):
        return self.index[rev][0] & 0xFFFF
    def rawsize(self, rev):
        """return the length of the uncompressed text for a given revision"""
        l = self.index[rev][2]
        if l >= 0:
            return l

        t = self.revision(self.node(rev))
        return len(t)
    size = rawsize

    def ancestors(self, revs, stoprev=0, inclusive=False):
        """Generate the ancestors of 'revs' in reverse topological order.
        Does not generate revs lower than stoprev.

        See the documentation for ancestor.lazyancestors for more details."""

        return ancestor.lazyancestors(self.parentrevs, revs, stoprev=stoprev,
                                      inclusive=inclusive)

    def descendants(self, revs):
        """Generate the descendants of 'revs' in revision order.

        Yield a sequence of revision numbers starting with a child of
        some rev in revs, i.e., each revision is *not* considered a
        descendant of itself. Results are ordered by revision number (a
        topological sort)."""
        first = min(revs)
        if first == nullrev:
            for i in self:
                yield i
            return

        seen = set(revs)
        for i in self.revs(start=first + 1):
            for x in self.parentrevs(i):
                if x != nullrev and x in seen:
                    seen.add(i)
                    yield i
                    break

    def findcommonmissing(self, common=None, heads=None):
        """Return a tuple of the ancestors of common and the ancestors of heads
        that are not ancestors of common. In revset terminology, we return the
        tuple:

          ::common, (::heads) - (::common)

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of node IDs. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullid."""
        if common is None:
            common = [nullid]
        if heads is None:
            heads = self.heads()

        common = [self.rev(n) for n in common]
        heads = [self.rev(n) for n in heads]

        # we want the ancestors, but inclusive
        class lazyset(object):
            def __init__(self, lazyvalues):
                self.addedvalues = set()
                self.lazyvalues = lazyvalues

            def __contains__(self, value):
                return value in self.addedvalues or value in self.lazyvalues

            def __iter__(self):
                added = self.addedvalues
                for r in added:
                    yield r
                for r in self.lazyvalues:
                    if not r in added:
                        yield r

            def add(self, value):
                self.addedvalues.add(value)

            def update(self, values):
                self.addedvalues.update(values)

        has = lazyset(self.ancestors(common))
        has.add(nullrev)
        has.update(common)

        # take all ancestors from heads that aren't in has
        missing = set()
        visit = collections.deque(r for r in heads if r not in has)
        while visit:
            r = visit.popleft()
            if r in missing:
                continue
            else:
                missing.add(r)
                for p in self.parentrevs(r):
                    if p not in has:
                        visit.append(p)
        missing = list(missing)
        missing.sort()
        return has, [self.node(r) for r in missing]

    def incrementalmissingrevs(self, common=None):
        """Return an object that can be used to incrementally compute the
        revision numbers of the ancestors of arbitrary sets that are not
        ancestors of common. This is an ancestor.incrementalmissingancestors
        object.

        'common' is a list of revision numbers. If common is not supplied, uses
        nullrev.
        """
        if common is None:
            common = [nullrev]

        return ancestor.incrementalmissingancestors(self.parentrevs, common)

    def findmissingrevs(self, common=None, heads=None):
        """Return the revision numbers of the ancestors of heads that
        are not ancestors of common.

        More specifically, return a list of revision numbers corresponding to
        nodes N such that every N satisfies the following constraints:

          1. N is an ancestor of some node in 'heads'
          2. N is not an ancestor of any node in 'common'

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of revision numbers. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullid."""
        if common is None:
            common = [nullrev]
        if heads is None:
            heads = self.headrevs()

        inc = self.incrementalmissingrevs(common=common)
        return inc.missingancestors(heads)

    def findmissing(self, common=None, heads=None):
        """Return the ancestors of heads that are not ancestors of common.

        More specifically, return a list of nodes N such that every N
        satisfies the following constraints:

          1. N is an ancestor of some node in 'heads'
          2. N is not an ancestor of any node in 'common'

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of node IDs. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullid."""
        if common is None:
            common = [nullid]
        if heads is None:
            heads = self.heads()

        common = [self.rev(n) for n in common]
        heads = [self.rev(n) for n in heads]

        inc = self.incrementalmissingrevs(common=common)
        return [self.node(r) for r in inc.missingancestors(heads)]

    def nodesbetween(self, roots=None, heads=None):
        """Return a topological path from 'roots' to 'heads'.

        Return a tuple (nodes, outroots, outheads) where 'nodes' is a
        topologically sorted list of all nodes N that satisfy both of
        these constraints:

          1. N is a descendant of some node in 'roots'
          2. N is an ancestor of some node in 'heads'

        Every node is considered to be both a descendant and an ancestor
        of itself, so every reachable node in 'roots' and 'heads' will be
        included in 'nodes'.

        'outroots' is the list of reachable nodes in 'roots', i.e., the
        subset of 'roots' that is returned in 'nodes'. Likewise,
        'outheads' is the subset of 'heads' that is also in 'nodes'.

        'roots' and 'heads' are both lists of node IDs. If 'roots' is
        unspecified, uses nullid as the only root. If 'heads' is
        unspecified, uses list of all of the revlog's heads."""
        nonodes = ([], [], [])
        if roots is not None:
            roots = list(roots)
            if not roots:
                return nonodes
            lowestrev = min([self.rev(n) for n in roots])
        else:
            roots = [nullid] # Everybody's a descendant of nullid
            lowestrev = nullrev
        if (lowestrev == nullrev) and (heads is None):
            # We want _all_ the nodes!
            return ([self.node(r) for r in self], [nullid], list(self.heads()))
        if heads is None:
            # All nodes are ancestors, so the latest ancestor is the last
            # node.
            highestrev = len(self) - 1
            # Set ancestors to None to signal that every node is an ancestor.
            ancestors = None
            # Set heads to an empty dictionary for later discovery of heads
            heads = {}
        else:
            heads = list(heads)
            if not heads:
                return nonodes
            ancestors = set()
            # Turn heads into a dictionary so we can remove 'fake' heads.
            # Also, later we will be using it to filter out the heads we can't
            # find from roots.
            heads = dict.fromkeys(heads, False)
            # Start at the top and keep marking parents until we're done.
            nodestotag = set(heads)
            # Remember where the top was so we can use it as a limit later.
            highestrev = max([self.rev(n) for n in nodestotag])
            while nodestotag:
                # grab a node to tag
                n = nodestotag.pop()
                # Never tag nullid
                if n == nullid:
                    continue
                # A node's revision number represents its place in a
                # topologically sorted list of nodes.
                r = self.rev(n)
                if r >= lowestrev:
                    if n not in ancestors:
                        # If we are possibly a descendant of one of the roots
                        # and we haven't already been marked as an ancestor
                        ancestors.add(n) # Mark as ancestor
                        # Add non-nullid parents to list of nodes to tag.
                        nodestotag.update([p for p in self.parents(n) if
                                           p != nullid])
                    elif n in heads: # We've seen it before, is it a fake head?
                        # So it is, real heads should not be the ancestors of
                        # any other heads.
                        heads.pop(n)
            if not ancestors:
                return nonodes
            # Now that we have our set of ancestors, we want to remove any
            # roots that are not ancestors.

            # If one of the roots was nullid, everything is included anyway.
            if lowestrev > nullrev:
                # But, since we weren't, let's recompute the lowest rev to not
                # include roots that aren't ancestors.

                # Filter out roots that aren't ancestors of heads
                roots = [n for n in roots if n in ancestors]
                # Recompute the lowest revision
                if roots:
                    lowestrev = min([self.rev(n) for n in roots])
                else:
                    # No more roots? Return empty list
                    return nonodes
            else:
                # We are descending from nullid, and don't need to care about
                # any other roots.
                lowestrev = nullrev
                roots = [nullid]
        # Transform our roots list into a set.
        descendants = set(roots)
        # Also, keep the original roots so we can filter out roots that aren't
        # 'real' roots (i.e. are descended from other roots).
        roots = descendants.copy()
        # Our topologically sorted list of output nodes.
        orderedout = []
        # Don't start at nullid since we don't want nullid in our output list,
        # and if nullid shows up in descendants, empty parents will look like
        # they're descendants.
        for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
            n = self.node(r)
            isdescendant = False
            if lowestrev == nullrev: # Everybody is a descendant of nullid
                isdescendant = True
            elif n in descendants:
                # n is already a descendant
                isdescendant = True
                # This check only needs to be done here because all the roots
                # will start being marked is descendants before the loop.
                if n in roots:
                    # If n was a root, check if it's a 'real' root.
                    p = tuple(self.parents(n))
                    # If any of its parents are descendants, it's not a root.
                    if (p[0] in descendants) or (p[1] in descendants):
                        roots.remove(n)
            else:
                p = tuple(self.parents(n))
                # A node is a descendant if either of its parents are
                # descendants. (We seeded the dependents list with the roots
                # up there, remember?)
                if (p[0] in descendants) or (p[1] in descendants):
                    descendants.add(n)
                    isdescendant = True
            if isdescendant and ((ancestors is None) or (n in ancestors)):
                # Only include nodes that are both descendants and ancestors.
                orderedout.append(n)
                if (ancestors is not None) and (n in heads):
                    # We're trying to figure out which heads are reachable
                    # from roots.
                    # Mark this head as having been reached
                    heads[n] = True
                elif ancestors is None:
                    # Otherwise, we're trying to discover the heads.
                    # Assume this is a head because if it isn't, the next step
                    # will eventually remove it.
                    heads[n] = True
                    # But, obviously its parents aren't.
                    for p in self.parents(n):
                        heads.pop(p, None)
        heads = [n for n, flag in heads.iteritems() if flag]
        roots = list(roots)
        assert orderedout
        assert roots
        assert heads
        return (orderedout, roots, heads)

758 def headrevs(self):
793 def headrevs(self):
759 try:
794 try:
760 return self.index.headrevs()
795 return self.index.headrevs()
761 except AttributeError:
796 except AttributeError:
762 return self._headrevs()
797 return self._headrevs()
763
798
764 def computephases(self, roots):
799 def computephases(self, roots):
765 return self.index.computephasesmapsets(roots)
800 return self.index.computephasesmapsets(roots)
766
801
767 def _headrevs(self):
802 def _headrevs(self):
768 count = len(self)
803 count = len(self)
769 if not count:
804 if not count:
770 return [nullrev]
805 return [nullrev]
771 # we won't iter over filtered rev so nobody is a head at start
806 # we won't iter over filtered rev so nobody is a head at start
772 ishead = [0] * (count + 1)
807 ishead = [0] * (count + 1)
773 index = self.index
808 index = self.index
774 for r in self:
809 for r in self:
775 ishead[r] = 1 # I may be an head
810 ishead[r] = 1 # I may be an head
776 e = index[r]
811 e = index[r]
777 ishead[e[5]] = ishead[e[6]] = 0 # my parent are not
812 ishead[e[5]] = ishead[e[6]] = 0 # my parent are not
778 return [r for r, val in enumerate(ishead) if val]
813 return [r for r, val in enumerate(ishead) if val]
779
814
780 def heads(self, start=None, stop=None):
815 def heads(self, start=None, stop=None):
781 """return the list of all nodes that have no children
816 """return the list of all nodes that have no children
782
817
783 if start is specified, only heads that are descendants of
818 if start is specified, only heads that are descendants of
784 start will be returned
819 start will be returned
785 if stop is specified, it will consider all the revs from stop
820 if stop is specified, it will consider all the revs from stop
786 as if they had no children
821 as if they had no children
787 """
822 """
788 if start is None and stop is None:
823 if start is None and stop is None:
789 if not len(self):
824 if not len(self):
790 return [nullid]
825 return [nullid]
791 return [self.node(r) for r in self.headrevs()]
826 return [self.node(r) for r in self.headrevs()]
792
827
793 if start is None:
828 if start is None:
794 start = nullid
829 start = nullid
795 if stop is None:
830 if stop is None:
796 stop = []
831 stop = []
797 stoprevs = set([self.rev(n) for n in stop])
832 stoprevs = set([self.rev(n) for n in stop])
798 startrev = self.rev(start)
833 startrev = self.rev(start)
799 reachable = set((startrev,))
834 reachable = set((startrev,))
800 heads = set((startrev,))
835 heads = set((startrev,))
801
836
802 parentrevs = self.parentrevs
837 parentrevs = self.parentrevs
803 for r in self.revs(start=startrev + 1):
838 for r in self.revs(start=startrev + 1):
804 for p in parentrevs(r):
839 for p in parentrevs(r):
805 if p in reachable:
840 if p in reachable:
806 if r not in stoprevs:
841 if r not in stoprevs:
807 reachable.add(r)
842 reachable.add(r)
808 heads.add(r)
843 heads.add(r)
809 if p in heads and p not in stoprevs:
844 if p in heads and p not in stoprevs:
810 heads.remove(p)
845 heads.remove(p)
811
846
812 return [self.node(r) for r in heads]
847 return [self.node(r) for r in heads]
813
848
814 def children(self, node):
849 def children(self, node):
815 """find the children of a given node"""
850 """find the children of a given node"""
816 c = []
851 c = []
817 p = self.rev(node)
852 p = self.rev(node)
818 for r in self.revs(start=p + 1):
853 for r in self.revs(start=p + 1):
819 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
854 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
820 if prevs:
855 if prevs:
821 for pr in prevs:
856 for pr in prevs:
822 if pr == p:
857 if pr == p:
823 c.append(self.node(r))
858 c.append(self.node(r))
824 elif p == nullrev:
859 elif p == nullrev:
825 c.append(self.node(r))
860 c.append(self.node(r))
826 return c
861 return c
827
862
828 def descendant(self, start, end):
863 def descendant(self, start, end):
829 if start == nullrev:
864 if start == nullrev:
830 return True
865 return True
831 for i in self.descendants([start]):
866 for i in self.descendants([start]):
832 if i == end:
867 if i == end:
833 return True
868 return True
834 elif i > end:
869 elif i > end:
835 break
870 break
836 return False
871 return False
837
872
838 def commonancestorsheads(self, a, b):
873 def commonancestorsheads(self, a, b):
839 """calculate all the heads of the common ancestors of nodes a and b"""
874 """calculate all the heads of the common ancestors of nodes a and b"""
840 a, b = self.rev(a), self.rev(b)
875 a, b = self.rev(a), self.rev(b)
841 try:
876 try:
842 ancs = self.index.commonancestorsheads(a, b)
877 ancs = self.index.commonancestorsheads(a, b)
843 except (AttributeError, OverflowError): # C implementation failed
878 except (AttributeError, OverflowError): # C implementation failed
844 ancs = ancestor.commonancestorsheads(self.parentrevs, a, b)
879 ancs = ancestor.commonancestorsheads(self.parentrevs, a, b)
845 return map(self.node, ancs)
880 return map(self.node, ancs)
846
881
847 def isancestor(self, a, b):
882 def isancestor(self, a, b):
848 """return True if node a is an ancestor of node b
883 """return True if node a is an ancestor of node b
849
884
850 The implementation of this is trivial but the use of
885 The implementation of this is trivial but the use of
851 commonancestorsheads is not."""
886 commonancestorsheads is not."""
852 return a in self.commonancestorsheads(a, b)
887 return a in self.commonancestorsheads(a, b)
853
888
854 def ancestor(self, a, b):
889 def ancestor(self, a, b):
855 """calculate the "best" common ancestor of nodes a and b"""
890 """calculate the "best" common ancestor of nodes a and b"""
856
891
857 a, b = self.rev(a), self.rev(b)
892 a, b = self.rev(a), self.rev(b)
858 try:
893 try:
859 ancs = self.index.ancestors(a, b)
894 ancs = self.index.ancestors(a, b)
860 except (AttributeError, OverflowError):
895 except (AttributeError, OverflowError):
861 ancs = ancestor.ancestors(self.parentrevs, a, b)
896 ancs = ancestor.ancestors(self.parentrevs, a, b)
862 if ancs:
897 if ancs:
863 # choose a consistent winner when there's a tie
898 # choose a consistent winner when there's a tie
864 return min(map(self.node, ancs))
899 return min(map(self.node, ancs))
865 return nullid
900 return nullid
866
901
867 def _match(self, id):
902 def _match(self, id):
868 if isinstance(id, int):
903 if isinstance(id, int):
869 # rev
904 # rev
870 return self.node(id)
905 return self.node(id)
871 if len(id) == 20:
906 if len(id) == 20:
872 # possibly a binary node
907 # possibly a binary node
873 # odds of a binary node being all hex in ASCII are 1 in 10**25
908 # odds of a binary node being all hex in ASCII are 1 in 10**25
874 try:
909 try:
875 node = id
910 node = id
876 self.rev(node) # quick search the index
911 self.rev(node) # quick search the index
877 return node
912 return node
878 except LookupError:
913 except LookupError:
879 pass # may be partial hex id
914 pass # may be partial hex id
880 try:
915 try:
881 # str(rev)
916 # str(rev)
882 rev = int(id)
917 rev = int(id)
883 if str(rev) != id:
918 if str(rev) != id:
884 raise ValueError
919 raise ValueError
885 if rev < 0:
920 if rev < 0:
886 rev = len(self) + rev
921 rev = len(self) + rev
887 if rev < 0 or rev >= len(self):
922 if rev < 0 or rev >= len(self):
888 raise ValueError
923 raise ValueError
889 return self.node(rev)
924 return self.node(rev)
890 except (ValueError, OverflowError):
925 except (ValueError, OverflowError):
891 pass
926 pass
892 if len(id) == 40:
927 if len(id) == 40:
893 try:
928 try:
894 # a full hex nodeid?
929 # a full hex nodeid?
895 node = bin(id)
930 node = bin(id)
896 self.rev(node)
931 self.rev(node)
897 return node
932 return node
898 except (TypeError, LookupError):
933 except (TypeError, LookupError):
899 pass
934 pass
900
935
901 def _partialmatch(self, id):
936 def _partialmatch(self, id):
902 try:
937 try:
903 n = self.index.partialmatch(id)
938 n = self.index.partialmatch(id)
904 if n and self.hasnode(n):
939 if n and self.hasnode(n):
905 return n
940 return n
906 return None
941 return None
907 except RevlogError:
942 except RevlogError:
908 # parsers.c radix tree lookup gave multiple matches
943 # parsers.c radix tree lookup gave multiple matches
909 # fall through to slow path that filters hidden revisions
944 # fall through to slow path that filters hidden revisions
910 pass
945 pass
911 except (AttributeError, ValueError):
946 except (AttributeError, ValueError):
912 # we are pure python, or key was too short to search radix tree
947 # we are pure python, or key was too short to search radix tree
913 pass
948 pass
914
949
915 if id in self._pcache:
950 if id in self._pcache:
916 return self._pcache[id]
951 return self._pcache[id]
917
952
918 if len(id) < 40:
953 if len(id) < 40:
919 try:
954 try:
920 # hex(node)[:...]
955 # hex(node)[:...]
921 l = len(id) // 2 # grab an even number of digits
956 l = len(id) // 2 # grab an even number of digits
922 prefix = bin(id[:l * 2])
957 prefix = bin(id[:l * 2])
923 nl = [e[7] for e in self.index if e[7].startswith(prefix)]
958 nl = [e[7] for e in self.index if e[7].startswith(prefix)]
924 nl = [n for n in nl if hex(n).startswith(id) and
959 nl = [n for n in nl if hex(n).startswith(id) and
925 self.hasnode(n)]
960 self.hasnode(n)]
926 if len(nl) > 0:
961 if len(nl) > 0:
927 if len(nl) == 1:
962 if len(nl) == 1:
928 self._pcache[id] = nl[0]
963 self._pcache[id] = nl[0]
929 return nl[0]
964 return nl[0]
930 raise LookupError(id, self.indexfile,
965 raise LookupError(id, self.indexfile,
931 _('ambiguous identifier'))
966 _('ambiguous identifier'))
932 return None
967 return None
933 except TypeError:
968 except TypeError:
934 pass
969 pass
935
970
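For readers without the C extension, the pure-Python fallback above condenses to the following standalone sketch; `partial_match` is a hypothetical name and the plain node list stands in for the index:

    import binascii

    def partial_match(nodes, hexprefix):
        # grab an even number of hex digits for a cheap binary prefix test,
        # then confirm against the full hex form (handles odd-length ids)
        l = len(hexprefix) // 2
        prefix = binascii.unhexlify(hexprefix[:l * 2])
        matches = [n for n in nodes
                   if n.startswith(prefix) and
                   binascii.hexlify(n).decode('ascii').startswith(hexprefix)]
        if len(matches) > 1:
            raise ValueError('ambiguous identifier: %s' % hexprefix)
        return matches[0] if matches else None

    nodes = [b'\xde\xad' + b'\x00' * 18, b'\xbe\xef' + b'\x00' * 18]
    print(partial_match(nodes, 'dead') == nodes[0])  # True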
936 def lookup(self, id):
971 def lookup(self, id):
937 """locate a node based on:
972 """locate a node based on:
938 - revision number or str(revision number)
973 - revision number or str(revision number)
939 - nodeid or subset of hex nodeid
974 - nodeid or subset of hex nodeid
940 """
975 """
941 n = self._match(id)
976 n = self._match(id)
942 if n is not None:
977 if n is not None:
943 return n
978 return n
944 n = self._partialmatch(id)
979 n = self._partialmatch(id)
945 if n:
980 if n:
946 return n
981 return n
947
982
948 raise LookupError(id, self.indexfile, _('no match found'))
983 raise LookupError(id, self.indexfile, _('no match found'))
949
984
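A hedged usage sketch of the fallback chain: _match first, _partialmatch second, LookupError otherwise. It assumes `rl` is an already-open revlog supplied by the caller:

    import binascii

    def demo_lookup(rl):
        node = rl.lookup(0)                    # integer revision number
        assert rl.lookup('0') == node          # str(revision number)
        assert rl.lookup(node) == node         # 20-byte binary node
        hexnode = binascii.hexlify(node)
        assert rl.lookup(hexnode) == node      # full 40-char hex nodeid
        assert rl.lookup(hexnode[:6]) == node  # prefix, if unambiguous
        return node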
950 def cmp(self, node, text):
985 def cmp(self, node, text):
951 """compare text with a given file revision
986 """compare text with a given file revision
952
987
953 returns True if text is different from what is stored.
988 returns True if text is different from what is stored.
954 """
989 """
955 p1, p2 = self.parents(node)
990 p1, p2 = self.parents(node)
956 return hash(text, p1, p2) != node
991 return hash(text, p1, p2) != node
957
992
958 def _addchunk(self, offset, data):
993 def _addchunk(self, offset, data):
959 """Add a segment to the revlog cache.
994 """Add a segment to the revlog cache.
960
995
961 Accepts an absolute offset and the data that is at that location.
996 Accepts an absolute offset and the data that is at that location.
962 """
997 """
963 o, d = self._chunkcache
998 o, d = self._chunkcache
964 # try to add to existing cache
999 # try to add to existing cache
965 if o + len(d) == offset and len(d) + len(data) < _chunksize:
1000 if o + len(d) == offset and len(d) + len(data) < _chunksize:
966 self._chunkcache = o, d + data
1001 self._chunkcache = o, d + data
967 else:
1002 else:
968 self._chunkcache = offset, data
1003 self._chunkcache = offset, data
969
1004
970 def _loadchunk(self, offset, length, df=None):
1005 def _loadchunk(self, offset, length, df=None):
971 """Load a segment of raw data from the revlog.
1006 """Load a segment of raw data from the revlog.
972
1007
973 Accepts an absolute offset, length to read, and an optional existing
1008 Accepts an absolute offset, length to read, and an optional existing
974 file handle to read from.
1009 file handle to read from.
975
1010
976 If an existing file handle is passed, it will be seeked and the
1011 If an existing file handle is passed, it will be seeked and the
977 original seek position will NOT be restored.
1012 original seek position will NOT be restored.
978
1013
979 Returns a str or buffer of raw byte data.
1014 Returns a str or buffer of raw byte data.
980 """
1015 """
981 if df is not None:
1016 if df is not None:
982 closehandle = False
1017 closehandle = False
983 else:
1018 else:
984 if self._inline:
1019 if self._inline:
985 df = self.opener(self.indexfile)
1020 df = self.opener(self.indexfile)
986 else:
1021 else:
987 df = self.opener(self.datafile)
1022 df = self.opener(self.datafile)
988 closehandle = True
1023 closehandle = True
989
1024
990 # Cache data both forward and backward around the requested
1025 # Cache data both forward and backward around the requested
991 # data, in a fixed size window. This helps speed up operations
1026 # data, in a fixed size window. This helps speed up operations
992 # involving reading the revlog backwards.
1027 # involving reading the revlog backwards.
993 cachesize = self._chunkcachesize
1028 cachesize = self._chunkcachesize
994 realoffset = offset & ~(cachesize - 1)
1029 realoffset = offset & ~(cachesize - 1)
995 reallength = (((offset + length + cachesize) & ~(cachesize - 1))
1030 reallength = (((offset + length + cachesize) & ~(cachesize - 1))
996 - realoffset)
1031 - realoffset)
997 df.seek(realoffset)
1032 df.seek(realoffset)
998 d = df.read(reallength)
1033 d = df.read(reallength)
999 if closehandle:
1034 if closehandle:
1000 df.close()
1035 df.close()
1001 self._addchunk(realoffset, d)
1036 self._addchunk(realoffset, d)
1002 if offset != realoffset or reallength != length:
1037 if offset != realoffset or reallength != length:
1003 return util.buffer(d, offset - realoffset, length)
1038 return util.buffer(d, offset - realoffset, length)
1004 return d
1039 return d
1005
1040
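The window arithmetic above rounds the requested span out to _chunkcachesize boundaries, which must be a power of two for the masks to work. A worked example with an illustrative 64 KiB window:

    def cache_window(offset, length, cachesize=65536):
        realoffset = offset & ~(cachesize - 1)            # round start down
        reallength = (((offset + length + cachesize)      # round end up, with
                       & ~(cachesize - 1)) - realoffset)  # forward readahead
        return realoffset, reallength

    # a read of bytes [60000, 70000) with 64 KiB windows actually reads
    # bytes [0, 131072): the containing windows plus forward padding
    print(cache_window(60000, 10000))  # -> (0, 131072)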
1006 def _getchunk(self, offset, length, df=None):
1041 def _getchunk(self, offset, length, df=None):
1007 """Obtain a segment of raw data from the revlog.
1042 """Obtain a segment of raw data from the revlog.
1008
1043
1009 Accepts an absolute offset, length of bytes to obtain, and an
1044 Accepts an absolute offset, length of bytes to obtain, and an
1010 optional file handle to the already-opened revlog. If the file
1045 optional file handle to the already-opened revlog. If the file
1011 handle is used, its original seek position will not be preserved.
1046 handle is used, its original seek position will not be preserved.
1012
1047
1013 Requests for data may be returned from a cache.
1048 Requests for data may be returned from a cache.
1014
1049
1015 Returns a str or a buffer instance of raw byte data.
1050 Returns a str or a buffer instance of raw byte data.
1016 """
1051 """
1017 o, d = self._chunkcache
1052 o, d = self._chunkcache
1018 l = len(d)
1053 l = len(d)
1019
1054
1020 # is it in the cache?
1055 # is it in the cache?
1021 cachestart = offset - o
1056 cachestart = offset - o
1022 cacheend = cachestart + length
1057 cacheend = cachestart + length
1023 if cachestart >= 0 and cacheend <= l:
1058 if cachestart >= 0 and cacheend <= l:
1024 if cachestart == 0 and cacheend == l:
1059 if cachestart == 0 and cacheend == l:
1025 return d # avoid a copy
1060 return d # avoid a copy
1026 return util.buffer(d, cachestart, cacheend - cachestart)
1061 return util.buffer(d, cachestart, cacheend - cachestart)
1027
1062
1028 return self._loadchunk(offset, length, df=df)
1063 return self._loadchunk(offset, length, df=df)
1029
1064
1030 def _chunkraw(self, startrev, endrev, df=None):
1065 def _chunkraw(self, startrev, endrev, df=None):
1031 """Obtain a segment of raw data corresponding to a range of revisions.
1066 """Obtain a segment of raw data corresponding to a range of revisions.
1032
1067
1033 Accepts the start and end revisions and an optional already-open
1068 Accepts the start and end revisions and an optional already-open
1034 file handle to be used for reading. If the file handle is read, its
1069 file handle to be used for reading. If the file handle is read, its
1035 seek position will not be preserved.
1070 seek position will not be preserved.
1036
1071
1037 Requests for data may be satisfied by a cache.
1072 Requests for data may be satisfied by a cache.
1038
1073
1039 Returns a str or a buffer instance of raw byte data. Callers will
1074 Returns a str or a buffer instance of raw byte data. Callers will
1040 need to call ``self.start(rev)`` and ``self.length(rev)`` to determine
1075 need to call ``self.start(rev)`` and ``self.length(rev)`` to determine
1041 where each revision's data begins and ends.
1076 where each revision's data begins and ends.
1042 """
1077 """
1043 start = self.start(startrev)
1078 start = self.start(startrev)
1044 end = self.end(endrev)
1079 end = self.end(endrev)
1045 if self._inline:
1080 if self._inline:
1046 start += (startrev + 1) * self._io.size
1081 start += (startrev + 1) * self._io.size
1047 end += (endrev + 1) * self._io.size
1082 end += (endrev + 1) * self._io.size
1048 length = end - start
1083 length = end - start
1049 return self._getchunk(start, length, df=df)
1084 return self._getchunk(start, length, df=df)
1050
1085
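The inline adjustment is worth spelling out: in an inline revlog, index entries and data chunks interleave in one file, so revision r's data is displaced by (r + 1) fixed-size index entries. A toy example (the 64-byte entry size matches REVLOGNG but is illustrative here):

    def physical_span(start_rev_data, end_rev_data, startrev, endrev,
                      inline, iosize=64):
        start, end = start_rev_data, end_rev_data
        if inline:
            # each preceding revision contributes one index entry of
            # iosize bytes ahead of its data
            start += (startrev + 1) * iosize
            end += (endrev + 1) * iosize
        return start, end - start

    # rev 2's data logically starts at byte 300 and rev 3's ends at 500;
    # inline, those bytes are pushed out by 3 and 4 index entries
    print(physical_span(300, 500, 2, 3, inline=True))  # -> (492, 264)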
1051 def _chunk(self, rev, df=None):
1086 def _chunk(self, rev, df=None):
1052 """Obtain a single decompressed chunk for a revision.
1087 """Obtain a single decompressed chunk for a revision.
1053
1088
1054 Accepts an integer revision and an optional already-open file handle
1089 Accepts an integer revision and an optional already-open file handle
1055 to be used for reading. If used, the seek position of the file will not
1090 to be used for reading. If used, the seek position of the file will not
1056 be preserved.
1091 be preserved.
1057
1092
1058 Returns a str holding uncompressed data for the requested revision.
1093 Returns a str holding uncompressed data for the requested revision.
1059 """
1094 """
1060 return decompress(self._chunkraw(rev, rev, df=df))
1095 return decompress(self._chunkraw(rev, rev, df=df))
1061
1096
1062 def _chunks(self, revs, df=None):
1097 def _chunks(self, revs, df=None):
1063 """Obtain decompressed chunks for the specified revisions.
1098 """Obtain decompressed chunks for the specified revisions.
1064
1099
1065 Accepts an iterable of numeric revisions that are assumed to be in
1100 Accepts an iterable of numeric revisions that are assumed to be in
1066 ascending order. Also accepts an optional already-open file handle
1101 ascending order. Also accepts an optional already-open file handle
1067 to be used for reading. If used, the seek position of the file will
1102 to be used for reading. If used, the seek position of the file will
1068 not be preserved.
1103 not be preserved.
1069
1104
1070 This function is similar to calling ``self._chunk()`` multiple times,
1105 This function is similar to calling ``self._chunk()`` multiple times,
1071 but is faster.
1106 but is faster.
1072
1107
1073 Returns a list with decompressed data for each requested revision.
1108 Returns a list with decompressed data for each requested revision.
1074 """
1109 """
1075 if not revs:
1110 if not revs:
1076 return []
1111 return []
1077 start = self.start
1112 start = self.start
1078 length = self.length
1113 length = self.length
1079 inline = self._inline
1114 inline = self._inline
1080 iosize = self._io.size
1115 iosize = self._io.size
1081 buffer = util.buffer
1116 buffer = util.buffer
1082
1117
1083 l = []
1118 l = []
1084 ladd = l.append
1119 ladd = l.append
1085
1120
1086 # preload the cache
1121 # preload the cache
1087 try:
1122 try:
1088 while True:
1123 while True:
1089 # ensure that the cache doesn't change out from under us
1124 # ensure that the cache doesn't change out from under us
1090 _cache = self._chunkcache
1125 _cache = self._chunkcache
1091 self._chunkraw(revs[0], revs[-1], df=df)
1126 self._chunkraw(revs[0], revs[-1], df=df)
1092 if _cache == self._chunkcache:
1127 if _cache == self._chunkcache:
1093 break
1128 break
1094 offset, data = _cache
1129 offset, data = _cache
1095 except OverflowError:
1130 except OverflowError:
1096 # issue4215 - we can't cache a run of chunks greater than
1131 # issue4215 - we can't cache a run of chunks greater than
1097 # 2G on Windows
1132 # 2G on Windows
1098 return [self._chunk(rev, df=df) for rev in revs]
1133 return [self._chunk(rev, df=df) for rev in revs]
1099
1134
1100 for rev in revs:
1135 for rev in revs:
1101 chunkstart = start(rev)
1136 chunkstart = start(rev)
1102 if inline:
1137 if inline:
1103 chunkstart += (rev + 1) * iosize
1138 chunkstart += (rev + 1) * iosize
1104 chunklength = length(rev)
1139 chunklength = length(rev)
1105 ladd(decompress(buffer(data, chunkstart - offset, chunklength)))
1140 ladd(decompress(buffer(data, chunkstart - offset, chunklength)))
1106
1141
1107 return l
1142 return l
1108
1143
1109 def _chunkclear(self):
1144 def _chunkclear(self):
1110 """Clear the raw chunk cache."""
1145 """Clear the raw chunk cache."""
1111 self._chunkcache = (0, '')
1146 self._chunkcache = (0, '')
1112
1147
1113 def deltaparent(self, rev):
1148 def deltaparent(self, rev):
1114 """return deltaparent of the given revision"""
1149 """return deltaparent of the given revision"""
1115 base = self.index[rev][3]
1150 base = self.index[rev][3]
1116 if base == rev:
1151 if base == rev:
1117 return nullrev
1152 return nullrev
1118 elif self._generaldelta:
1153 elif self._generaldelta:
1119 return base
1154 return base
1120 else:
1155 else:
1121 return rev - 1
1156 return rev - 1
1122
1157
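A standalone restatement of deltaparent's three cases, with illustrative values:

    def delta_parent(rev, base, generaldelta, nullrev=-1):
        # a revision whose base is itself is a full snapshot
        if base == rev:
            return nullrev
        # generaldelta stores the real delta parent in the index;
        # classic revlogs always chain against the previous revision
        if generaldelta:
            return base
        return rev - 1

    print(delta_parent(7, 7, True))   # -> -1 (full text stored)
    print(delta_parent(7, 3, True))   # -> 3  (delta against rev 3)
    print(delta_parent(7, 3, False))  # -> 6  (classic sequential chain)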
1123 def revdiff(self, rev1, rev2):
1158 def revdiff(self, rev1, rev2):
1124 """return or calculate a delta between two revisions"""
1159 """return or calculate a delta between two revisions"""
1125 if rev1 != nullrev and self.deltaparent(rev2) == rev1:
1160 if rev1 != nullrev and self.deltaparent(rev2) == rev1:
1126 return str(self._chunk(rev2))
1161 return str(self._chunk(rev2))
1127
1162
1128 return mdiff.textdiff(self.revision(rev1),
1163 return mdiff.textdiff(self.revision(rev1),
1129 self.revision(rev2))
1164 self.revision(rev2))
1130
1165
1131 def revision(self, nodeorrev, _df=None):
1166 def revision(self, nodeorrev, _df=None):
1132 """return an uncompressed revision of a given node or revision
1167 """return an uncompressed revision of a given node or revision
1133 number.
1168 number.
1134
1169
1135 _df is an existing file handle to read from. It is meant to only be
1170 _df is an existing file handle to read from. It is meant to only be
1136 used internally.
1171 used internally.
1137 """
1172 """
1138 if isinstance(nodeorrev, int):
1173 if isinstance(nodeorrev, int):
1139 rev = nodeorrev
1174 rev = nodeorrev
1140 node = self.node(rev)
1175 node = self.node(rev)
1141 else:
1176 else:
1142 node = nodeorrev
1177 node = nodeorrev
1143 rev = None
1178 rev = None
1144
1179
1145 cachedrev = None
1180 cachedrev = None
1146 if node == nullid:
1181 if node == nullid:
1147 return ""
1182 return ""
1148 if self._cache:
1183 if self._cache:
1149 if self._cache[0] == node:
1184 if self._cache[0] == node:
1150 return self._cache[2]
1185 return self._cache[2]
1151 cachedrev = self._cache[1]
1186 cachedrev = self._cache[1]
1152
1187
1153 # look up what we need to read
1188 # look up what we need to read
1154 text = None
1189 text = None
1155 if rev is None:
1190 if rev is None:
1156 rev = self.rev(node)
1191 rev = self.rev(node)
1157
1192
1158 # check rev flags
1193 # check rev flags
1159 if self.flags(rev) & ~REVIDX_KNOWN_FLAGS:
1194 if self.flags(rev) & ~REVIDX_KNOWN_FLAGS:
1160 raise RevlogError(_('incompatible revision flag %x') %
1195 raise RevlogError(_('incompatible revision flag %x') %
1161 (self.flags(rev) & ~REVIDX_KNOWN_FLAGS))
1196 (self.flags(rev) & ~REVIDX_KNOWN_FLAGS))
1162
1197
1163 # build delta chain
1164 chain = []
1165 index = self.index # for performance
1166 generaldelta = self._generaldelta
1167 iterrev = rev
1168 e = index[iterrev]
1169 while iterrev != e[3] and iterrev != cachedrev:
1170 chain.append(iterrev)
1171 if generaldelta:
1172 iterrev = e[3]
1173 else:
1174 iterrev -= 1
1175 e = index[iterrev]
1176
1177 if iterrev == cachedrev:
1178 # cache hit
1179 text = self._cache[2]
1180 else:
1181 chain.append(iterrev)
1182 chain.reverse()
1183
1198 chain, stopped = self._deltachain(rev, stoprev=cachedrev)
1199 if stopped:
1200 text = self._cache[2]
1201
1184 # drop cache to save memory
1202 # drop cache to save memory
1185 self._cache = None
1203 self._cache = None
1186
1204
1187 bins = self._chunks(chain, df=_df)
1205 bins = self._chunks(chain, df=_df)
1188 if text is None:
1206 if text is None:
1189 text = str(bins[0])
1207 text = str(bins[0])
1190 bins = bins[1:]
1208 bins = bins[1:]
1191
1209
1192 text = mdiff.patches(text, bins)
1210 text = mdiff.patches(text, bins)
1193
1211
1194 text = self._checkhash(text, node, rev)
1212 text = self._checkhash(text, node, rev)
1195
1213
1196 self._cache = (node, rev, text)
1214 self._cache = (node, rev, text)
1197 return text
1215 return text
1198
1216
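The hunk above is the heart of this change: the inline chain walk is gone and revision() delegates to _deltachain, added earlier in the file (outside this excerpt). Reconstructed from the deleted loop, its likely shape is the sketch below; the real implementation may differ in detail:

    def deltachain_sketch(index, rev, generaldelta, stoprev=None):
        # walk delta parents from rev until a full snapshot (base == rev)
        # or stoprev, then reverse so the chain is in application order
        chain = []
        iterrev = rev
        e = index[iterrev]
        while iterrev != e[3] and iterrev != stoprev:
            chain.append(iterrev)
            iterrev = e[3] if generaldelta else iterrev - 1
            e = index[iterrev]
        stopped = iterrev == stoprev
        if not stopped:
            chain.append(iterrev)
        chain.reverse()
        return chain, stopped

    # toy index: entry[3] is the delta base; revs 0 and 3 are snapshots
    toy = [(0, 0, 0, 0), (0, 0, 0, 0), (0, 0, 0, 1),
           (0, 0, 0, 3), (0, 0, 0, 3)]
    print(deltachain_sketch(toy, 4, True))             # -> ([3, 4], False)
    print(deltachain_sketch(toy, 4, True, stoprev=3))  # -> ([4], True)

When stopped is true the cached fulltext serves as the base, so only the deltas after stoprev are patched in.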
1199 def hash(self, text, p1, p2):
1217 def hash(self, text, p1, p2):
1200 """Compute a node hash.
1218 """Compute a node hash.
1201
1219
1202 Available as a function so that subclasses can replace the hash
1220 Available as a function so that subclasses can replace the hash
1203 as needed.
1221 as needed.
1204 """
1222 """
1205 return hash(text, p1, p2)
1223 return hash(text, p1, p2)
1206
1224
1207 def _checkhash(self, text, node, rev):
1225 def _checkhash(self, text, node, rev):
1208 p1, p2 = self.parents(node)
1226 p1, p2 = self.parents(node)
1209 self.checkhash(text, p1, p2, node, rev)
1227 self.checkhash(text, p1, p2, node, rev)
1210 return text
1228 return text
1211
1229
1212 def checkhash(self, text, p1, p2, node, rev=None):
1230 def checkhash(self, text, p1, p2, node, rev=None):
1213 if node != self.hash(text, p1, p2):
1231 if node != self.hash(text, p1, p2):
1214 revornode = rev
1232 revornode = rev
1215 if revornode is None:
1233 if revornode is None:
1216 revornode = templatefilters.short(hex(node))
1234 revornode = templatefilters.short(hex(node))
1217 raise RevlogError(_("integrity check failed on %s:%s")
1235 raise RevlogError(_("integrity check failed on %s:%s")
1218 % (self.indexfile, revornode))
1236 % (self.indexfile, revornode))
1219
1237
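For context, the node identity verified here is the module-level hash() from the top of this file: SHA-1 over the two parent nodes in sorted order followed by the revision text. A self-contained sketch:

    import hashlib

    def node_hash(text, p1, p2):
        # sort the parents so the hash is independent of parent order
        a, b = sorted([p1, p2])
        s = hashlib.sha1(a)
        s.update(b)
        s.update(text)
        return s.digest()

    nullid = b'\0' * 20
    root = node_hash(b'hello\n', nullid, nullid)
    print(len(root))  # 20-byte binary node, rendered as 40 hex digits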
1220 def checkinlinesize(self, tr, fp=None):
1238 def checkinlinesize(self, tr, fp=None):
1221 """Check if the revlog is too big for inline and convert if so.
1239 """Check if the revlog is too big for inline and convert if so.
1222
1240
1223 This should be called after revisions are added to the revlog. If the
1241 This should be called after revisions are added to the revlog. If the
1224 revlog has grown too large to be an inline revlog, it will convert it
1242 revlog has grown too large to be an inline revlog, it will convert it
1225 to use multiple index and data files.
1243 to use multiple index and data files.
1226 """
1244 """
1227 if not self._inline or (self.start(-2) + self.length(-2)) < _maxinline:
1245 if not self._inline or (self.start(-2) + self.length(-2)) < _maxinline:
1228 return
1246 return
1229
1247
1230 trinfo = tr.find(self.indexfile)
1248 trinfo = tr.find(self.indexfile)
1231 if trinfo is None:
1249 if trinfo is None:
1232 raise RevlogError(_("%s not found in the transaction")
1250 raise RevlogError(_("%s not found in the transaction")
1233 % self.indexfile)
1251 % self.indexfile)
1234
1252
1235 trindex = trinfo[2]
1253 trindex = trinfo[2]
1236 if trindex is not None:
1254 if trindex is not None:
1237 dataoff = self.start(trindex)
1255 dataoff = self.start(trindex)
1238 else:
1256 else:
1239 # revlog was stripped at start of transaction, use all leftover data
1257 # revlog was stripped at start of transaction, use all leftover data
1240 trindex = len(self) - 1
1258 trindex = len(self) - 1
1241 dataoff = self.end(-2)
1259 dataoff = self.end(-2)
1242
1260
1243 tr.add(self.datafile, dataoff)
1261 tr.add(self.datafile, dataoff)
1244
1262
1245 if fp:
1263 if fp:
1246 fp.flush()
1264 fp.flush()
1247 fp.close()
1265 fp.close()
1248
1266
1249 df = self.opener(self.datafile, 'w')
1267 df = self.opener(self.datafile, 'w')
1250 try:
1268 try:
1251 for r in self:
1269 for r in self:
1252 df.write(self._chunkraw(r, r))
1270 df.write(self._chunkraw(r, r))
1253 finally:
1271 finally:
1254 df.close()
1272 df.close()
1255
1273
1256 fp = self.opener(self.indexfile, 'w', atomictemp=True)
1274 fp = self.opener(self.indexfile, 'w', atomictemp=True)
1257 self.version &= ~(REVLOGNGINLINEDATA)
1275 self.version &= ~(REVLOGNGINLINEDATA)
1258 self._inline = False
1276 self._inline = False
1259 for i in self:
1277 for i in self:
1260 e = self._io.packentry(self.index[i], self.node, self.version, i)
1278 e = self._io.packentry(self.index[i], self.node, self.version, i)
1261 fp.write(e)
1279 fp.write(e)
1262
1280
1263 # if we don't call close, the temp file will never replace the
1281 # if we don't call close, the temp file will never replace the
1264 # real index
1282 # real index
1265 fp.close()
1283 fp.close()
1266
1284
1267 tr.replace(self.indexfile, trindex * self._io.size)
1285 tr.replace(self.indexfile, trindex * self._io.size)
1268 self._chunkclear()
1286 self._chunkclear()
1269
1287
1270 def addrevision(self, text, transaction, link, p1, p2, cachedelta=None,
1288 def addrevision(self, text, transaction, link, p1, p2, cachedelta=None,
1271 node=None):
1289 node=None):
1272 """add a revision to the log
1290 """add a revision to the log
1273
1291
1274 text - the revision data to add
1292 text - the revision data to add
1275 transaction - the transaction object used for rollback
1293 transaction - the transaction object used for rollback
1276 link - the linkrev data to add
1294 link - the linkrev data to add
1277 p1, p2 - the parent nodeids of the revision
1295 p1, p2 - the parent nodeids of the revision
1278 cachedelta - an optional precomputed delta
1296 cachedelta - an optional precomputed delta
1279 node - nodeid of revision; typically node is not specified, and it is
1297 node - nodeid of revision; typically node is not specified, and it is
1280 computed by default as hash(text, p1, p2), however subclasses might
1298 computed by default as hash(text, p1, p2), however subclasses might
1281 use a different hashing method (and override checkhash() in that case)
1299 use a different hashing method (and override checkhash() in that case)
1282 """
1300 """
1283 if link == nullrev:
1301 if link == nullrev:
1284 raise RevlogError(_("attempted to add linkrev -1 to %s")
1302 raise RevlogError(_("attempted to add linkrev -1 to %s")
1285 % self.indexfile)
1303 % self.indexfile)
1286
1304
1287 if len(text) > _maxentrysize:
1305 if len(text) > _maxentrysize:
1288 raise RevlogError(
1306 raise RevlogError(
1289 _("%s: size of %d bytes exceeds maximum revlog storage of 2GiB")
1307 _("%s: size of %d bytes exceeds maximum revlog storage of 2GiB")
1290 % (self.indexfile, len(text)))
1308 % (self.indexfile, len(text)))
1291
1309
1292 node = node or self.hash(text, p1, p2)
1310 node = node or self.hash(text, p1, p2)
1293 if node in self.nodemap:
1311 if node in self.nodemap:
1294 return node
1312 return node
1295
1313
1296 dfh = None
1314 dfh = None
1297 if not self._inline:
1315 if not self._inline:
1298 dfh = self.opener(self.datafile, "a+")
1316 dfh = self.opener(self.datafile, "a+")
1299 ifh = self.opener(self.indexfile, "a+")
1317 ifh = self.opener(self.indexfile, "a+")
1300 try:
1318 try:
1301 return self._addrevision(node, text, transaction, link, p1, p2,
1319 return self._addrevision(node, text, transaction, link, p1, p2,
1302 REVIDX_DEFAULT_FLAGS, cachedelta, ifh, dfh)
1320 REVIDX_DEFAULT_FLAGS, cachedelta, ifh, dfh)
1303 finally:
1321 finally:
1304 if dfh:
1322 if dfh:
1305 dfh.close()
1323 dfh.close()
1306 ifh.close()
1324 ifh.close()
1307
1325
1308 def compress(self, text):
1326 def compress(self, text):
1309 """ generate a possibly-compressed representation of text """
1327 """ generate a possibly-compressed representation of text """
1310 if not text:
1328 if not text:
1311 return ("", text)
1329 return ("", text)
1312 l = len(text)
1330 l = len(text)
1313 bin = None
1331 bin = None
1314 if l < 44:
1332 if l < 44:
1315 pass
1333 pass
1316 elif l > 1000000:
1334 elif l > 1000000:
1317 # zlib makes an internal copy, thus doubling memory usage for
1335 # zlib makes an internal copy, thus doubling memory usage for
1318 # large files, so let's do this in pieces
1336 # large files, so let's do this in pieces
1319 z = zlib.compressobj()
1337 z = zlib.compressobj()
1320 p = []
1338 p = []
1321 pos = 0
1339 pos = 0
1322 while pos < l:
1340 while pos < l:
1323 pos2 = pos + 2**20
1341 pos2 = pos + 2**20
1324 p.append(z.compress(text[pos:pos2]))
1342 p.append(z.compress(text[pos:pos2]))
1325 pos = pos2
1343 pos = pos2
1326 p.append(z.flush())
1344 p.append(z.flush())
1327 if sum(map(len, p)) < l:
1345 if sum(map(len, p)) < l:
1328 bin = "".join(p)
1346 bin = "".join(p)
1329 else:
1347 else:
1330 bin = _compress(text)
1348 bin = _compress(text)
1331 if bin is None or len(bin) > l:
1349 if bin is None or len(bin) > l:
1332 if text[0] == '\0':
1350 if text[0] == '\0':
1333 return ("", text)
1351 return ("", text)
1334 return ('u', text)
1352 return ('u', text)
1335 return ("", bin)
1353 return ("", bin)
1336
1354
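The tuple returned by compress() encodes three cases the reader must undo: empty or zlib data under a '' header, and 'u' marking text stored verbatim. A sketch of the matching reader (mirroring this file's decompress(), which sits outside this excerpt), relying on zlib streams starting with 'x':

    import zlib

    def decompress_sketch(bin):
        # the first byte disambiguates: '\0' means stored plain (the text
        # itself began with NUL), 'x' is a zlib header, 'u' is verbatim
        if not bin:
            return bin
        t = bin[0:1]
        if t == b'\0':
            return bin
        if t == b'x':
            return zlib.decompress(bin)
        if t == b'u':
            return bin[1:]
        raise ValueError('unknown compression type %r' % t)

    blob = b'some revision text' * 4
    header, data = b'', zlib.compress(blob)
    assert decompress_sketch(header + data) == blob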
1337 def _isgooddelta(self, d, textlen):
1355 def _isgooddelta(self, d, textlen):
1338 """Returns True if the given delta is good. Good means that it is within
1356 """Returns True if the given delta is good. Good means that it is within
1339 the disk span, disk size, and chain length bounds that we know to be
1357 the disk span, disk size, and chain length bounds that we know to be
1340 performant."""
1358 performant."""
1341 if d is None:
1359 if d is None:
1342 return False
1360 return False
1343
1361
1344 # - 'dist' is the distance from the base revision -- bounding it limits
1362 # - 'dist' is the distance from the base revision -- bounding it limits
1345 # the amount of I/O we need to do.
1363 # the amount of I/O we need to do.
1346 # - 'compresseddeltalen' is the sum of the total size of deltas we need
1364 # - 'compresseddeltalen' is the sum of the total size of deltas we need
1347 # to apply -- bounding it limits the amount of CPU we consume.
1365 # to apply -- bounding it limits the amount of CPU we consume.
1348 dist, l, data, base, chainbase, chainlen, compresseddeltalen = d
1366 dist, l, data, base, chainbase, chainlen, compresseddeltalen = d
1349 if (dist > textlen * 4 or l > textlen or
1367 if (dist > textlen * 4 or l > textlen or
1350 compresseddeltalen > textlen * 2 or
1368 compresseddeltalen > textlen * 2 or
1351 (self._maxchainlen and chainlen > self._maxchainlen)):
1369 (self._maxchainlen and chainlen > self._maxchainlen)):
1352 return False
1370 return False
1353
1371
1354 return True
1372 return True
1355
1373
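Plugging numbers into the bounds makes the trade-off concrete; this standalone restatement takes the tuple fields as plain arguments:

    def is_good_delta(dist, l, chainlen, compresseddeltalen, textlen,
                      maxchainlen=None):
        if (dist > textlen * 4 or l > textlen or
            compresseddeltalen > textlen * 2 or
            (maxchainlen and chainlen > maxchainlen)):
            return False
        return True

    # a 1000-byte text: reading 5000 bytes of chain span fails the 4x
    # I/O bound even though the delta itself is tiny
    print(is_good_delta(dist=5000, l=60, chainlen=3,
                        compresseddeltalen=180, textlen=1000))  # False
    print(is_good_delta(dist=3000, l=60, chainlen=3,
                        compresseddeltalen=180, textlen=1000))  # True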
1356 def _addrevision(self, node, text, transaction, link, p1, p2, flags,
1374 def _addrevision(self, node, text, transaction, link, p1, p2, flags,
1357 cachedelta, ifh, dfh, alwayscache=False):
1375 cachedelta, ifh, dfh, alwayscache=False):
1358 """internal function to add revisions to the log
1376 """internal function to add revisions to the log
1359
1377
1360 see addrevision for argument descriptions.
1378 see addrevision for argument descriptions.
1361 invariants:
1379 invariants:
1362 - text is optional (can be None); if not set, cachedelta must be set.
1380 - text is optional (can be None); if not set, cachedelta must be set.
1363 if both are set, they must correspond to each other.
1381 if both are set, they must correspond to each other.
1364 """
1382 """
1365 btext = [text]
1383 btext = [text]
1366 def buildtext():
1384 def buildtext():
1367 if btext[0] is not None:
1385 if btext[0] is not None:
1368 return btext[0]
1386 return btext[0]
1369 baserev = cachedelta[0]
1387 baserev = cachedelta[0]
1370 delta = cachedelta[1]
1388 delta = cachedelta[1]
1371 # special case deltas which replace entire base; no need to decode
1389 # special case deltas which replace entire base; no need to decode
1372 # base revision. this neatly avoids censored bases, which throw when
1390 # base revision. this neatly avoids censored bases, which throw when
1373 # they're decoded.
1391 # they're decoded.
1374 hlen = struct.calcsize(">lll")
1392 hlen = struct.calcsize(">lll")
1375 if delta[:hlen] == mdiff.replacediffheader(self.rawsize(baserev),
1393 if delta[:hlen] == mdiff.replacediffheader(self.rawsize(baserev),
1376 len(delta) - hlen):
1394 len(delta) - hlen):
1377 btext[0] = delta[hlen:]
1395 btext[0] = delta[hlen:]
1378 else:
1396 else:
1379 if self._inline:
1397 if self._inline:
1380 fh = ifh
1398 fh = ifh
1381 else:
1399 else:
1382 fh = dfh
1400 fh = dfh
1383 basetext = self.revision(self.node(baserev), _df=fh)
1401 basetext = self.revision(self.node(baserev), _df=fh)
1384 btext[0] = mdiff.patch(basetext, delta)
1402 btext[0] = mdiff.patch(basetext, delta)
1385 try:
1403 try:
1386 self.checkhash(btext[0], p1, p2, node)
1404 self.checkhash(btext[0], p1, p2, node)
1387 if flags & REVIDX_ISCENSORED:
1405 if flags & REVIDX_ISCENSORED:
1388 raise RevlogError(_('node %s is not censored') % node)
1406 raise RevlogError(_('node %s is not censored') % node)
1389 except CensoredNodeError:
1407 except CensoredNodeError:
1390 # must pass the censored index flag to add censored revisions
1408 # must pass the censored index flag to add censored revisions
1391 if not flags & REVIDX_ISCENSORED:
1409 if not flags & REVIDX_ISCENSORED:
1392 raise
1410 raise
1393 return btext[0]
1411 return btext[0]
1394
1412
1395 def builddelta(rev):
1413 def builddelta(rev):
1396 # can we use the cached delta?
1414 # can we use the cached delta?
1397 if cachedelta and cachedelta[0] == rev:
1415 if cachedelta and cachedelta[0] == rev:
1398 delta = cachedelta[1]
1416 delta = cachedelta[1]
1399 else:
1417 else:
1400 t = buildtext()
1418 t = buildtext()
1401 if self.iscensored(rev):
1419 if self.iscensored(rev):
1402 # deltas based on a censored revision must replace the
1420 # deltas based on a censored revision must replace the
1403 # full content in one patch, so delta works everywhere
1421 # full content in one patch, so delta works everywhere
1404 header = mdiff.replacediffheader(self.rawsize(rev), len(t))
1422 header = mdiff.replacediffheader(self.rawsize(rev), len(t))
1405 delta = header + t
1423 delta = header + t
1406 else:
1424 else:
1407 if self._inline:
1425 if self._inline:
1408 fh = ifh
1426 fh = ifh
1409 else:
1427 else:
1410 fh = dfh
1428 fh = dfh
1411 ptext = self.revision(self.node(rev), _df=fh)
1429 ptext = self.revision(self.node(rev), _df=fh)
1412 delta = mdiff.textdiff(ptext, t)
1430 delta = mdiff.textdiff(ptext, t)
1413 data = self.compress(delta)
1431 data = self.compress(delta)
1414 l = len(data[1]) + len(data[0])
1432 l = len(data[1]) + len(data[0])
1415 if basecache[0] == rev:
1433 if basecache[0] == rev:
1416 chainbase = basecache[1]
1434 chainbase = basecache[1]
1417 else:
1435 else:
1418 chainbase = self.chainbase(rev)
1436 chainbase = self.chainbase(rev)
1419 dist = l + offset - self.start(chainbase)
1437 dist = l + offset - self.start(chainbase)
1420 if self._generaldelta:
1438 if self._generaldelta:
1421 base = rev
1439 base = rev
1422 else:
1440 else:
1423 base = chainbase
1441 base = chainbase
1424 chainlen, compresseddeltalen = self._chaininfo(rev)
1442 chainlen, compresseddeltalen = self._chaininfo(rev)
1425 chainlen += 1
1443 chainlen += 1
1426 compresseddeltalen += l
1444 compresseddeltalen += l
1427 return dist, l, data, base, chainbase, chainlen, compresseddeltalen
1445 return dist, l, data, base, chainbase, chainlen, compresseddeltalen
1428
1446
1429 curr = len(self)
1447 curr = len(self)
1430 prev = curr - 1
1448 prev = curr - 1
1431 base = chainbase = curr
1449 base = chainbase = curr
1432 offset = self.end(prev)
1450 offset = self.end(prev)
1433 delta = None
1451 delta = None
1434 if self._basecache is None:
1452 if self._basecache is None:
1435 self._basecache = (prev, self.chainbase(prev))
1453 self._basecache = (prev, self.chainbase(prev))
1436 basecache = self._basecache
1454 basecache = self._basecache
1437 p1r, p2r = self.rev(p1), self.rev(p2)
1455 p1r, p2r = self.rev(p1), self.rev(p2)
1438
1456
1439 # full versions are inserted when the needed deltas
1457 # full versions are inserted when the needed deltas
1440 # become comparable to the uncompressed text
1458 # become comparable to the uncompressed text
1441 if text is None:
1459 if text is None:
1442 textlen = mdiff.patchedsize(self.rawsize(cachedelta[0]),
1460 textlen = mdiff.patchedsize(self.rawsize(cachedelta[0]),
1443 cachedelta[1])
1461 cachedelta[1])
1444 else:
1462 else:
1445 textlen = len(text)
1463 textlen = len(text)
1446
1464
1447 # should we try to build a delta?
1465 # should we try to build a delta?
1448 if prev != nullrev:
1466 if prev != nullrev:
1449 tested = set()
1467 tested = set()
1450 if cachedelta and self._generaldelta and self._lazydeltabase:
1468 if cachedelta and self._generaldelta and self._lazydeltabase:
1451 # Assume what we received from the server is a good choice
1469 # Assume what we received from the server is a good choice
1452 # build delta will reuse the cache
1470 # build delta will reuse the cache
1453 candidatedelta = builddelta(cachedelta[0])
1471 candidatedelta = builddelta(cachedelta[0])
1454 tested.add(cachedelta[0])
1472 tested.add(cachedelta[0])
1455 if self._isgooddelta(candidatedelta, textlen):
1473 if self._isgooddelta(candidatedelta, textlen):
1456 delta = candidatedelta
1474 delta = candidatedelta
1457 if delta is None and self._generaldelta:
1475 if delta is None and self._generaldelta:
1458 # exclude already lazy tested base if any
1476 # exclude already lazy tested base if any
1459 parents = [p for p in (p1r, p2r)
1477 parents = [p for p in (p1r, p2r)
1460 if p != nullrev and p not in tested]
1478 if p != nullrev and p not in tested]
1461 if parents and not self._aggressivemergedeltas:
1479 if parents and not self._aggressivemergedeltas:
1462 # Pick whichever parent is closer to us (to minimize the
1480 # Pick whichever parent is closer to us (to minimize the
1463 # chance of having to build a fulltext).
1481 # chance of having to build a fulltext).
1464 parents = [max(parents)]
1482 parents = [max(parents)]
1465 tested.update(parents)
1483 tested.update(parents)
1466 pdeltas = []
1484 pdeltas = []
1467 for p in parents:
1485 for p in parents:
1468 pd = builddelta(p)
1486 pd = builddelta(p)
1469 if self._isgooddelta(pd, textlen):
1487 if self._isgooddelta(pd, textlen):
1470 pdeltas.append(pd)
1488 pdeltas.append(pd)
1471 if pdeltas:
1489 if pdeltas:
1472 delta = min(pdeltas, key=lambda x: x[1])
1490 delta = min(pdeltas, key=lambda x: x[1])
1473 if delta is None and prev not in tested:
1491 if delta is None and prev not in tested:
1474 # other approaches failed, so try against prev to hopefully save us a
1492 # other approaches failed, so try against prev to hopefully save us a
1475 # fulltext.
1493 # fulltext.
1476 candidatedelta = builddelta(prev)
1494 candidatedelta = builddelta(prev)
1477 if self._isgooddelta(candidatedelta, textlen):
1495 if self._isgooddelta(candidatedelta, textlen):
1478 delta = candidatedelta
1496 delta = candidatedelta
1479 if delta is not None:
1497 if delta is not None:
1480 dist, l, data, base, chainbase, chainlen, compresseddeltalen = delta
1498 dist, l, data, base, chainbase, chainlen, compresseddeltalen = delta
1481 else:
1499 else:
1482 text = buildtext()
1500 text = buildtext()
1483 data = self.compress(text)
1501 data = self.compress(text)
1484 l = len(data[1]) + len(data[0])
1502 l = len(data[1]) + len(data[0])
1485 base = chainbase = curr
1503 base = chainbase = curr
1486
1504
1487 e = (offset_type(offset, flags), l, textlen,
1505 e = (offset_type(offset, flags), l, textlen,
1488 base, link, p1r, p2r, node)
1506 base, link, p1r, p2r, node)
1489 self.index.insert(-1, e)
1507 self.index.insert(-1, e)
1490 self.nodemap[node] = curr
1508 self.nodemap[node] = curr
1491
1509
1492 entry = self._io.packentry(e, self.node, self.version, curr)
1510 entry = self._io.packentry(e, self.node, self.version, curr)
1493 self._writeentry(transaction, ifh, dfh, entry, data, link, offset)
1511 self._writeentry(transaction, ifh, dfh, entry, data, link, offset)
1494
1512
1495 if alwayscache and text is None:
1513 if alwayscache and text is None:
1496 text = buildtext()
1514 text = buildtext()
1497
1515
1498 if type(text) == str: # only accept immutable objects
1516 if type(text) == str: # only accept immutable objects
1499 self._cache = (node, curr, text)
1517 self._cache = (node, curr, text)
1500 self._basecache = (curr, chainbase)
1518 self._basecache = (curr, chainbase)
1501 return node
1519 return node
1502
1520
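The delta-base search in _addrevision tries candidates in a fixed order. A compressed, hypothetical restatement of just that ordering; isgood() stands in for _isgooddelta(), and the real code compares parent deltas by resulting size rather than by rev:

    def choose_delta_base(prev, p1r, p2r, cachedbase, generaldelta,
                          lazydeltabase, aggressive, isgood, nullrev=-1):
        tested = set()
        # 1. trust the base the server delta'd against, if configured to
        if cachedbase is not None and generaldelta and lazydeltabase:
            tested.add(cachedbase)
            if isgood(cachedbase):
                return cachedbase
        # 2. try the parents: the closer one, or both when aggressive
        if generaldelta:
            parents = [p for p in (p1r, p2r)
                       if p != nullrev and p not in tested]
            if parents and not aggressive:
                parents = [max(parents)]
            tested.update(parents)
            good = [p for p in parents if isgood(p)]
            if good:
                return good[0]  # real code picks the smallest delta
        # 3. fall back to prev before giving up on deltas entirely
        if prev not in tested and isgood(prev):
            return prev
        return None  # None -> store a full, compressed snapshot

    print(choose_delta_base(9, 7, -1, None, True, False, False,
                            lambda r: r == 7))  # -> 7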
1503 def _writeentry(self, transaction, ifh, dfh, entry, data, link, offset):
1521 def _writeentry(self, transaction, ifh, dfh, entry, data, link, offset):
1504 # Files opened in a+ mode have inconsistent behavior on various
1522 # Files opened in a+ mode have inconsistent behavior on various
1505 # platforms. Windows requires that a file positioning call be made
1523 # platforms. Windows requires that a file positioning call be made
1506 # when the file handle transitions between reads and writes. See
1524 # when the file handle transitions between reads and writes. See
1507 # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
1525 # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
1508 # platforms, Python or the platform itself can be buggy. Some versions
1526 # platforms, Python or the platform itself can be buggy. Some versions
1509 # of Solaris have been observed to not append at the end of the file
1527 # of Solaris have been observed to not append at the end of the file
1510 # if the file was seeked to before the end. See issue4943 for more.
1528 # if the file was seeked to before the end. See issue4943 for more.
1511 #
1529 #
1512 # We work around this issue by inserting a seek() before writing.
1530 # We work around this issue by inserting a seek() before writing.
1513 # Note: This is likely not necessary on Python 3.
1531 # Note: This is likely not necessary on Python 3.
1514 ifh.seek(0, os.SEEK_END)
1532 ifh.seek(0, os.SEEK_END)
1515 if dfh:
1533 if dfh:
1516 dfh.seek(0, os.SEEK_END)
1534 dfh.seek(0, os.SEEK_END)
1517
1535
1518 curr = len(self) - 1
1536 curr = len(self) - 1
1519 if not self._inline:
1537 if not self._inline:
1520 transaction.add(self.datafile, offset)
1538 transaction.add(self.datafile, offset)
1521 transaction.add(self.indexfile, curr * len(entry))
1539 transaction.add(self.indexfile, curr * len(entry))
1522 if data[0]:
1540 if data[0]:
1523 dfh.write(data[0])
1541 dfh.write(data[0])
1524 dfh.write(data[1])
1542 dfh.write(data[1])
1525 ifh.write(entry)
1543 ifh.write(entry)
1526 else:
1544 else:
1527 offset += curr * self._io.size
1545 offset += curr * self._io.size
1528 transaction.add(self.indexfile, offset, curr)
1546 transaction.add(self.indexfile, offset, curr)
1529 ifh.write(entry)
1547 ifh.write(entry)
1530 ifh.write(data[0])
1548 ifh.write(data[0])
1531 ifh.write(data[1])
1549 ifh.write(data[1])
1532 self.checkinlinesize(transaction, ifh)
1550 self.checkinlinesize(transaction, ifh)
1533
1551
1534 def addgroup(self, cg, linkmapper, transaction, addrevisioncb=None):
1552 def addgroup(self, cg, linkmapper, transaction, addrevisioncb=None):
1535 """
1553 """
1536 add a delta group
1554 add a delta group
1537
1555
1538 given a set of deltas, add them to the revision log. the
1556 given a set of deltas, add them to the revision log. the
1539 first delta is against its parent, which should be in our
1557 first delta is against its parent, which should be in our
1540 log, the rest are against the previous delta.
1558 log, the rest are against the previous delta.
1541
1559
1542 If ``addrevisioncb`` is defined, it will be called with arguments of
1560 If ``addrevisioncb`` is defined, it will be called with arguments of
1543 this revlog and the node that was added.
1561 this revlog and the node that was added.
1544 """
1562 """
1545
1563
1546 # track the base of the current delta log
1564 # track the base of the current delta log
1547 content = []
1565 content = []
1548 node = None
1566 node = None
1549
1567
1550 r = len(self)
1568 r = len(self)
1551 end = 0
1569 end = 0
1552 if r:
1570 if r:
1553 end = self.end(r - 1)
1571 end = self.end(r - 1)
1554 ifh = self.opener(self.indexfile, "a+")
1572 ifh = self.opener(self.indexfile, "a+")
1555 isize = r * self._io.size
1573 isize = r * self._io.size
1556 if self._inline:
1574 if self._inline:
1557 transaction.add(self.indexfile, end + isize, r)
1575 transaction.add(self.indexfile, end + isize, r)
1558 dfh = None
1576 dfh = None
1559 else:
1577 else:
1560 transaction.add(self.indexfile, isize, r)
1578 transaction.add(self.indexfile, isize, r)
1561 transaction.add(self.datafile, end)
1579 transaction.add(self.datafile, end)
1562 dfh = self.opener(self.datafile, "a+")
1580 dfh = self.opener(self.datafile, "a+")
1563 def flush():
1581 def flush():
1564 if dfh:
1582 if dfh:
1565 dfh.flush()
1583 dfh.flush()
1566 ifh.flush()
1584 ifh.flush()
1567 try:
1585 try:
1568 # loop through our set of deltas
1586 # loop through our set of deltas
1569 chain = None
1587 chain = None
1570 while True:
1588 while True:
1571 chunkdata = cg.deltachunk(chain)
1589 chunkdata = cg.deltachunk(chain)
1572 if not chunkdata:
1590 if not chunkdata:
1573 break
1591 break
1574 node = chunkdata['node']
1592 node = chunkdata['node']
1575 p1 = chunkdata['p1']
1593 p1 = chunkdata['p1']
1576 p2 = chunkdata['p2']
1594 p2 = chunkdata['p2']
1577 cs = chunkdata['cs']
1595 cs = chunkdata['cs']
1578 deltabase = chunkdata['deltabase']
1596 deltabase = chunkdata['deltabase']
1579 delta = chunkdata['delta']
1597 delta = chunkdata['delta']
1580 flags = chunkdata['flags'] or REVIDX_DEFAULT_FLAGS
1598 flags = chunkdata['flags'] or REVIDX_DEFAULT_FLAGS
1581
1599
1582 content.append(node)
1600 content.append(node)
1583
1601
1584 link = linkmapper(cs)
1602 link = linkmapper(cs)
1585 if node in self.nodemap:
1603 if node in self.nodemap:
1586 # this can happen if two branches make the same change
1604 # this can happen if two branches make the same change
1587 chain = node
1605 chain = node
1588 continue
1606 continue
1589
1607
1590 for p in (p1, p2):
1608 for p in (p1, p2):
1591 if p not in self.nodemap:
1609 if p not in self.nodemap:
1592 raise LookupError(p, self.indexfile,
1610 raise LookupError(p, self.indexfile,
1593 _('unknown parent'))
1611 _('unknown parent'))
1594
1612
1595 if deltabase not in self.nodemap:
1613 if deltabase not in self.nodemap:
1596 raise LookupError(deltabase, self.indexfile,
1614 raise LookupError(deltabase, self.indexfile,
1597 _('unknown delta base'))
1615 _('unknown delta base'))
1598
1616
1599 baserev = self.rev(deltabase)
1617 baserev = self.rev(deltabase)
1600
1618
1601 if baserev != nullrev and self.iscensored(baserev):
1619 if baserev != nullrev and self.iscensored(baserev):
1602 # if base is censored, delta must be full replacement in a
1620 # if base is censored, delta must be full replacement in a
1603 # single patch operation
1621 # single patch operation
1604 hlen = struct.calcsize(">lll")
1622 hlen = struct.calcsize(">lll")
1605 oldlen = self.rawsize(baserev)
1623 oldlen = self.rawsize(baserev)
1606 newlen = len(delta) - hlen
1624 newlen = len(delta) - hlen
1607 if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
1625 if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
1608 raise error.CensoredBaseError(self.indexfile,
1626 raise error.CensoredBaseError(self.indexfile,
1609 self.node(baserev))
1627 self.node(baserev))
1610
1628
1611 if not flags and self._peek_iscensored(baserev, delta, flush):
1629 if not flags and self._peek_iscensored(baserev, delta, flush):
1612 flags |= REVIDX_ISCENSORED
1630 flags |= REVIDX_ISCENSORED
1613
1631
1614 # We assume consumers of addrevisioncb will want to retrieve
1632 # We assume consumers of addrevisioncb will want to retrieve
1615 # the added revision, which will require a call to
1633 # the added revision, which will require a call to
1616 # revision(). revision() will fast path if there is a cache
1634 # revision(). revision() will fast path if there is a cache
1617 # hit. So, we tell _addrevision() to always cache in this case.
1635 # hit. So, we tell _addrevision() to always cache in this case.
1618 chain = self._addrevision(node, None, transaction, link,
1636 chain = self._addrevision(node, None, transaction, link,
1619 p1, p2, flags, (baserev, delta),
1637 p1, p2, flags, (baserev, delta),
1620 ifh, dfh,
1638 ifh, dfh,
1621 alwayscache=bool(addrevisioncb))
1639 alwayscache=bool(addrevisioncb))
1622
1640
1623 if addrevisioncb:
1641 if addrevisioncb:
1624 addrevisioncb(self, chain)
1642 addrevisioncb(self, chain)
1625
1643
1626 if not dfh and not self._inline:
1644 if not dfh and not self._inline:
1627 # addrevision switched from inline to conventional
1645 # addrevision switched from inline to conventional
1628 # reopen the index
1646 # reopen the index
1629 ifh.close()
1647 ifh.close()
1630 dfh = self.opener(self.datafile, "a+")
1648 dfh = self.opener(self.datafile, "a+")
1631 ifh = self.opener(self.indexfile, "a+")
1649 ifh = self.opener(self.indexfile, "a+")
1632 finally:
1650 finally:
1633 if dfh:
1651 if dfh:
1634 dfh.close()
1652 dfh.close()
1635 ifh.close()
1653 ifh.close()
1636
1654
1637 return content
1655 return content
1638
1656
1639 def iscensored(self, rev):
1657 def iscensored(self, rev):
1640 """Check if a file revision is censored."""
1658 """Check if a file revision is censored."""
1641 return False
1659 return False
1642
1660
1643 def _peek_iscensored(self, baserev, delta, flush):
1661 def _peek_iscensored(self, baserev, delta, flush):
1644 """Quickly check if a delta produces a censored revision."""
1662 """Quickly check if a delta produces a censored revision."""
1645 return False
1663 return False
1646
1664
1647 def getstrippoint(self, minlink):
1665 def getstrippoint(self, minlink):
1648 """find the minimum rev that must be stripped to strip the linkrev
1666 """find the minimum rev that must be stripped to strip the linkrev
1649
1667
1650 Returns a tuple containing the minimum rev and a set of all revs that
1668 Returns a tuple containing the minimum rev and a set of all revs that
1651 have linkrevs that will be broken by this strip.
1669 have linkrevs that will be broken by this strip.
1652 """
1670 """
1653 brokenrevs = set()
1671 brokenrevs = set()
1654 strippoint = len(self)
1672 strippoint = len(self)
1655
1673
1656 heads = {}
1674 heads = {}
1657 futurelargelinkrevs = set()
1675 futurelargelinkrevs = set()
1658 for head in self.headrevs():
1676 for head in self.headrevs():
1659 headlinkrev = self.linkrev(head)
1677 headlinkrev = self.linkrev(head)
1660 heads[head] = headlinkrev
1678 heads[head] = headlinkrev
1661 if headlinkrev >= minlink:
1679 if headlinkrev >= minlink:
1662 futurelargelinkrevs.add(headlinkrev)
1680 futurelargelinkrevs.add(headlinkrev)
1663
1681
1664 # This algorithm involves walking down the rev graph, starting at the
1682 # This algorithm involves walking down the rev graph, starting at the
1665 # heads. Since the revs are topologically sorted according to linkrev,
1683 # heads. Since the revs are topologically sorted according to linkrev,
1666 # once all head linkrevs are below the minlink, we know there are
1684 # once all head linkrevs are below the minlink, we know there are
1667 # no more revs that could have a linkrev greater than minlink.
1685 # no more revs that could have a linkrev greater than minlink.
1668 # So we can stop walking.
1686 # So we can stop walking.
1669 while futurelargelinkrevs:
1687 while futurelargelinkrevs:
1670 strippoint -= 1
1688 strippoint -= 1
1671 linkrev = heads.pop(strippoint)
1689 linkrev = heads.pop(strippoint)
1672
1690
1673 if linkrev < minlink:
1691 if linkrev < minlink:
1674 brokenrevs.add(strippoint)
1692 brokenrevs.add(strippoint)
1675 else:
1693 else:
1676 futurelargelinkrevs.remove(linkrev)
1694 futurelargelinkrevs.remove(linkrev)
1677
1695
1678 for p in self.parentrevs(strippoint):
1696 for p in self.parentrevs(strippoint):
1679 if p != nullrev:
1697 if p != nullrev:
1680 plinkrev = self.linkrev(p)
1698 plinkrev = self.linkrev(p)
1681 heads[p] = plinkrev
1699 heads[p] = plinkrev
1682 if plinkrev >= minlink:
1700 if plinkrev >= minlink:
1683 futurelargelinkrevs.add(plinkrev)
1701 futurelargelinkrevs.add(plinkrev)
1684
1702
1685 return strippoint, brokenrevs
1703 return strippoint, brokenrevs
1686
1704
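A toy run of the walk helps. The sketch below restates the algorithm over plain lists and dicts (hypothetical standalone names), using a linear history whose linkrevs are not strictly ordered:

    def strip_point(parentrevs, linkrevs, headrevs, minlink, nullrev=-1):
        # pop revs from the top while any remaining head's linkrev is
        # still >= minlink; revs below the final strippoint that slipped
        # under minlink are reported as broken
        brokenrevs = set()
        strippoint = len(linkrevs)
        heads = dict((h, linkrevs[h]) for h in headrevs)
        future = set(lr for lr in heads.values() if lr >= minlink)
        while future:
            strippoint -= 1
            linkrev = heads.pop(strippoint)
            if linkrev < minlink:
                brokenrevs.add(strippoint)
            else:
                future.remove(linkrev)
            for p in parentrevs[strippoint]:
                if p != nullrev:
                    heads[p] = linkrevs[p]
                    if linkrevs[p] >= minlink:
                        future.add(linkrevs[p])
        return strippoint, brokenrevs

    # linear history 0-1-2-3 with linkrevs [0, 1, 3, 2]: stripping
    # minlink=2 must cut at rev 2, since rev 2 carries linkrev 3
    parents = {0: [-1], 1: [0], 2: [1], 3: [2]}
    print(strip_point(parents, [0, 1, 3, 2], [3], 2))  # -> (2, set())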
1687 def strip(self, minlink, transaction):
1705 def strip(self, minlink, transaction):
1688 """truncate the revlog on the first revision with a linkrev >= minlink
1706 """truncate the revlog on the first revision with a linkrev >= minlink
1689
1707
1690 This function is called when we're stripping revision minlink and
1708 This function is called when we're stripping revision minlink and
1691 its descendants from the repository.
1709 its descendants from the repository.
1692
1710
1693 We have to remove all revisions with linkrev >= minlink, because
1711 We have to remove all revisions with linkrev >= minlink, because
1694 the equivalent changelog revisions will be renumbered after the
1712 the equivalent changelog revisions will be renumbered after the
1695 strip.
1713 strip.
1696
1714
1697 So we truncate the revlog on the first of these revisions, and
1715 So we truncate the revlog on the first of these revisions, and
1698 trust that the caller has saved the revisions that shouldn't be
1716 trust that the caller has saved the revisions that shouldn't be
1699 removed and that it'll re-add them after this truncation.
1717 removed and that it'll re-add them after this truncation.
1700 """
1718 """
1701 if len(self) == 0:
1719 if len(self) == 0:
1702 return
1720 return
1703
1721
1704 rev, _ = self.getstrippoint(minlink)
1722 rev, _ = self.getstrippoint(minlink)
1705 if rev == len(self):
1723 if rev == len(self):
1706 return
1724 return
1707
1725
1708 # first truncate the files on disk
1726 # first truncate the files on disk
1709 end = self.start(rev)
1727 end = self.start(rev)
1710 if not self._inline:
1728 if not self._inline:
1711 transaction.add(self.datafile, end)
1729 transaction.add(self.datafile, end)
1712 end = rev * self._io.size
1730 end = rev * self._io.size
1713 else:
1731 else:
1714 end += rev * self._io.size
1732 end += rev * self._io.size
1715
1733
1716 transaction.add(self.indexfile, end)
1734 transaction.add(self.indexfile, end)
1717
1735
1718 # then reset internal state in memory to forget those revisions
1736 # then reset internal state in memory to forget those revisions
1719 self._cache = None
1737 self._cache = None
1720 self._chaininfocache = {}
1738 self._chaininfocache = {}
1721 self._chunkclear()
1739 self._chunkclear()
1722 for x in xrange(rev, len(self)):
1740 for x in xrange(rev, len(self)):
1723 del self.nodemap[self.node(x)]
1741 del self.nodemap[self.node(x)]
1724
1742
1725 del self.index[rev:-1]
1743 del self.index[rev:-1]
1726
1744
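The truncation offsets differ between split and inline revlogs; a small sketch with an illustrative 64-byte entry size:

    def truncation_points(rev, data_start, inline, iosize=64):
        # split revlog: cut the data file at the first stripped revision's
        # data start, and the index at rev fixed-size entries
        if not inline:
            return {'datafile': data_start, 'indexfile': rev * iosize}
        # inline revlog: one file holds both, so the cut point is the data
        # offset displaced by rev interleaved index entries
        return {'indexfile': data_start + rev * iosize}

    print(truncation_points(10, 5000, inline=False))  # index cut at 640
    print(truncation_points(10, 5000, inline=True))   # single cut at 5640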
1727 def checksize(self):
1745 def checksize(self):
1728 expected = 0
1746 expected = 0
1729 if len(self):
1747 if len(self):
1730 expected = max(0, self.end(len(self) - 1))
1748 expected = max(0, self.end(len(self) - 1))
1731
1749
1732 try:
1750 try:
1733 f = self.opener(self.datafile)
1751 f = self.opener(self.datafile)
1734 f.seek(0, 2)
1752 f.seek(0, 2)
1735 actual = f.tell()
1753 actual = f.tell()
1736 f.close()
1754 f.close()
1737 dd = actual - expected
1755 dd = actual - expected
1738 except IOError as inst:
1756 except IOError as inst:
1739 if inst.errno != errno.ENOENT:
1757 if inst.errno != errno.ENOENT:
1740 raise
1758 raise
1741 dd = 0
1759 dd = 0
1742
1760
1743 try:
1761 try:
1744 f = self.opener(self.indexfile)
1762 f = self.opener(self.indexfile)
1745 f.seek(0, 2)
1763 f.seek(0, 2)
1746 actual = f.tell()
1764 actual = f.tell()
1747 f.close()
1765 f.close()
1748 s = self._io.size
1766 s = self._io.size
1749 i = max(0, actual // s)
1767 i = max(0, actual // s)
1750 di = actual - (i * s)
1768 di = actual - (i * s)
1751 if self._inline:
1769 if self._inline:
1752 databytes = 0
1770 databytes = 0
1753 for r in self:
1771 for r in self:
1754 databytes += max(0, self.length(r))
1772 databytes += max(0, self.length(r))
1755 dd = 0
1773 dd = 0
1756 di = actual - len(self) * s - databytes
1774 di = actual - len(self) * s - databytes
1757 except IOError as inst:
1775 except IOError as inst:
1758 if inst.errno != errno.ENOENT:
1776 if inst.errno != errno.ENOENT:
1759 raise
1777 raise
1760 di = 0
1778 di = 0
1761
1779
1762 return (dd, di)
1780 return (dd, di)
1763
1781
1764 def files(self):
1782 def files(self):
1765 res = [self.indexfile]
1783 res = [self.indexfile]
1766 if not self._inline:
1784 if not self._inline:
1767 res.append(self.datafile)
1785 res.append(self.datafile)
1768 return res
1786 return res