revlog: make clearcaches() more effective...
Gregory Szorc
r27465:072a675c default
@@ -1,1763 +1,1768 @@ mercurial/revlog.py
# revlog.py - storage back-end for mercurial
#
# Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.

"""Storage back-end for Mercurial.

This provides efficient delta storage with O(1) retrieve and append
and O(changes) merge between branches.
"""

from __future__ import absolute_import

import collections
import errno
import os
import struct
import zlib

# import stuff from node for others to import from revlog
from .node import (
    bin,
    hex,
    nullid,
    nullrev,
)
from .i18n import _
from . import (
    ancestor,
    error,
    mdiff,
    parsers,
    templatefilters,
    util,
)

_pack = struct.pack
_unpack = struct.unpack
_compress = zlib.compress
_decompress = zlib.decompress
_sha = util.sha1

# revlog header flags
REVLOGV0 = 0
REVLOGNG = 1
REVLOGNGINLINEDATA = (1 << 16)
REVLOGGENERALDELTA = (1 << 17)
REVLOG_DEFAULT_FLAGS = REVLOGNGINLINEDATA
REVLOG_DEFAULT_FORMAT = REVLOGNG
REVLOG_DEFAULT_VERSION = REVLOG_DEFAULT_FORMAT | REVLOG_DEFAULT_FLAGS
REVLOGNG_FLAGS = REVLOGNGINLINEDATA | REVLOGGENERALDELTA

# revlog index flags
REVIDX_ISCENSORED = (1 << 15) # revision has censor metadata, must be verified
REVIDX_DEFAULT_FLAGS = 0
REVIDX_KNOWN_FLAGS = REVIDX_ISCENSORED

# max size of revlog with inline data
_maxinline = 131072
_chunksize = 1048576

RevlogError = error.RevlogError
LookupError = error.LookupError
CensoredNodeError = error.CensoredNodeError

def getoffset(q):
    return int(q >> 16)

def gettype(q):
    return int(q & 0xFFFF)

def offset_type(offset, type):
    return long(long(offset) << 16 | type)

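Note: the three helpers above pack a byte offset and a 16-bit flag field into
the single first field of an index entry; the offset lives in the high bits,
the flags in the low 16 bits. For example:

    packed = offset_type(1024, 0)   # 1024 << 16 | 0
    assert getoffset(packed) == 1024
    assert gettype(packed) == 0
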
_nullhash = _sha(nullid)

def hash(text, p1, p2):
    """generate a hash from the given text and its parent hashes

    This hash combines both the current file contents and its history
    in a manner that makes it easy to distinguish nodes with the same
    content in the revision graph.
    """
    # As of now, if one of the parent node is null, p2 is null
    if p2 == nullid:
        # deep copy of a hash is faster than creating one
        s = _nullhash.copy()
        s.update(p1)
    else:
        # none of the parent nodes are nullid
        l = [p1, p2]
        l.sort()
        s = _sha(l[0])
        s.update(l[1])
    s.update(text)
    return s.digest()

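Note: since nullid is twenty NUL bytes it always sorts first, so both branches
compute sha1(min(p1, p2) + max(p1, p2) + text); the p2 == nullid branch merely
reuses a precomputed sha1(nullid). A self-contained sketch with plain hashlib
(illustrative only, not the code above):

    import hashlib

    def nodehash(text, p1, p2):
        s = hashlib.sha1(min(p1, p2))
        s.update(max(p1, p2))
        s.update(text)
        return s.digest()
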
def decompress(bin):
    """ decompress the given input """
    if not bin:
        return bin
    t = bin[0]
    if t == '\0':
        return bin
    if t == 'x':
        try:
            return _decompress(bin)
        except zlib.error as e:
            raise RevlogError(_("revlog decompress error: %s") % str(e))
    if t == 'u':
        return bin[1:]
    raise RevlogError(_("unknown compression type %r") % t)

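Note: the first byte of a stored chunk tags its encoding: '\0' (or an empty
chunk) is plain data, 'x' is a zlib stream (zlib output happens to begin with
byte 0x78, i.e. 'x'), and 'u' marks data kept uncompressed because compression
did not pay off. An illustrative round trip:

    import zlib

    blob = zlib.compress('some revision text')
    assert blob[0] == 'x'
    assert decompress(blob) == 'some revision text'
    assert decompress('u' + 'raw text') == 'raw text'
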
# index v0:
#  4 bytes: offset
#  4 bytes: compressed length
#  4 bytes: base rev
#  4 bytes: link rev
# 20 bytes: parent 1 nodeid
# 20 bytes: parent 2 nodeid
# 20 bytes: nodeid
indexformatv0 = ">4l20s20s20s"

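Note: each v0 entry is a fixed 76-byte record (four 4-byte big-endian integers
plus three 20-byte nodeids), so the parser below can walk the file by simple
offset arithmetic:

    import struct

    assert struct.calcsize(indexformatv0) == 76   # 4*4 + 3*20
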
class revlogoldio(object):
    def __init__(self):
        self.size = struct.calcsize(indexformatv0)

    def parseindex(self, data, inline):
        s = self.size
        index = []
        nodemap = {nullid: nullrev}
        n = off = 0
        l = len(data)
        while off + s <= l:
            cur = data[off:off + s]
            off += s
            e = _unpack(indexformatv0, cur)
            # transform to revlogv1 format
            e2 = (offset_type(e[0], 0), e[1], -1, e[2], e[3],
                  nodemap.get(e[4], nullrev), nodemap.get(e[5], nullrev), e[6])
            index.append(e2)
            nodemap[e[6]] = n
            n += 1

        # add the magic null revision at -1
        index.append((0, 0, 0, -1, -1, -1, -1, nullid))

        return index, nodemap, None

    def packentry(self, entry, node, version, rev):
        if gettype(entry[0]):
            raise RevlogError(_("index entry flags need RevlogNG"))
        e2 = (getoffset(entry[0]), entry[1], entry[3], entry[4],
              node(entry[5]), node(entry[6]), entry[7])
        return _pack(indexformatv0, *e2)

# index ng:
#  6 bytes: offset
#  2 bytes: flags
#  4 bytes: compressed length
#  4 bytes: uncompressed length
#  4 bytes: base rev
#  4 bytes: link rev
#  4 bytes: parent 1 rev
#  4 bytes: parent 2 rev
# 32 bytes: nodeid
indexformatng = ">Qiiiiii20s12x"
versionformat = ">I"

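Note: a RevlogNG entry is 64 bytes: an 8-byte combined offset/flags field, six
4-byte integers, and a 20-byte nodeid padded to 32 bytes:

    import struct

    assert struct.calcsize(indexformatng) == 64   # 8 + 6*4 + 20 + 12
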
# corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
# signed integer)
_maxentrysize = 0x7fffffff

class revlogio(object):
    def __init__(self):
        self.size = struct.calcsize(indexformatng)

    def parseindex(self, data, inline):
        # call the C implementation to parse the index data
        index, cache = parsers.parse_index2(data, inline)
        return index, getattr(index, 'nodemap', None), cache

    def packentry(self, entry, node, version, rev):
        p = _pack(indexformatng, *entry)
        if rev == 0:
            p = _pack(versionformat, version) + p[4:]
        return p

class revlog(object):
    """
    the underlying revision storage object

    A revlog consists of two parts, an index and the revision data.

    The index is a file with a fixed record size containing
    information on each revision, including its nodeid (hash), the
    nodeids of its parents, the position and offset of its data within
    the data file, and the revision it's based on. Finally, each entry
    contains a linkrev entry that can serve as a pointer to external
    data.

    The revision data itself is a linear collection of data chunks.
    Each chunk represents a revision and is usually represented as a
    delta against the previous chunk. To bound lookup time, runs of
    deltas are limited to about 2 times the length of the original
    version data. This makes retrieval of a version proportional to
    its size, or O(1) relative to the number of revisions.

    Both pieces of the revlog are written to in an append-only
    fashion, which means we never need to rewrite a file to insert or
    remove data, and can use some simple techniques to avoid the need
    for locking while reading.
    """
    def __init__(self, opener, indexfile):
        """
        create a revlog object

        opener is a function that abstracts the file opening operation
        and can be used to implement COW semantics or the like.
        """
        self.indexfile = indexfile
        self.datafile = indexfile[:-2] + ".d"
        self.opener = opener
        # 3-tuple of (node, rev, text) for a raw revision.
        self._cache = None
        # 2-tuple of (rev, baserev) defining the base revision the delta chain
        # begins at for a revision.
        self._basecache = None
        # 2-tuple of (offset, data) of raw data from the revlog at an offset.
        self._chunkcache = (0, '')
        # How much data to read and cache into the raw revlog data cache.
        self._chunkcachesize = 65536
        self._maxchainlen = None
        self._aggressivemergedeltas = False
        self.index = []
        # Mapping of partial identifiers to full nodes.
        self._pcache = {}
        # Mapping of revision integer to full node.
        self._nodecache = {nullid: nullrev}
        self._nodepos = None

        v = REVLOG_DEFAULT_VERSION
        opts = getattr(opener, 'options', None)
        if opts is not None:
            if 'revlogv1' in opts:
                if 'generaldelta' in opts:
                    v |= REVLOGGENERALDELTA
            else:
                v = 0
            if 'chunkcachesize' in opts:
                self._chunkcachesize = opts['chunkcachesize']
            if 'maxchainlen' in opts:
                self._maxchainlen = opts['maxchainlen']
            if 'aggressivemergedeltas' in opts:
                self._aggressivemergedeltas = opts['aggressivemergedeltas']
            self._lazydeltabase = bool(opts.get('lazydeltabase', False))

        if self._chunkcachesize <= 0:
            raise RevlogError(_('revlog chunk cache size %r is not greater '
                                'than 0') % self._chunkcachesize)
        elif self._chunkcachesize & (self._chunkcachesize - 1):
            raise RevlogError(_('revlog chunk cache size %r is not a power '
                                'of 2') % self._chunkcachesize)

        indexdata = ''
        self._initempty = True
        try:
            f = self.opener(self.indexfile)
            indexdata = f.read()
            f.close()
            if len(indexdata) > 0:
                v = struct.unpack(versionformat, indexdata[:4])[0]
                self._initempty = False
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise

        self.version = v
        self._inline = v & REVLOGNGINLINEDATA
        self._generaldelta = v & REVLOGGENERALDELTA
        flags = v & ~0xFFFF
        fmt = v & 0xFFFF
        if fmt == REVLOGV0 and flags:
            raise RevlogError(_("index %s unknown flags %#04x for format v0")
                              % (self.indexfile, flags >> 16))
        elif fmt == REVLOGNG and flags & ~REVLOGNG_FLAGS:
            raise RevlogError(_("index %s unknown flags %#04x for revlogng")
                              % (self.indexfile, flags >> 16))
        elif fmt > REVLOGNG:
            raise RevlogError(_("index %s unknown format %d")
                              % (self.indexfile, fmt))

        self._io = revlogio()
        if self.version == REVLOGV0:
            self._io = revlogoldio()
        try:
            d = self._io.parseindex(indexdata, self._inline)
        except (ValueError, IndexError):
            raise RevlogError(_("index %s is corrupted") % (self.indexfile))
        self.index, nodemap, self._chunkcache = d
        if nodemap is not None:
            self.nodemap = self._nodecache = nodemap
        if not self._chunkcache:
            self._chunkclear()
        # revnum -> (chain-length, sum-delta-length)
        self._chaininfocache = {}

    def tip(self):
        return self.node(len(self.index) - 2)
    def __contains__(self, rev):
        return 0 <= rev < len(self)
    def __len__(self):
        return len(self.index) - 1
    def __iter__(self):
        return iter(xrange(len(self)))
    def revs(self, start=0, stop=None):
        """iterate over all rev in this revlog (from start to stop)"""
        step = 1
        if stop is not None:
            if start > stop:
                step = -1
            stop += step
        else:
            stop = len(self)
        return xrange(start, stop, step)

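Note: 'stop' is inclusive, and the range runs backwards when start > stop. For
a revlog rl with at least three revisions (illustrative):

    list(rl.revs(0, 2))   # [0, 1, 2]
    list(rl.revs(2, 0))   # [2, 1, 0]
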
    @util.propertycache
    def nodemap(self):
        self.rev(self.node(0))
        return self._nodecache

    def hasnode(self, node):
        try:
            self.rev(node)
            return True
        except KeyError:
            return False

    def clearcaches(self):
+       self._cache = None
+       self._basecache = None
+       self._chunkcache = (0, '')
+       self._pcache = {}
+
        try:
            self._nodecache.clearcaches()
        except AttributeError:
            self._nodecache = {nullid: nullrev}
        self._nodepos = None

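Note: the lines marked "+" above are the change this commit makes. The old
clearcaches() reset only the node cache, so the cached revision text (_cache),
delta base (_basecache), raw chunk window (_chunkcache) and partial-id map
(_pcache) survived and could skew timings. One plausible use in benchmarking
code, assuming a revlog instance rl (illustrative, not part of the changeset):

    import time

    for attempt in range(3):
        rl.clearcaches()                    # start every run cold
        begin = time.time()
        rl.revision(rl.node(len(rl) - 1))   # reconstruct the tip revision
        print time.time() - begin
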
    def rev(self, node):
        try:
            return self._nodecache[node]
        except TypeError:
            raise
        except RevlogError:
            # parsers.c radix tree lookup failed
            raise LookupError(node, self.indexfile, _('no node'))
        except KeyError:
            # pure python cache lookup failed
            n = self._nodecache
            i = self.index
            p = self._nodepos
            if p is None:
                p = len(i) - 2
            for r in xrange(p, -1, -1):
                v = i[r][7]
                n[v] = r
                if v == node:
                    self._nodepos = r - 1
                    return r
            raise LookupError(node, self.indexfile, _('no node'))

    def node(self, rev):
        return self.index[rev][7]
    def linkrev(self, rev):
        return self.index[rev][4]
    def parents(self, node):
        i = self.index
        d = i[self.rev(node)]
        return i[d[5]][7], i[d[6]][7] # map revisions to nodes inline
    def parentrevs(self, rev):
        return self.index[rev][5:7]
    def start(self, rev):
        return int(self.index[rev][0] >> 16)
    def end(self, rev):
        return self.start(rev) + self.length(rev)
    def length(self, rev):
        return self.index[rev][1]
    def chainbase(self, rev):
        index = self.index
        base = index[rev][3]
        while base != rev:
            rev = base
            base = index[rev][3]
        return base
    def chainlen(self, rev):
        return self._chaininfo(rev)[0]

    def _chaininfo(self, rev):
        chaininfocache = self._chaininfocache
        if rev in chaininfocache:
            return chaininfocache[rev]
        index = self.index
        generaldelta = self._generaldelta
        iterrev = rev
        e = index[iterrev]
        clen = 0
        compresseddeltalen = 0
        while iterrev != e[3]:
            clen += 1
            compresseddeltalen += e[1]
            if generaldelta:
                iterrev = e[3]
            else:
                iterrev -= 1
            if iterrev in chaininfocache:
                t = chaininfocache[iterrev]
                clen += t[0]
                compresseddeltalen += t[1]
                break
            e = index[iterrev]
        else:
            # Add text length of base since decompressing that also takes
            # work. For cache hits the length is already included.
            compresseddeltalen += e[1]
        r = (clen, compresseddeltalen)
        chaininfocache[rev] = r
        return r

    def flags(self, rev):
        return self.index[rev][0] & 0xFFFF
    def rawsize(self, rev):
        """return the length of the uncompressed text for a given revision"""
        l = self.index[rev][2]
        if l >= 0:
            return l

        t = self.revision(self.node(rev))
        return len(t)
    size = rawsize

    def ancestors(self, revs, stoprev=0, inclusive=False):
        """Generate the ancestors of 'revs' in reverse topological order.
        Does not generate revs lower than stoprev.

        See the documentation for ancestor.lazyancestors for more details."""

        return ancestor.lazyancestors(self.parentrevs, revs, stoprev=stoprev,
                                      inclusive=inclusive)

    def descendants(self, revs):
        """Generate the descendants of 'revs' in revision order.

        Yield a sequence of revision numbers starting with a child of
        some rev in revs, i.e., each revision is *not* considered a
        descendant of itself. Results are ordered by revision number (a
        topological sort)."""
        first = min(revs)
        if first == nullrev:
            for i in self:
                yield i
            return

        seen = set(revs)
        for i in self.revs(start=first + 1):
            for x in self.parentrevs(i):
                if x != nullrev and x in seen:
                    seen.add(i)
                    yield i
                    break

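Note: for example, list(rl.descendants([0])) yields every revision reachable
from rev 0, excluding rev 0 itself, in ascending revision order (assuming a
revlog rl; illustrative):

    descs = list(rl.descendants([0]))
    assert descs == sorted(descs)
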
470 def findcommonmissing(self, common=None, heads=None):
475 def findcommonmissing(self, common=None, heads=None):
471 """Return a tuple of the ancestors of common and the ancestors of heads
476 """Return a tuple of the ancestors of common and the ancestors of heads
472 that are not ancestors of common. In revset terminology, we return the
477 that are not ancestors of common. In revset terminology, we return the
473 tuple:
478 tuple:
474
479
475 ::common, (::heads) - (::common)
480 ::common, (::heads) - (::common)
476
481
477 The list is sorted by revision number, meaning it is
482 The list is sorted by revision number, meaning it is
478 topologically sorted.
483 topologically sorted.
479
484
480 'heads' and 'common' are both lists of node IDs. If heads is
485 'heads' and 'common' are both lists of node IDs. If heads is
481 not supplied, uses all of the revlog's heads. If common is not
486 not supplied, uses all of the revlog's heads. If common is not
482 supplied, uses nullid."""
487 supplied, uses nullid."""
483 if common is None:
488 if common is None:
484 common = [nullid]
489 common = [nullid]
485 if heads is None:
490 if heads is None:
486 heads = self.heads()
491 heads = self.heads()
487
492
488 common = [self.rev(n) for n in common]
493 common = [self.rev(n) for n in common]
489 heads = [self.rev(n) for n in heads]
494 heads = [self.rev(n) for n in heads]
490
495
491 # we want the ancestors, but inclusive
496 # we want the ancestors, but inclusive
492 class lazyset(object):
497 class lazyset(object):
493 def __init__(self, lazyvalues):
498 def __init__(self, lazyvalues):
494 self.addedvalues = set()
499 self.addedvalues = set()
495 self.lazyvalues = lazyvalues
500 self.lazyvalues = lazyvalues
496
501
497 def __contains__(self, value):
502 def __contains__(self, value):
498 return value in self.addedvalues or value in self.lazyvalues
503 return value in self.addedvalues or value in self.lazyvalues
499
504
500 def __iter__(self):
505 def __iter__(self):
501 added = self.addedvalues
506 added = self.addedvalues
502 for r in added:
507 for r in added:
503 yield r
508 yield r
504 for r in self.lazyvalues:
509 for r in self.lazyvalues:
505 if not r in added:
510 if not r in added:
506 yield r
511 yield r
507
512
508 def add(self, value):
513 def add(self, value):
509 self.addedvalues.add(value)
514 self.addedvalues.add(value)
510
515
511 def update(self, values):
516 def update(self, values):
512 self.addedvalues.update(values)
517 self.addedvalues.update(values)
513
518
514 has = lazyset(self.ancestors(common))
519 has = lazyset(self.ancestors(common))
515 has.add(nullrev)
520 has.add(nullrev)
516 has.update(common)
521 has.update(common)
517
522
518 # take all ancestors from heads that aren't in has
523 # take all ancestors from heads that aren't in has
519 missing = set()
524 missing = set()
520 visit = collections.deque(r for r in heads if r not in has)
525 visit = collections.deque(r for r in heads if r not in has)
521 while visit:
526 while visit:
522 r = visit.popleft()
527 r = visit.popleft()
523 if r in missing:
528 if r in missing:
524 continue
529 continue
525 else:
530 else:
526 missing.add(r)
531 missing.add(r)
527 for p in self.parentrevs(r):
532 for p in self.parentrevs(r):
528 if p not in has:
533 if p not in has:
529 visit.append(p)
534 visit.append(p)
530 missing = list(missing)
535 missing = list(missing)
531 missing.sort()
536 missing.sort()
532 return has, [self.node(r) for r in missing]
537 return has, [self.node(r) for r in missing]
533
538
534 def incrementalmissingrevs(self, common=None):
539 def incrementalmissingrevs(self, common=None):
535 """Return an object that can be used to incrementally compute the
540 """Return an object that can be used to incrementally compute the
536 revision numbers of the ancestors of arbitrary sets that are not
541 revision numbers of the ancestors of arbitrary sets that are not
537 ancestors of common. This is an ancestor.incrementalmissingancestors
542 ancestors of common. This is an ancestor.incrementalmissingancestors
538 object.
543 object.
539
544
540 'common' is a list of revision numbers. If common is not supplied, uses
545 'common' is a list of revision numbers. If common is not supplied, uses
541 nullrev.
546 nullrev.
542 """
547 """
543 if common is None:
548 if common is None:
544 common = [nullrev]
549 common = [nullrev]
545
550
546 return ancestor.incrementalmissingancestors(self.parentrevs, common)
551 return ancestor.incrementalmissingancestors(self.parentrevs, common)
547
552
548 def findmissingrevs(self, common=None, heads=None):
553 def findmissingrevs(self, common=None, heads=None):
549 """Return the revision numbers of the ancestors of heads that
554 """Return the revision numbers of the ancestors of heads that
550 are not ancestors of common.
555 are not ancestors of common.
551
556
552 More specifically, return a list of revision numbers corresponding to
557 More specifically, return a list of revision numbers corresponding to
553 nodes N such that every N satisfies the following constraints:
558 nodes N such that every N satisfies the following constraints:
554
559
555 1. N is an ancestor of some node in 'heads'
560 1. N is an ancestor of some node in 'heads'
556 2. N is not an ancestor of any node in 'common'
561 2. N is not an ancestor of any node in 'common'
557
562
558 The list is sorted by revision number, meaning it is
563 The list is sorted by revision number, meaning it is
559 topologically sorted.
564 topologically sorted.
560
565
561 'heads' and 'common' are both lists of revision numbers. If heads is
566 'heads' and 'common' are both lists of revision numbers. If heads is
562 not supplied, uses all of the revlog's heads. If common is not
567 not supplied, uses all of the revlog's heads. If common is not
563 supplied, uses nullid."""
568 supplied, uses nullid."""
564 if common is None:
569 if common is None:
565 common = [nullrev]
570 common = [nullrev]
566 if heads is None:
571 if heads is None:
567 heads = self.headrevs()
572 heads = self.headrevs()
568
573
569 inc = self.incrementalmissingrevs(common=common)
574 inc = self.incrementalmissingrevs(common=common)
570 return inc.missingancestors(heads)
575 return inc.missingancestors(heads)
571
576
572 def findmissing(self, common=None, heads=None):
577 def findmissing(self, common=None, heads=None):
573 """Return the ancestors of heads that are not ancestors of common.
578 """Return the ancestors of heads that are not ancestors of common.
574
579
575 More specifically, return a list of nodes N such that every N
580 More specifically, return a list of nodes N such that every N
576 satisfies the following constraints:
581 satisfies the following constraints:
577
582
578 1. N is an ancestor of some node in 'heads'
583 1. N is an ancestor of some node in 'heads'
579 2. N is not an ancestor of any node in 'common'
584 2. N is not an ancestor of any node in 'common'
580
585
581 The list is sorted by revision number, meaning it is
586 The list is sorted by revision number, meaning it is
582 topologically sorted.
587 topologically sorted.
583
588
584 'heads' and 'common' are both lists of node IDs. If heads is
589 'heads' and 'common' are both lists of node IDs. If heads is
585 not supplied, uses all of the revlog's heads. If common is not
590 not supplied, uses all of the revlog's heads. If common is not
586 supplied, uses nullid."""
591 supplied, uses nullid."""
587 if common is None:
592 if common is None:
588 common = [nullid]
593 common = [nullid]
589 if heads is None:
594 if heads is None:
590 heads = self.heads()
595 heads = self.heads()
591
596
592 common = [self.rev(n) for n in common]
597 common = [self.rev(n) for n in common]
593 heads = [self.rev(n) for n in heads]
598 heads = [self.rev(n) for n in heads]
594
599
595 inc = self.incrementalmissingrevs(common=common)
600 inc = self.incrementalmissingrevs(common=common)
596 return [self.node(r) for r in inc.missingancestors(heads)]
601 return [self.node(r) for r in inc.missingancestors(heads)]
597
602
598 def nodesbetween(self, roots=None, heads=None):
603 def nodesbetween(self, roots=None, heads=None):
599 """Return a topological path from 'roots' to 'heads'.
604 """Return a topological path from 'roots' to 'heads'.
600
605
601 Return a tuple (nodes, outroots, outheads) where 'nodes' is a
606 Return a tuple (nodes, outroots, outheads) where 'nodes' is a
602 topologically sorted list of all nodes N that satisfy both of
607 topologically sorted list of all nodes N that satisfy both of
603 these constraints:
608 these constraints:
604
609
605 1. N is a descendant of some node in 'roots'
610 1. N is a descendant of some node in 'roots'
606 2. N is an ancestor of some node in 'heads'
611 2. N is an ancestor of some node in 'heads'
607
612
608 Every node is considered to be both a descendant and an ancestor
613 Every node is considered to be both a descendant and an ancestor
609 of itself, so every reachable node in 'roots' and 'heads' will be
614 of itself, so every reachable node in 'roots' and 'heads' will be
610 included in 'nodes'.
615 included in 'nodes'.
611
616
612 'outroots' is the list of reachable nodes in 'roots', i.e., the
617 'outroots' is the list of reachable nodes in 'roots', i.e., the
613 subset of 'roots' that is returned in 'nodes'. Likewise,
618 subset of 'roots' that is returned in 'nodes'. Likewise,
614 'outheads' is the subset of 'heads' that is also in 'nodes'.
619 'outheads' is the subset of 'heads' that is also in 'nodes'.
615
620
616 'roots' and 'heads' are both lists of node IDs. If 'roots' is
621 'roots' and 'heads' are both lists of node IDs. If 'roots' is
617 unspecified, uses nullid as the only root. If 'heads' is
622 unspecified, uses nullid as the only root. If 'heads' is
618 unspecified, uses list of all of the revlog's heads."""
623 unspecified, uses list of all of the revlog's heads."""
619 nonodes = ([], [], [])
624 nonodes = ([], [], [])
620 if roots is not None:
625 if roots is not None:
621 roots = list(roots)
626 roots = list(roots)
622 if not roots:
627 if not roots:
623 return nonodes
628 return nonodes
624 lowestrev = min([self.rev(n) for n in roots])
629 lowestrev = min([self.rev(n) for n in roots])
625 else:
630 else:
626 roots = [nullid] # Everybody's a descendant of nullid
631 roots = [nullid] # Everybody's a descendant of nullid
627 lowestrev = nullrev
632 lowestrev = nullrev
628 if (lowestrev == nullrev) and (heads is None):
633 if (lowestrev == nullrev) and (heads is None):
629 # We want _all_ the nodes!
634 # We want _all_ the nodes!
630 return ([self.node(r) for r in self], [nullid], list(self.heads()))
635 return ([self.node(r) for r in self], [nullid], list(self.heads()))
631 if heads is None:
636 if heads is None:
632 # All nodes are ancestors, so the latest ancestor is the last
637 # All nodes are ancestors, so the latest ancestor is the last
633 # node.
638 # node.
634 highestrev = len(self) - 1
639 highestrev = len(self) - 1
635 # Set ancestors to None to signal that every node is an ancestor.
640 # Set ancestors to None to signal that every node is an ancestor.
636 ancestors = None
641 ancestors = None
637 # Set heads to an empty dictionary for later discovery of heads
642 # Set heads to an empty dictionary for later discovery of heads
638 heads = {}
643 heads = {}
639 else:
644 else:
640 heads = list(heads)
645 heads = list(heads)
641 if not heads:
646 if not heads:
642 return nonodes
647 return nonodes
643 ancestors = set()
648 ancestors = set()
644 # Turn heads into a dictionary so we can remove 'fake' heads.
649 # Turn heads into a dictionary so we can remove 'fake' heads.
645 # Also, later we will be using it to filter out the heads we can't
650 # Also, later we will be using it to filter out the heads we can't
646 # find from roots.
651 # find from roots.
647 heads = dict.fromkeys(heads, False)
652 heads = dict.fromkeys(heads, False)
648 # Start at the top and keep marking parents until we're done.
653 # Start at the top and keep marking parents until we're done.
649 nodestotag = set(heads)
654 nodestotag = set(heads)
650 # Remember where the top was so we can use it as a limit later.
655 # Remember where the top was so we can use it as a limit later.
651 highestrev = max([self.rev(n) for n in nodestotag])
656 highestrev = max([self.rev(n) for n in nodestotag])
652 while nodestotag:
657 while nodestotag:
653 # grab a node to tag
658 # grab a node to tag
654 n = nodestotag.pop()
659 n = nodestotag.pop()
655 # Never tag nullid
660 # Never tag nullid
656 if n == nullid:
661 if n == nullid:
657 continue
662 continue
658 # A node's revision number represents its place in a
663 # A node's revision number represents its place in a
659 # topologically sorted list of nodes.
664 # topologically sorted list of nodes.
660 r = self.rev(n)
665 r = self.rev(n)
661 if r >= lowestrev:
666 if r >= lowestrev:
662 if n not in ancestors:
667 if n not in ancestors:
663 # If we are possibly a descendant of one of the roots
668 # If we are possibly a descendant of one of the roots
664 # and we haven't already been marked as an ancestor
669 # and we haven't already been marked as an ancestor
665 ancestors.add(n) # Mark as ancestor
670 ancestors.add(n) # Mark as ancestor
666 # Add non-nullid parents to list of nodes to tag.
671 # Add non-nullid parents to list of nodes to tag.
667 nodestotag.update([p for p in self.parents(n) if
672 nodestotag.update([p for p in self.parents(n) if
668 p != nullid])
673 p != nullid])
669 elif n in heads: # We've seen it before, is it a fake head?
674 elif n in heads: # We've seen it before, is it a fake head?
670 # So it is, real heads should not be the ancestors of
675 # So it is, real heads should not be the ancestors of
671 # any other heads.
676 # any other heads.
672 heads.pop(n)
677 heads.pop(n)
673 if not ancestors:
678 if not ancestors:
674 return nonodes
679 return nonodes
675 # Now that we have our set of ancestors, we want to remove any
680 # Now that we have our set of ancestors, we want to remove any
676 # roots that are not ancestors.
681 # roots that are not ancestors.
677
682
678 # If one of the roots was nullid, everything is included anyway.
683 # If one of the roots was nullid, everything is included anyway.
679 if lowestrev > nullrev:
684 if lowestrev > nullrev:
680 # But, since we weren't, let's recompute the lowest rev to not
685 # But, since we weren't, let's recompute the lowest rev to not
681 # include roots that aren't ancestors.
686 # include roots that aren't ancestors.
682
687
683 # Filter out roots that aren't ancestors of heads
688 # Filter out roots that aren't ancestors of heads
684 roots = [n for n in roots if n in ancestors]
689 roots = [n for n in roots if n in ancestors]
685 # Recompute the lowest revision
690 # Recompute the lowest revision
686 if roots:
691 if roots:
687 lowestrev = min([self.rev(n) for n in roots])
692 lowestrev = min([self.rev(n) for n in roots])
688 else:
693 else:
689 # No more roots? Return empty list
694 # No more roots? Return empty list
690 return nonodes
695 return nonodes
691 else:
696 else:
692 # We are descending from nullid, and don't need to care about
697 # We are descending from nullid, and don't need to care about
693 # any other roots.
698 # any other roots.
694 lowestrev = nullrev
699 lowestrev = nullrev
695 roots = [nullid]
700 roots = [nullid]
696 # Transform our roots list into a set.
701 # Transform our roots list into a set.
697 descendants = set(roots)
702 descendants = set(roots)
698 # Also, keep the original roots so we can filter out roots that aren't
703 # Also, keep the original roots so we can filter out roots that aren't
699 # 'real' roots (i.e. are descended from other roots).
704 # 'real' roots (i.e. are descended from other roots).
700 roots = descendants.copy()
705 roots = descendants.copy()
701 # Our topologically sorted list of output nodes.
706 # Our topologically sorted list of output nodes.
702 orderedout = []
707 orderedout = []
703 # Don't start at nullid since we don't want nullid in our output list,
708 # Don't start at nullid since we don't want nullid in our output list,
704 # and if nullid shows up in descendants, empty parents will look like
709 # and if nullid shows up in descendants, empty parents will look like
705 # they're descendants.
710 # they're descendants.
706 for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
711 for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
707 n = self.node(r)
712 n = self.node(r)
708 isdescendant = False
713 isdescendant = False
709 if lowestrev == nullrev: # Everybody is a descendant of nullid
714 if lowestrev == nullrev: # Everybody is a descendant of nullid
710 isdescendant = True
715 isdescendant = True
711 elif n in descendants:
716 elif n in descendants:
712 # n is already a descendant
717 # n is already a descendant
713 isdescendant = True
718 isdescendant = True
714 # This check only needs to be done here because all the roots
719 # This check only needs to be done here because all the roots
715 # will start being marked is descendants before the loop.
720 # will start being marked is descendants before the loop.
716 if n in roots:
721 if n in roots:
717 # If n was a root, check if it's a 'real' root.
722 # If n was a root, check if it's a 'real' root.
718 p = tuple(self.parents(n))
723 p = tuple(self.parents(n))
719 # If any of its parents are descendants, it's not a root.
724 # If any of its parents are descendants, it's not a root.
720 if (p[0] in descendants) or (p[1] in descendants):
725 if (p[0] in descendants) or (p[1] in descendants):
721 roots.remove(n)
726 roots.remove(n)
722 else:
727 else:
723 p = tuple(self.parents(n))
728 p = tuple(self.parents(n))
724 # A node is a descendant if either of its parents are
729 # A node is a descendant if either of its parents are
725 # descendants. (We seeded the dependents list with the roots
730 # descendants. (We seeded the dependents list with the roots
726 # up there, remember?)
731 # up there, remember?)
727 if (p[0] in descendants) or (p[1] in descendants):
732 if (p[0] in descendants) or (p[1] in descendants):
728 descendants.add(n)
733 descendants.add(n)
729 isdescendant = True
734 isdescendant = True
730 if isdescendant and ((ancestors is None) or (n in ancestors)):
735 if isdescendant and ((ancestors is None) or (n in ancestors)):
731 # Only include nodes that are both descendants and ancestors.
736 # Only include nodes that are both descendants and ancestors.
732 orderedout.append(n)
737 orderedout.append(n)
733 if (ancestors is not None) and (n in heads):
738 if (ancestors is not None) and (n in heads):
734 # We're trying to figure out which heads are reachable
739 # We're trying to figure out which heads are reachable
735 # from roots.
740 # from roots.
736 # Mark this head as having been reached
741 # Mark this head as having been reached
737 heads[n] = True
742 heads[n] = True
738 elif ancestors is None:
743 elif ancestors is None:
739 # Otherwise, we're trying to discover the heads.
744 # Otherwise, we're trying to discover the heads.
740 # Assume this is a head because if it isn't, the next step
745 # Assume this is a head because if it isn't, the next step
741 # will eventually remove it.
746 # will eventually remove it.
742 heads[n] = True
747 heads[n] = True
743 # But, obviously its parents aren't.
748 # But, obviously its parents aren't.
744 for p in self.parents(n):
749 for p in self.parents(n):
745 heads.pop(p, None)
750 heads.pop(p, None)
746 heads = [n for n, flag in heads.iteritems() if flag]
751 heads = [n for n, flag in heads.iteritems() if flag]
747 roots = list(roots)
752 roots = list(roots)
748 assert orderedout
753 assert orderedout
749 assert roots
754 assert roots
750 assert heads
755 assert heads
751 return (orderedout, roots, heads)
756 return (orderedout, roots, heads)
752
757
753 def headrevs(self):
758 def headrevs(self):
754 try:
759 try:
755 return self.index.headrevs()
760 return self.index.headrevs()
756 except AttributeError:
761 except AttributeError:
757 return self._headrevs()
762 return self._headrevs()
758
763
759 def computephases(self, roots):
764 def computephases(self, roots):
760 return self.index.computephasesmapsets(roots)
765 return self.index.computephasesmapsets(roots)
761
766
762 def _headrevs(self):
767 def _headrevs(self):
763 count = len(self)
768 count = len(self)
764 if not count:
769 if not count:
765 return [nullrev]
770 return [nullrev]
766 # we won't iter over filtered rev so nobody is a head at start
771 # we won't iter over filtered rev so nobody is a head at start
767 ishead = [0] * (count + 1)
772 ishead = [0] * (count + 1)
768 index = self.index
773 index = self.index
769 for r in self:
774 for r in self:
770 ishead[r] = 1 # I may be an head
775 ishead[r] = 1 # I may be an head
771 e = index[r]
776 e = index[r]
772 ishead[e[5]] = ishead[e[6]] = 0 # my parent are not
777 ishead[e[5]] = ishead[e[6]] = 0 # my parent are not
773 return [r for r, val in enumerate(ishead) if val]
778 return [r for r, val in enumerate(ishead) if val]
774
779
775 def heads(self, start=None, stop=None):
780 def heads(self, start=None, stop=None):
776 """return the list of all nodes that have no children
781 """return the list of all nodes that have no children
777
782
778 if start is specified, only heads that are descendants of
783 if start is specified, only heads that are descendants of
779 start will be returned
784 start will be returned
780 if stop is specified, it will consider all the revs from stop
785 if stop is specified, it will consider all the revs from stop
781 as if they had no children
786 as if they had no children
782 """
787 """
783 if start is None and stop is None:
788 if start is None and stop is None:
784 if not len(self):
789 if not len(self):
785 return [nullid]
790 return [nullid]
786 return [self.node(r) for r in self.headrevs()]
791 return [self.node(r) for r in self.headrevs()]
787
792
788 if start is None:
793 if start is None:
789 start = nullid
794 start = nullid
790 if stop is None:
795 if stop is None:
791 stop = []
796 stop = []
792 stoprevs = set([self.rev(n) for n in stop])
797 stoprevs = set([self.rev(n) for n in stop])
793 startrev = self.rev(start)
798 startrev = self.rev(start)
794 reachable = set((startrev,))
799 reachable = set((startrev,))
795 heads = set((startrev,))
800 heads = set((startrev,))
796
801
797 parentrevs = self.parentrevs
802 parentrevs = self.parentrevs
798 for r in self.revs(start=startrev + 1):
803 for r in self.revs(start=startrev + 1):
799 for p in parentrevs(r):
804 for p in parentrevs(r):
800 if p in reachable:
805 if p in reachable:
801 if r not in stoprevs:
806 if r not in stoprevs:
802 reachable.add(r)
807 reachable.add(r)
803 heads.add(r)
808 heads.add(r)
804 if p in heads and p not in stoprevs:
809 if p in heads and p not in stoprevs:
805 heads.remove(p)
810 heads.remove(p)
806
811
807 return [self.node(r) for r in heads]
812 return [self.node(r) for r in heads]
808
813
809 def children(self, node):
814 def children(self, node):
810 """find the children of a given node"""
815 """find the children of a given node"""
811 c = []
816 c = []
812 p = self.rev(node)
817 p = self.rev(node)
813 for r in self.revs(start=p + 1):
818 for r in self.revs(start=p + 1):
814 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
819 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
815 if prevs:
820 if prevs:
816 for pr in prevs:
821 for pr in prevs:
817 if pr == p:
822 if pr == p:
818 c.append(self.node(r))
823 c.append(self.node(r))
819 elif p == nullrev:
824 elif p == nullrev:
820 c.append(self.node(r))
825 c.append(self.node(r))
821 return c
826 return c
822
827
823 def descendant(self, start, end):
828 def descendant(self, start, end):
824 if start == nullrev:
829 if start == nullrev:
825 return True
830 return True
826 for i in self.descendants([start]):
831 for i in self.descendants([start]):
827 if i == end:
832 if i == end:
828 return True
833 return True
829 elif i > end:
834 elif i > end:
830 break
835 break
831 return False
836 return False
832
837
833 def commonancestorsheads(self, a, b):
838 def commonancestorsheads(self, a, b):
834 """calculate all the heads of the common ancestors of nodes a and b"""
839 """calculate all the heads of the common ancestors of nodes a and b"""
835 a, b = self.rev(a), self.rev(b)
840 a, b = self.rev(a), self.rev(b)
836 try:
841 try:
837 ancs = self.index.commonancestorsheads(a, b)
842 ancs = self.index.commonancestorsheads(a, b)
838 except (AttributeError, OverflowError): # C implementation failed
843 except (AttributeError, OverflowError): # C implementation failed
839 ancs = ancestor.commonancestorsheads(self.parentrevs, a, b)
844 ancs = ancestor.commonancestorsheads(self.parentrevs, a, b)
840 return map(self.node, ancs)
845 return map(self.node, ancs)
841
846
842 def isancestor(self, a, b):
847 def isancestor(self, a, b):
843 """return True if node a is an ancestor of node b
848 """return True if node a is an ancestor of node b
844
849
845 The implementation of this is trivial but the use of
850 The implementation of this is trivial but the use of
846 commonancestorsheads is not."""
851 commonancestorsheads is not."""
847 return a in self.commonancestorsheads(a, b)
852 return a in self.commonancestorsheads(a, b)
848
853
849 def ancestor(self, a, b):
854 def ancestor(self, a, b):
850 """calculate the "best" common ancestor of nodes a and b"""
855 """calculate the "best" common ancestor of nodes a and b"""
851
856
852 a, b = self.rev(a), self.rev(b)
857 a, b = self.rev(a), self.rev(b)
853 try:
858 try:
854 ancs = self.index.ancestors(a, b)
            ancs = self.index.ancestors(a, b)
        except (AttributeError, OverflowError):
            ancs = ancestor.ancestors(self.parentrevs, a, b)
        if ancs:
            # choose a consistent winner when there's a tie
            return min(map(self.node, ancs))
        return nullid

    def _match(self, id):
        if isinstance(id, int):
            # rev
            return self.node(id)
        if len(id) == 20:
            # possibly a binary node
            # odds of a binary node being all hex in ASCII are 1 in 10**25
            try:
                node = id
                self.rev(node) # quick search the index
                return node
            except LookupError:
                pass # may be partial hex id
        try:
            # str(rev)
            rev = int(id)
            if str(rev) != id:
                raise ValueError
            if rev < 0:
                rev = len(self) + rev
            if rev < 0 or rev >= len(self):
                raise ValueError
            return self.node(rev)
        except (ValueError, OverflowError):
            pass
        if len(id) == 40:
            try:
                # a full hex nodeid?
                node = bin(id)
                self.rev(node)
                return node
            except (TypeError, LookupError):
                pass

    def _partialmatch(self, id):
        try:
            n = self.index.partialmatch(id)
            if n and self.hasnode(n):
                return n
            return None
        except RevlogError:
            # parsers.c radix tree lookup gave multiple matches
            # fall through to slow path that filters hidden revisions
            pass
        except (AttributeError, ValueError):
            # we are pure python, or key was too short to search radix tree
            pass

        if id in self._pcache:
            return self._pcache[id]

        if len(id) < 40:
            try:
                # hex(node)[:...]
                l = len(id) // 2 # grab an even number of digits
                prefix = bin(id[:l * 2])
                nl = [e[7] for e in self.index if e[7].startswith(prefix)]
                nl = [n for n in nl if hex(n).startswith(id) and
                      self.hasnode(n)]
                if len(nl) > 0:
                    if len(nl) == 1:
                        self._pcache[id] = nl[0]
                        return nl[0]
                    raise LookupError(id, self.indexfile,
                                      _('ambiguous identifier'))
                return None
            except TypeError:
                pass

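    # Illustrative sketch (not part of the original module): how a caller
    # might resolve an abbreviated hex id via _partialmatch(). The name
    # ``rl`` is a hypothetical revlog instance.
    #
    #   node = rl._partialmatch('1f0e')
    #   if node is None:
    #       print('no revision starts with 1f0e')
    #   else:
    #       print('resolved to %s' % hex(node))
    #
    # A unique prefix returns the full 20-byte node, an unknown prefix
    # returns None, and an ambiguous prefix raises LookupError.
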
    def lookup(self, id):
        """locate a node based on:
            - revision number or str(revision number)
            - nodeid or subset of hex nodeid
        """
        n = self._match(id)
        if n is not None:
            return n
        n = self._partialmatch(id)
        if n:
            return n

        raise LookupError(id, self.indexfile, _('no match found'))

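    # Assumed usage sketch (not in the original file): lookup() accepts
    # several identifier forms for the same revision. ``rl`` and ``node``
    # are hypothetical.
    #
    #   rl.lookup(0)               # integer revision number
    #   rl.lookup('0')             # str(revision number)
    #   rl.lookup('-1')            # negative numbers count back from tip
    #   rl.lookup(node)            # 20-byte binary nodeid
    #   rl.lookup(hex(node))       # 40-char hex nodeid
    #   rl.lookup(hex(node)[:8])   # unambiguous hex prefix
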
    def cmp(self, node, text):
        """compare text with a given file revision

        returns True if text is different from what is stored.
        """
        p1, p2 = self.parents(node)
        return hash(text, p1, p2) != node

    def _addchunk(self, offset, data):
        """Add a segment to the revlog cache.

        Accepts an absolute offset and the data that is at that location.
        """
        o, d = self._chunkcache
        # try to add to existing cache
        if o + len(d) == offset and len(d) + len(data) < _chunksize:
            self._chunkcache = o, d + data
        else:
            self._chunkcache = offset, data

    def _loadchunk(self, offset, length, df=None):
        """Load a segment of raw data from the revlog.

        Accepts an absolute offset, length to read, and an optional existing
        file handle to read from.

        If an existing file handle is passed, it will be seeked and the
        original seek position will NOT be restored.

        Returns a str or buffer of raw byte data.
        """
        if df is not None:
            closehandle = False
        else:
            if self._inline:
                df = self.opener(self.indexfile)
            else:
                df = self.opener(self.datafile)
            closehandle = True

        # Cache data both forward and backward around the requested
        # data, in a fixed size window. This helps speed up operations
        # involving reading the revlog backwards.
        cachesize = self._chunkcachesize
        realoffset = offset & ~(cachesize - 1)
        reallength = (((offset + length + cachesize) & ~(cachesize - 1))
                      - realoffset)
        df.seek(realoffset)
        d = df.read(reallength)
        if closehandle:
            df.close()
        self._addchunk(realoffset, d)
        if offset != realoffset or reallength != length:
            return util.buffer(d, offset - realoffset, length)
        return d

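    # Worked example of the window arithmetic above (illustrative numbers,
    # assuming a power-of-two cache size of 65536):
    #
    #   offset     = 70000; length = 100
    #   realoffset = 70000 & ~65535                            # -> 65536
    #   reallength = ((70000 + 100 + 65536) & ~65535) - 65536  # -> 65536
    #
    # so the aligned window [65536, 131072) is read and cached, and only a
    # buffer over the requested 100 bytes is handed back to the caller.
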
    def _getchunk(self, offset, length, df=None):
        """Obtain a segment of raw data from the revlog.

        Accepts an absolute offset, length of bytes to obtain, and an
        optional file handle to the already-opened revlog. If the file
        handle is used, its original seek position will not be preserved.

        Requests for data may be returned from a cache.

        Returns a str or a buffer instance of raw byte data.
        """
        o, d = self._chunkcache
        l = len(d)

        # is it in the cache?
        cachestart = offset - o
        cacheend = cachestart + length
        if cachestart >= 0 and cacheend <= l:
            if cachestart == 0 and cacheend == l:
                return d # avoid a copy
            return util.buffer(d, cachestart, cacheend - cachestart)

        return self._loadchunk(offset, length, df=df)

    def _chunkraw(self, startrev, endrev, df=None):
        """Obtain a segment of raw data corresponding to a range of revisions.

        Accepts the start and end revisions and an optional already-open
        file handle to be used for reading. If the file handle is read, its
        seek position will not be preserved.

        Requests for data may be satisfied by a cache.

        Returns a str or a buffer instance of raw byte data. Callers will
        need to call ``self.start(rev)`` and ``self.length(rev)`` to
        determine where each revision's data begins and ends.
        """
        start = self.start(startrev)
        end = self.end(endrev)
        if self._inline:
            start += (startrev + 1) * self._io.size
            end += (endrev + 1) * self._io.size
        length = end - start
        return self._getchunk(start, length, df=df)

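    # Note on the inline adjustment above (illustrative numbers): in an
    # inline revlog, index entries and data chunks are interleaved in the
    # .i file, so rev N's flat data offset is shifted by (N + 1) index
    # entries. Assuming the 64-byte REVLOGNG entry size, a data-space
    # offset of 1000 for startrev=3 becomes:
    #
    #   start = 1000 + (3 + 1) * 64   # -> 1256 bytes into the .i file
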
    def _chunk(self, rev, df=None):
        """Obtain a single decompressed chunk for a revision.

        Accepts an integer revision and an optional already-open file handle
        to be used for reading. If used, the seek position of the file will not
        be preserved.

        Returns a str holding uncompressed data for the requested revision.
        """
        return decompress(self._chunkraw(rev, rev, df=df))

    def _chunks(self, revs, df=None):
        """Obtain decompressed chunks for the specified revisions.

        Accepts an iterable of numeric revisions that are assumed to be in
        ascending order. Also accepts an optional already-open file handle
        to be used for reading. If used, the seek position of the file will
        not be preserved.

        This function is similar to calling ``self._chunk()`` multiple times,
        but is faster.

        Returns a list with decompressed data for each requested revision.
        """
        if not revs:
            return []
        start = self.start
        length = self.length
        inline = self._inline
        iosize = self._io.size
        buffer = util.buffer

        l = []
        ladd = l.append

        # preload the cache
        try:
            while True:
                # ensure that the cache doesn't change out from under us
                _cache = self._chunkcache
                self._chunkraw(revs[0], revs[-1], df=df)
                if _cache == self._chunkcache:
                    break
                offset, data = _cache
        except OverflowError:
            # issue4215 - we can't cache a run of chunks greater than
            # 2G on Windows
            return [self._chunk(rev, df=df) for rev in revs]

        for rev in revs:
            chunkstart = start(rev)
            if inline:
                chunkstart += (rev + 1) * iosize
            chunklength = length(rev)
            ladd(decompress(buffer(data, chunkstart - offset, chunklength)))

        return l

    def _chunkclear(self):
        """Clear the raw chunk cache."""
        self._chunkcache = (0, '')

    def deltaparent(self, rev):
        """return deltaparent of the given revision"""
        base = self.index[rev][3]
        if base == rev:
            return nullrev
        elif self._generaldelta:
            return base
        else:
            return rev - 1

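    # Illustrative contrast (assumed index contents, not from the original
    # file): suppose the base column e[3] for revs 0..3 is [0, 0, 0, 1].
    #
    #   deltaparent(0) == nullrev   # base == rev: a full text is stored
    #   deltaparent(3) == 1         # generaldelta: the recorded base
    #   deltaparent(3) == 2         # non-generaldelta: always rev - 1
    #
    # The two schemes differ only when a delta's base is not the previous
    # revision in the file.
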
    def revdiff(self, rev1, rev2):
        """return or calculate a delta between two revisions"""
        if rev1 != nullrev and self.deltaparent(rev2) == rev1:
            return str(self._chunk(rev2))

        return mdiff.textdiff(self.revision(rev1),
                              self.revision(rev2))

    def revision(self, nodeorrev, _df=None):
        """return an uncompressed revision of a given node or revision
        number.

        _df is an existing file handle to read from. It is meant to only be
        used internally.
        """
        if isinstance(nodeorrev, int):
            rev = nodeorrev
            node = self.node(rev)
        else:
            node = nodeorrev
            rev = None

        cachedrev = None
        if node == nullid:
            return ""
        if self._cache:
            if self._cache[0] == node:
                return self._cache[2]
            cachedrev = self._cache[1]

        # look up what we need to read
        text = None
        if rev is None:
            rev = self.rev(node)

        # check rev flags
        if self.flags(rev) & ~REVIDX_KNOWN_FLAGS:
            raise RevlogError(_('incompatible revision flag %x') %
                              (self.flags(rev) & ~REVIDX_KNOWN_FLAGS))

        # build delta chain
        chain = []
        index = self.index # for performance
        generaldelta = self._generaldelta
        iterrev = rev
        e = index[iterrev]
        while iterrev != e[3] and iterrev != cachedrev:
            chain.append(iterrev)
            if generaldelta:
                iterrev = e[3]
            else:
                iterrev -= 1
            e = index[iterrev]

        if iterrev == cachedrev:
            # cache hit
            text = self._cache[2]
        else:
            chain.append(iterrev)
        chain.reverse()

        # drop cache to save memory
        self._cache = None

        bins = self._chunks(chain, df=_df)
        if text is None:
            text = str(bins[0])
            bins = bins[1:]

        text = mdiff.patches(text, bins)

        text = self._checkhash(text, node, rev)

        self._cache = (node, rev, text)
        return text

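    # Sketch of the delta-chain walk above (illustrative numbers): if rev 5
    # stores a delta against rev 3, rev 3 against rev 1, and rev 1 is a full
    # text, revision(5) collects chain == [1, 3, 5] (after the reverse()),
    # decompresses those chunks in one _chunks() call, and applies the two
    # deltas on top of rev 1's full text via mdiff.patches(). A cached
    # (node, rev, text) triple for any revision on the chain lets the walk
    # stop early at that revision.
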
    def hash(self, text, p1, p2):
        """Compute a node hash.

        Available as a function so that subclasses can replace the hash
        as needed.
        """
        return hash(text, p1, p2)

    def _checkhash(self, text, node, rev):
        p1, p2 = self.parents(node)
        self.checkhash(text, p1, p2, node, rev)
        return text

    def checkhash(self, text, p1, p2, node, rev=None):
        if node != self.hash(text, p1, p2):
            revornode = rev
            if revornode is None:
                revornode = templatefilters.short(hex(node))
            raise RevlogError(_("integrity check failed on %s:%s")
                              % (self.indexfile, revornode))

    def checkinlinesize(self, tr, fp=None):
        """Check if the revlog is too big for inline and convert if so.

        This should be called after revisions are added to the revlog. If the
        revlog has grown too large to be an inline revlog, it will convert it
        to use multiple index and data files.
        """
        if not self._inline or (self.start(-2) + self.length(-2)) < _maxinline:
            return

        trinfo = tr.find(self.indexfile)
        if trinfo is None:
            raise RevlogError(_("%s not found in the transaction")
                              % self.indexfile)

        trindex = trinfo[2]
        if trindex is not None:
            dataoff = self.start(trindex)
        else:
            # revlog was stripped at start of transaction, use all leftover data
            trindex = len(self) - 1
            dataoff = self.end(-2)

        tr.add(self.datafile, dataoff)

        if fp:
            fp.flush()
            fp.close()

        df = self.opener(self.datafile, 'w')
        try:
            for r in self:
                df.write(self._chunkraw(r, r))
        finally:
            df.close()

        fp = self.opener(self.indexfile, 'w', atomictemp=True)
        self.version &= ~(REVLOGNGINLINEDATA)
        self._inline = False
        for i in self:
            e = self._io.packentry(self.index[i], self.node, self.version, i)
            fp.write(e)

        # if we don't call close, the temp file will never replace the
        # real index
        fp.close()

        tr.replace(self.indexfile, trindex * self._io.size)
        self._chunkclear()

    def addrevision(self, text, transaction, link, p1, p2, cachedelta=None,
                    node=None):
        """add a revision to the log

        text - the revision data to add
        transaction - the transaction object used for rollback
        link - the linkrev data to add
        p1, p2 - the parent nodeids of the revision
        cachedelta - an optional precomputed delta
        node - nodeid of revision; typically node is not specified, and it is
            computed by default as hash(text, p1, p2), however subclasses
            might use a different hashing method (and override checkhash()
            in that case)
        """
        if link == nullrev:
            raise RevlogError(_("attempted to add linkrev -1 to %s")
                              % self.indexfile)

        if len(text) > _maxentrysize:
            raise RevlogError(
                _("%s: size of %d bytes exceeds maximum revlog storage of 2GiB")
                % (self.indexfile, len(text)))

        node = node or self.hash(text, p1, p2)
        if node in self.nodemap:
            return node

        dfh = None
        if not self._inline:
            dfh = self.opener(self.datafile, "a+")
        ifh = self.opener(self.indexfile, "a+")
        try:
            return self._addrevision(node, text, transaction, link, p1, p2,
                                     REVIDX_DEFAULT_FLAGS, cachedelta, ifh, dfh)
        finally:
            if dfh:
                dfh.close()
            ifh.close()

    def compress(self, text):
        """ generate a possibly-compressed representation of text """
        if not text:
            return ("", text)
        l = len(text)
        bin = None
        if l < 44:
            pass
        elif l > 1000000:
            # zlib makes an internal copy, thus doubling memory usage for
            # large files, so let's do this in pieces
            z = zlib.compressobj()
            p = []
            pos = 0
            while pos < l:
                pos2 = pos + 2**20
                p.append(z.compress(text[pos:pos2]))
                pos = pos2
            p.append(z.flush())
            if sum(map(len, p)) < l:
                bin = "".join(p)
        else:
            bin = _compress(text)
        if bin is None or len(bin) > l:
            if text[0] == '\0':
                return ("", text)
            return ('u', text)
        return ("", bin)

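    # Summary of the (header, data) pairs produced above (behavior as
    # implemented, annotated for reference):
    #
    #   ("", "")       empty input, stored as-is
    #   ("", zdata)    zlib-compressed data (starts with 'x')
    #   ('u', text)    incompressible text, stored verbatim
    #   ("", text)     raw text starting with '\0'; a NUL byte cannot be
    #                  mistaken for the 'u' or zlib markers, so no header
    #                  is needed
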
    def _isgooddelta(self, d, textlen):
        """Returns True if the given delta is good. Good means that it is within
        the disk span, disk size, and chain length bounds that we know to be
        performant."""
        if d is None:
            return False

        # - 'dist' is the distance from the base revision -- bounding it limits
        #   the amount of I/O we need to do.
        # - 'compresseddeltalen' is the sum of the total size of deltas we need
        #   to apply -- bounding it limits the amount of CPU we consume.
        dist, l, data, base, chainbase, chainlen, compresseddeltalen = d
        if (dist > textlen * 4 or l > textlen or
            compresseddeltalen > textlen * 2 or
            (self._maxchainlen and chainlen > self._maxchainlen)):
            return False

        return True

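    # Worked example (illustrative numbers): for a 1000-byte fulltext, a
    # candidate delta is rejected when any bound fails:
    #
    #   dist > 4000                   # reads would span too much of the file
    #   l > 1000                      # the delta is bigger than the text
    #   compresseddeltalen > 2000     # replaying the chain costs too much CPU
    #   chainlen > self._maxchainlen  # when a chain-length cap is configured
    #
    # A rejected candidate makes _addrevision() try another base or fall
    # back to storing a fulltext.
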
    def _addrevision(self, node, text, transaction, link, p1, p2, flags,
                     cachedelta, ifh, dfh, alwayscache=False):
        """internal function to add revisions to the log

        see addrevision for argument descriptions.
        invariants:
        - text is optional (can be None); if not set, cachedelta must be set.
          if both are set, they must correspond to each other.
        """
        btext = [text]
        def buildtext():
            if btext[0] is not None:
                return btext[0]
            baserev = cachedelta[0]
            delta = cachedelta[1]
            # special case deltas which replace entire base; no need to decode
            # base revision. this neatly avoids censored bases, which throw
            # when they're decoded.
            hlen = struct.calcsize(">lll")
            if delta[:hlen] == mdiff.replacediffheader(self.rawsize(baserev),
                                                       len(delta) - hlen):
                btext[0] = delta[hlen:]
            else:
                if self._inline:
                    fh = ifh
                else:
                    fh = dfh
                basetext = self.revision(self.node(baserev), _df=fh)
                btext[0] = mdiff.patch(basetext, delta)
            try:
                self.checkhash(btext[0], p1, p2, node)
                if flags & REVIDX_ISCENSORED:
                    raise RevlogError(_('node %s is not censored') % node)
            except CensoredNodeError:
                # must pass the censored index flag to add censored revisions
                if not flags & REVIDX_ISCENSORED:
                    raise
            return btext[0]

        def builddelta(rev):
            # can we use the cached delta?
            if cachedelta and cachedelta[0] == rev:
                delta = cachedelta[1]
            else:
                t = buildtext()
                if self.iscensored(rev):
                    # deltas based on a censored revision must replace the
                    # full content in one patch, so delta works everywhere
                    header = mdiff.replacediffheader(self.rawsize(rev), len(t))
                    delta = header + t
                else:
                    if self._inline:
                        fh = ifh
                    else:
                        fh = dfh
                    ptext = self.revision(self.node(rev), _df=fh)
                    delta = mdiff.textdiff(ptext, t)
            data = self.compress(delta)
            l = len(data[1]) + len(data[0])
            if basecache[0] == rev:
                chainbase = basecache[1]
            else:
                chainbase = self.chainbase(rev)
            dist = l + offset - self.start(chainbase)
            if self._generaldelta:
                base = rev
            else:
                base = chainbase
            chainlen, compresseddeltalen = self._chaininfo(rev)
            chainlen += 1
            compresseddeltalen += l
            return dist, l, data, base, chainbase, chainlen, compresseddeltalen

        curr = len(self)
        prev = curr - 1
        base = chainbase = curr
        offset = self.end(prev)
        delta = None
        if self._basecache is None:
            self._basecache = (prev, self.chainbase(prev))
        basecache = self._basecache
        p1r, p2r = self.rev(p1), self.rev(p2)

        # full versions are inserted when the needed deltas
        # become comparable to the uncompressed text
        if text is None:
            textlen = mdiff.patchedsize(self.rawsize(cachedelta[0]),
                                        cachedelta[1])
        else:
            textlen = len(text)

        # should we try to build a delta?
        if prev != nullrev:
            tested = set()
            if cachedelta and self._generaldelta and self._lazydeltabase:
                # Assume what we received from the server is a good choice;
                # builddelta will reuse the cache
                candidatedelta = builddelta(cachedelta[0])
                tested.add(cachedelta[0])
                if self._isgooddelta(candidatedelta, textlen):
                    delta = candidatedelta
            if delta is None and self._generaldelta:
                # exclude the already lazily tested base, if any
                parents = [p for p in (p1r, p2r)
                           if p != nullrev and p not in tested]
                if parents and not self._aggressivemergedeltas:
                    # Pick whichever parent is closer to us (to minimize the
                    # chance of having to build a fulltext).
                    parents = [max(parents)]
                tested.update(parents)
                pdeltas = []
                for p in parents:
                    pd = builddelta(p)
                    if self._isgooddelta(pd, textlen):
                        pdeltas.append(pd)
                if pdeltas:
                    delta = min(pdeltas, key=lambda x: x[1])
            if delta is None and prev not in tested:
                # the other approaches failed; try against prev to hopefully
                # save us a fulltext.
                candidatedelta = builddelta(prev)
                if self._isgooddelta(candidatedelta, textlen):
                    delta = candidatedelta
        if delta is not None:
            dist, l, data, base, chainbase, chainlen, compresseddeltalen = delta
        else:
            text = buildtext()
            data = self.compress(text)
            l = len(data[1]) + len(data[0])
            base = chainbase = curr

        e = (offset_type(offset, flags), l, textlen,
             base, link, p1r, p2r, node)
        self.index.insert(-1, e)
        self.nodemap[node] = curr

        entry = self._io.packentry(e, self.node, self.version, curr)
        self._writeentry(transaction, ifh, dfh, entry, data, link, offset)

        if alwayscache and text is None:
            text = buildtext()

        if type(text) == str: # only accept immutable objects
            self._cache = (node, curr, text)
        self._basecache = (curr, chainbase)
        return node

    def _writeentry(self, transaction, ifh, dfh, entry, data, link, offset):
        # Files opened in a+ mode have inconsistent behavior on various
        # platforms. Windows requires that a file positioning call be made
        # when the file handle transitions between reads and writes. See
        # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
        # platforms, Python or the platform itself can be buggy. Some versions
        # of Solaris have been observed to not append at the end of the file
        # if the file was seeked to before the end. See issue4943 for more.
        #
        # We work around this issue by inserting a seek() before writing.
        # Note: This is likely not necessary on Python 3.
        ifh.seek(0, os.SEEK_END)
        if dfh:
            dfh.seek(0, os.SEEK_END)

        curr = len(self) - 1
        if not self._inline:
            transaction.add(self.datafile, offset)
            transaction.add(self.indexfile, curr * len(entry))
            if data[0]:
                dfh.write(data[0])
            dfh.write(data[1])
            ifh.write(entry)
        else:
            offset += curr * self._io.size
            transaction.add(self.indexfile, offset, curr)
            ifh.write(entry)
            ifh.write(data[0])
            ifh.write(data[1])
            self.checkinlinesize(transaction, ifh)

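    # Layout sketch for the two branches above (illustrative): a non-inline
    # revlog appends data to the .d file and fixed-size entries to the .i
    # file, while an inline revlog interleaves both in the .i file:
    #
    #   non-inline:  .i = [entry0][entry1]...    .d = [data0][data1]...
    #   inline:      .i = [entry0][data0][entry1][data1]...
    #
    # which is why the inline branch writes the entry and then the data
    # through the same handle and re-checks the inline size limit after.
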
    def addgroup(self, cg, linkmapper, transaction, addrevisioncb=None):
        """
        add a delta group

        given a set of deltas, add them to the revision log. the
        first delta is against its parent, which should be in our
        log, the rest are against the previous delta.

        If ``addrevisioncb`` is defined, it will be called with arguments of
        this revlog and the node that was added.
        """

        # track the base of the current delta log
        content = []
        node = None

        r = len(self)
        end = 0
        if r:
            end = self.end(r - 1)
        ifh = self.opener(self.indexfile, "a+")
        isize = r * self._io.size
        if self._inline:
            transaction.add(self.indexfile, end + isize, r)
            dfh = None
        else:
            transaction.add(self.indexfile, isize, r)
            transaction.add(self.datafile, end)
            dfh = self.opener(self.datafile, "a+")
        def flush():
            if dfh:
                dfh.flush()
            ifh.flush()
        try:
            # loop through our set of deltas
            chain = None
            while True:
                chunkdata = cg.deltachunk(chain)
                if not chunkdata:
                    break
                node = chunkdata['node']
                p1 = chunkdata['p1']
                p2 = chunkdata['p2']
                cs = chunkdata['cs']
                deltabase = chunkdata['deltabase']
                delta = chunkdata['delta']
                flags = chunkdata['flags'] or REVIDX_DEFAULT_FLAGS

                content.append(node)

                link = linkmapper(cs)
                if node in self.nodemap:
                    # this can happen if two branches make the same change
                    chain = node
                    continue

                for p in (p1, p2):
                    if p not in self.nodemap:
                        raise LookupError(p, self.indexfile,
                                          _('unknown parent'))

                if deltabase not in self.nodemap:
                    raise LookupError(deltabase, self.indexfile,
                                      _('unknown delta base'))

                baserev = self.rev(deltabase)

                if baserev != nullrev and self.iscensored(baserev):
                    # if base is censored, delta must be full replacement in a
                    # single patch operation
                    hlen = struct.calcsize(">lll")
                    oldlen = self.rawsize(baserev)
                    newlen = len(delta) - hlen
                    if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
                        raise error.CensoredBaseError(self.indexfile,
                                                      self.node(baserev))

                if not flags and self._peek_iscensored(baserev, delta, flush):
                    flags |= REVIDX_ISCENSORED

                # We assume consumers of addrevisioncb will want to retrieve
                # the added revision, which will require a call to
                # revision(). revision() will fast path if there is a cache
                # hit. So, we tell _addrevision() to always cache in this case.
                chain = self._addrevision(node, None, transaction, link,
                                          p1, p2, flags, (baserev, delta),
                                          ifh, dfh,
                                          alwayscache=bool(addrevisioncb))

                if addrevisioncb:
                    addrevisioncb(self, chain)

                if not dfh and not self._inline:
                    # addrevision switched from inline to conventional
                    # reopen the index
                    ifh.close()
                    dfh = self.opener(self.datafile, "a+")
                    ifh = self.opener(self.indexfile, "a+")
        finally:
            if dfh:
                dfh.close()
            ifh.close()

        return content

    def iscensored(self, rev):
        """Check if a file revision is censored."""
        return False

    def _peek_iscensored(self, baserev, delta, flush):
        """Quickly check if a delta produces a censored revision."""
        return False

    def getstrippoint(self, minlink):
        """find the minimum rev that must be stripped to strip the linkrev

        Returns a tuple containing the minimum rev and a set of all revs that
        have linkrevs that will be broken by this strip.
        """
        brokenrevs = set()
        strippoint = len(self)

        heads = {}
        futurelargelinkrevs = set()
        for head in self.headrevs():
            headlinkrev = self.linkrev(head)
            heads[head] = headlinkrev
            if headlinkrev >= minlink:
                futurelargelinkrevs.add(headlinkrev)

        # This algorithm involves walking down the rev graph, starting at the
        # heads. Since the revs are topologically sorted according to linkrev,
        # once all head linkrevs are below the minlink, we know there are
        # no more revs that could have a linkrev greater than minlink.
        # So we can stop walking.
        while futurelargelinkrevs:
            strippoint -= 1
            linkrev = heads.pop(strippoint)

            if linkrev < minlink:
                brokenrevs.add(strippoint)
            else:
                futurelargelinkrevs.remove(linkrev)

            for p in self.parentrevs(strippoint):
                if p != nullrev:
                    plinkrev = self.linkrev(p)
                    heads[p] = plinkrev
                    if plinkrev >= minlink:
                        futurelargelinkrevs.add(plinkrev)

        return strippoint, brokenrevs

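    # Illustrative run (assumed linkrevs, not from the original file): for a
    # linear revlog whose revs 0..3 carry linkrevs [0, 1, 2, 3],
    # getstrippoint(2) walks back from the head, popping revs 3 and 2 (both
    # linkrevs >= minlink) until no future linkrev can reach minlink, and
    # returns (2, set()): truncating at rev 2 removes both offending
    # revisions and breaks no remaining linkrevs.
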
    def strip(self, minlink, transaction):
        """truncate the revlog on the first revision with a linkrev >= minlink

        This function is called when we're stripping revision minlink and
        its descendants from the repository.

        We have to remove all revisions with linkrev >= minlink, because
        the equivalent changelog revisions will be renumbered after the
        strip.

        So we truncate the revlog on the first of these revisions, and
        trust that the caller has saved the revisions that shouldn't be
        removed and that it'll re-add them after this truncation.
        """
        if len(self) == 0:
            return

        rev, _ = self.getstrippoint(minlink)
        if rev == len(self):
            return

        # first truncate the files on disk
        end = self.start(rev)
        if not self._inline:
            transaction.add(self.datafile, end)
            end = rev * self._io.size
        else:
            end += rev * self._io.size

        transaction.add(self.indexfile, end)

        # then reset internal state in memory to forget those revisions
        self._cache = None
        self._chaininfocache = {}
        self._chunkclear()
        for x in xrange(rev, len(self)):
            del self.nodemap[self.node(x)]

        del self.index[rev:-1]

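    # Truncation arithmetic above, illustrated (assumed sizes): stripping at
    # rev N of a non-inline revlog truncates the .d file at start(N) and the
    # .i file at N * entrysize; for an inline revlog the single .i file is
    # truncated at start(N) + N * entrysize, because N index entries precede
    # rev N's data. The in-memory caches are then cleared so nothing refers
    # to the removed revisions.
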
    def checksize(self):
        expected = 0
        if len(self):
            expected = max(0, self.end(len(self) - 1))

        try:
            f = self.opener(self.datafile)
            f.seek(0, 2)
            actual = f.tell()
            f.close()
            dd = actual - expected
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise
            dd = 0

        try:
            f = self.opener(self.indexfile)
            f.seek(0, 2)
            actual = f.tell()
            f.close()
            s = self._io.size
            i = max(0, actual // s)
            di = actual - (i * s)
            if self._inline:
                databytes = 0
                for r in self:
                    databytes += max(0, self.length(r))
                dd = 0
                di = actual - len(self) * s - databytes
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise
            di = 0

        return (dd, di)

    def files(self):
        res = [self.indexfile]
        if not self._inline:
            res.append(self.datafile)
        return res