revlog: reorder index accessors to match data structure order...
Gregory Szorc
r30287:0986f225 default

@@ -1,1796 +1,1811 @@
# revlog.py - storage back-end for mercurial
#
# Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.

"""Storage back-end for Mercurial.

This provides efficient delta storage with O(1) retrieve and append
and O(changes) merge between branches.
"""

from __future__ import absolute_import

import collections
import errno
import hashlib
import os
import struct
import zlib

# import stuff from node for others to import from revlog
from .node import (
    bin,
    hex,
    nullid,
    nullrev,
)
from .i18n import _
from . import (
    ancestor,
    error,
    mdiff,
    parsers,
    templatefilters,
    util,
)

_pack = struct.pack
_unpack = struct.unpack
_compress = zlib.compress
_decompress = zlib.decompress

# revlog header flags
REVLOGV0 = 0
REVLOGNG = 1
REVLOGNGINLINEDATA = (1 << 16)
REVLOGGENERALDELTA = (1 << 17)
REVLOG_DEFAULT_FLAGS = REVLOGNGINLINEDATA
REVLOG_DEFAULT_FORMAT = REVLOGNG
REVLOG_DEFAULT_VERSION = REVLOG_DEFAULT_FORMAT | REVLOG_DEFAULT_FLAGS
REVLOGNG_FLAGS = REVLOGNGINLINEDATA | REVLOGGENERALDELTA

# revlog index flags
REVIDX_ISCENSORED = (1 << 15) # revision has censor metadata, must be verified
REVIDX_DEFAULT_FLAGS = 0
REVIDX_KNOWN_FLAGS = REVIDX_ISCENSORED

# max size of revlog with inline data
_maxinline = 131072
_chunksize = 1048576

RevlogError = error.RevlogError
LookupError = error.LookupError
CensoredNodeError = error.CensoredNodeError

def getoffset(q):
    return int(q >> 16)

def gettype(q):
    return int(q & 0xFFFF)

def offset_type(offset, type):
    return long(long(offset) << 16 | type)

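The first field of a RevlogNG index entry packs a 48-bit byte offset and a
16-bit flag field into one integer; offset_type builds that value and
getoffset/gettype take it apart. A quick round-trip check using the helpers
above (offset_type needs Python 2's long, matching the rest of this file):

    q = offset_type(12345, REVIDX_ISCENSORED)
    assert getoffset(q) == 12345              # q >> 16
    assert gettype(q) == REVIDX_ISCENSORED    # q & 0xFFFF
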
_nullhash = hashlib.sha1(nullid)

def hash(text, p1, p2):
    """generate a hash from the given text and its parent hashes

    This hash combines both the current file contents and its history
    in a manner that makes it easy to distinguish nodes with the same
    content in the revision graph.
    """
    # As of now, if one of the parent nodes is null, p2 is null
    if p2 == nullid:
        # deep copy of a hash is faster than creating one
        s = _nullhash.copy()
        s.update(p1)
    else:
        # none of the parent nodes are nullid
        l = [p1, p2]
        l.sort()
        s = hashlib.sha1(l[0])
        s.update(l[1])
    s.update(text)
    return s.digest()

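So a nodeid is the SHA-1 of the two parent nodeids in sorted order followed by
the revision text; sorting makes the result independent of parent order, and
the _nullhash fast path must agree with hashing two explicit null parents. A
small sanity check, assuming the hash and nullid defined above are in scope:

    import hashlib
    expected = hashlib.sha1(nullid * 2 + "hello").digest()
    assert hash("hello", nullid, nullid) == expected
    # parent order does not matter:
    p1, p2 = "\x01" * 20, "\x02" * 20
    assert hash("hello", p1, p2) == hash("hello", p2, p1)
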
def decompress(bin):
    """ decompress the given input """
    if not bin:
        return bin
    t = bin[0]
    if t == '\0':
        return bin
    if t == 'x':
        try:
            return _decompress(bin)
        except zlib.error as e:
            raise RevlogError(_("revlog decompress error: %s") % str(e))
    if t == 'u':
        return util.buffer(bin, 1)
    raise RevlogError(_("unknown compression type %r") % t)

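Each stored chunk announces its encoding in its first byte: '\0' for an
empty or plain chunk, 'x' (the leading byte of any zlib stream) for
compressed data, and 'u' for data stored verbatim behind an explicit marker.
The write side lives elsewhere in this file; the sketch below is a
simplification of that convention, not the module's actual writer:

    import zlib

    def compresschunk(text):
        # Sketch only: use zlib (output starts with 'x') when it saves
        # space, otherwise fall back to a literal chunk.
        if not text:
            return text
        c = zlib.compress(text)
        if len(c) < len(text):
            return c                # zlib streams start with 'x' (0x78)
        if text[0] == '\0':
            return text             # already reads as a plain chunk
        return 'u' + text           # explicit "stored uncompressed" marker

    assert decompress(compresschunk("a" * 100)) == "a" * 100
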
# index v0:
#  4 bytes: offset
#  4 bytes: compressed length
#  4 bytes: base rev
#  4 bytes: link rev
# 20 bytes: parent 1 nodeid
# 20 bytes: parent 2 nodeid
# 20 bytes: nodeid
indexformatv0 = ">4l20s20s20s"

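The format string maps directly onto that layout: big-endian ('>'), four
4-byte signed longs, then three 20-byte strings, 76 bytes per entry:

    import struct
    assert struct.calcsize(indexformatv0) == 76   # 4*4 + 3*20

    entry = struct.pack(indexformatv0, 0, 11, 0, 0,
                        "\0" * 20, "\0" * 20, "\xaa" * 20)
    offset, clen, base, link, p1, p2, node = _unpack(indexformatv0, entry)
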
class revlogoldio(object):
    def __init__(self):
        self.size = struct.calcsize(indexformatv0)

    def parseindex(self, data, inline):
        s = self.size
        index = []
        nodemap = {nullid: nullrev}
        n = off = 0
        l = len(data)
        while off + s <= l:
            cur = data[off:off + s]
            off += s
            e = _unpack(indexformatv0, cur)
            # transform to revlogv1 format
            e2 = (offset_type(e[0], 0), e[1], -1, e[2], e[3],
                  nodemap.get(e[4], nullrev), nodemap.get(e[5], nullrev), e[6])
            index.append(e2)
            nodemap[e[6]] = n
            n += 1

        # add the magic null revision at -1
        index.append((0, 0, 0, -1, -1, -1, -1, nullid))

        return index, nodemap, None

    def packentry(self, entry, node, version, rev):
        if gettype(entry[0]):
            raise RevlogError(_("index entry flags need RevlogNG"))
        e2 = (getoffset(entry[0]), entry[1], entry[3], entry[4],
              node(entry[5]), node(entry[6]), entry[7])
        return _pack(indexformatv0, *e2)

# index ng:
#  6 bytes: offset
#  2 bytes: flags
#  4 bytes: compressed length
#  4 bytes: uncompressed length
#  4 bytes: base rev
#  4 bytes: link rev
#  4 bytes: parent 1 rev
#  4 bytes: parent 2 rev
# 32 bytes: nodeid
indexformatng = ">Qiiiiii20s12x"
versionformat = ">I"

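Compared to v0, RevlogNG widens the first field to 8 bytes of offset plus
flags (the offset_type packing above), adds an uncompressed length, stores
parents as revision numbers instead of nodeids, and pads the 20-byte SHA-1 to
a 32-byte nodeid slot ('20s' plus '12x'), for a fixed 64-byte entry:

    import struct
    assert struct.calcsize(indexformatng) == 64   # 8 + 6*4 + 20 + 12 pad

    # (offset << 16 | flags, compressed len, uncompressed len, base rev,
    #  link rev, parent 1 rev, parent 2 rev, nodeid)
    e = (0, 11, 11, 0, 0, -1, -1, "\xaa" * 20)
    assert _unpack(indexformatng, _pack(indexformatng, *e)) == e
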
# corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
# signed integer)
_maxentrysize = 0x7fffffff

class revlogio(object):
    def __init__(self):
        self.size = struct.calcsize(indexformatng)

    def parseindex(self, data, inline):
        # call the C implementation to parse the index data
        index, cache = parsers.parse_index2(data, inline)
        return index, getattr(index, 'nodemap', None), cache

    def packentry(self, entry, node, version, rev):
        p = _pack(indexformatng, *entry)
        if rev == 0:
            p = _pack(versionformat, version) + p[4:]
        return p

class revlog(object):
    """
    the underlying revision storage object

    A revlog consists of two parts, an index and the revision data.

    The index is a file with a fixed record size containing
    information on each revision, including its nodeid (hash), the
    nodeids of its parents, the position and offset of its data within
    the data file, and the revision it's based on. Finally, each entry
    contains a linkrev entry that can serve as a pointer to external
    data.

    The revision data itself is a linear collection of data chunks.
    Each chunk represents a revision and is usually represented as a
    delta against the previous chunk. To bound lookup time, runs of
    deltas are limited to about 2 times the length of the original
    version data. This makes retrieval of a version proportional to
    its size, or O(1) relative to the number of revisions.

    Both pieces of the revlog are written to in an append-only
    fashion, which means we never need to rewrite a file to insert or
    remove data, and can use some simple techniques to avoid the need
    for locking while reading.

    If checkambig, indexfile is opened with checkambig=True at
    writing, to avoid file stat ambiguity.
    """
    def __init__(self, opener, indexfile, checkambig=False):
        """
        create a revlog object

        opener is a function that abstracts the file opening operation
        and can be used to implement COW semantics or the like.
        """
        self.indexfile = indexfile
        self.datafile = indexfile[:-2] + ".d"
        self.opener = opener
        # When True, indexfile is opened with checkambig=True at writing, to
        # avoid file stat ambiguity.
        self._checkambig = checkambig
        # 3-tuple of (node, rev, text) for a raw revision.
        self._cache = None
        # Maps rev to chain base rev.
        self._chainbasecache = util.lrucachedict(100)
        # 2-tuple of (offset, data) of raw data from the revlog at an offset.
        self._chunkcache = (0, '')
        # How much data to read and cache into the raw revlog data cache.
        self._chunkcachesize = 65536
        self._maxchainlen = None
        self._aggressivemergedeltas = False
        self.index = []
        # Mapping of partial identifiers to full nodes.
        self._pcache = {}
        # Mapping of revision integer to full node.
        self._nodecache = {nullid: nullrev}
        self._nodepos = None

        v = REVLOG_DEFAULT_VERSION
        opts = getattr(opener, 'options', None)
        if opts is not None:
            if 'revlogv1' in opts:
                if 'generaldelta' in opts:
                    v |= REVLOGGENERALDELTA
            else:
                v = 0
            if 'chunkcachesize' in opts:
                self._chunkcachesize = opts['chunkcachesize']
            if 'maxchainlen' in opts:
                self._maxchainlen = opts['maxchainlen']
            if 'aggressivemergedeltas' in opts:
                self._aggressivemergedeltas = opts['aggressivemergedeltas']
            self._lazydeltabase = bool(opts.get('lazydeltabase', False))

        if self._chunkcachesize <= 0:
            raise RevlogError(_('revlog chunk cache size %r is not greater '
                                'than 0') % self._chunkcachesize)
        elif self._chunkcachesize & (self._chunkcachesize - 1):
            raise RevlogError(_('revlog chunk cache size %r is not a power '
                                'of 2') % self._chunkcachesize)

        indexdata = ''
        self._initempty = True
        try:
            f = self.opener(self.indexfile)
            indexdata = f.read()
            f.close()
            if len(indexdata) > 0:
                v = struct.unpack(versionformat, indexdata[:4])[0]
                self._initempty = False
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise

        self.version = v
        self._inline = v & REVLOGNGINLINEDATA
        self._generaldelta = v & REVLOGGENERALDELTA
        flags = v & ~0xFFFF
        fmt = v & 0xFFFF
        if fmt == REVLOGV0 and flags:
            raise RevlogError(_("index %s unknown flags %#04x for format v0")
                              % (self.indexfile, flags >> 16))
        elif fmt == REVLOGNG and flags & ~REVLOGNG_FLAGS:
            raise RevlogError(_("index %s unknown flags %#04x for revlogng")
                              % (self.indexfile, flags >> 16))
        elif fmt > REVLOGNG:
            raise RevlogError(_("index %s unknown format %d")
                              % (self.indexfile, fmt))

        self.storedeltachains = True

        self._io = revlogio()
        if self.version == REVLOGV0:
            self._io = revlogoldio()
        try:
            d = self._io.parseindex(indexdata, self._inline)
        except (ValueError, IndexError):
            raise RevlogError(_("index %s is corrupted") % (self.indexfile))
        self.index, nodemap, self._chunkcache = d
        if nodemap is not None:
            self.nodemap = self._nodecache = nodemap
        if not self._chunkcache:
            self._chunkclear()
        # revnum -> (chain-length, sum-delta-length)
        self._chaininfocache = {}

    def tip(self):
        return self.node(len(self.index) - 2)
    def __contains__(self, rev):
        return 0 <= rev < len(self)
    def __len__(self):
        return len(self.index) - 1
    def __iter__(self):
        return iter(xrange(len(self)))
    def revs(self, start=0, stop=None):
        """iterate over all revs in this revlog (from start to stop)"""
        step = 1
        if stop is not None:
            if start > stop:
                step = -1
            stop += step
        else:
            stop = len(self)
        return xrange(start, stop, step)

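Note the index list always carries one extra sentinel entry for the null
revision, which is why __len__ subtracts one and tip() reads the
second-to-last entry. revs() walks forward or backward depending on whether
stop is below start; illustrative calls against a hypothetical revlog
instance rl:

    list(rl.revs())                 # 0, 1, ..., len(rl) - 1
    list(rl.revs(start=5, stop=2))  # 5, 4, 3, 2 -- stop is inclusive
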
    @util.propertycache
    def nodemap(self):
        self.rev(self.node(0))
        return self._nodecache

    def hasnode(self, node):
        try:
            self.rev(node)
            return True
        except KeyError:
            return False

    def clearcaches(self):
        self._cache = None
        self._chainbasecache.clear()
        self._chunkcache = (0, '')
        self._pcache = {}

        try:
            self._nodecache.clearcaches()
        except AttributeError:
            self._nodecache = {nullid: nullrev}
            self._nodepos = None

    def rev(self, node):
        try:
            return self._nodecache[node]
        except TypeError:
            raise
        except RevlogError:
            # parsers.c radix tree lookup failed
            raise LookupError(node, self.indexfile, _('no node'))
        except KeyError:
            # pure python cache lookup failed
            n = self._nodecache
            i = self.index
            p = self._nodepos
            if p is None:
                p = len(i) - 2
            for r in xrange(p, -1, -1):
                v = i[r][7]
                n[v] = r
                if v == node:
                    self._nodepos = r - 1
                    return r
            raise LookupError(node, self.indexfile, _('no node'))

-    def node(self, rev):
-        return self.index[rev][7]
-    def linkrev(self, rev):
-        return self.index[rev][4]
-    def parents(self, node):
-        i = self.index
-        d = i[self.rev(node)]
-        return i[d[5]][7], i[d[6]][7] # map revisions to nodes inline
-    def parentrevs(self, rev):
-        return self.index[rev][5:7]
+    # Accessors for index entries.
+
+    # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
+    # are flags.
     def start(self, rev):
         return int(self.index[rev][0] >> 16)
-    def end(self, rev):
-        return self.start(rev) + self.length(rev)
+
+    def flags(self, rev):
+        return self.index[rev][0] & 0xFFFF
+
     def length(self, rev):
         return self.index[rev][1]
+
+    def rawsize(self, rev):
+        """return the length of the uncompressed text for a given revision"""
+        l = self.index[rev][2]
+        if l >= 0:
+            return l
+
+        t = self.revision(self.node(rev))
+        return len(t)
+    size = rawsize
+
     def chainbase(self, rev):
         base = self._chainbasecache.get(rev)
         if base is not None:
             return base

         index = self.index
         base = index[rev][3]
         while base != rev:
             rev = base
             base = index[rev][3]

         self._chainbasecache[rev] = base
         return base
+
+    def linkrev(self, rev):
+        return self.index[rev][4]
+
+    def parentrevs(self, rev):
+        return self.index[rev][5:7]
+
+    def node(self, rev):
+        return self.index[rev][7]
+
+    # Derived from index values.
+
+    def end(self, rev):
+        return self.start(rev) + self.length(rev)
+
+    def parents(self, node):
+        i = self.index
+        d = i[self.rev(node)]
+        return i[d[5]][7], i[d[6]][7] # map revisions to nodes inline
+
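
The reordering above is the point of this commit: the accessors now appear in
the same order as the fields they read, so the code doubles as documentation
of the entry layout. Each self.index[rev] is the 8-tuple unpacked from
indexformatng:

    # position in entry tuple      accessor
    # [0] offset << 16 | flags  -> start(), flags()
    # [1] compressed length     -> length()
    # [2] uncompressed length   -> rawsize()
    # [3] base rev of chain     -> chainbase()
    # [4] link rev              -> linkrev()
    # [5] parent 1 rev          -> parentrevs()[0]
    # [6] parent 2 rev          -> parentrevs()[1]
    # [7] binary nodeid         -> node()
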
    def chainlen(self, rev):
        return self._chaininfo(rev)[0]

    def _chaininfo(self, rev):
        chaininfocache = self._chaininfocache
        if rev in chaininfocache:
            return chaininfocache[rev]
        index = self.index
        generaldelta = self._generaldelta
        iterrev = rev
        e = index[iterrev]
        clen = 0
        compresseddeltalen = 0
        while iterrev != e[3]:
            clen += 1
            compresseddeltalen += e[1]
            if generaldelta:
                iterrev = e[3]
            else:
                iterrev -= 1
            if iterrev in chaininfocache:
                t = chaininfocache[iterrev]
                clen += t[0]
                compresseddeltalen += t[1]
                break
            e = index[iterrev]
        else:
            # Add text length of base since decompressing that also takes
            # work. For cache hits the length is already included.
            compresseddeltalen += e[1]
        r = (clen, compresseddeltalen)
        chaininfocache[rev] = r
        return r

    def _deltachain(self, rev, stoprev=None):
        """Obtain the delta chain for a revision.

        ``stoprev`` specifies a revision to stop at. If not specified, we
        stop at the base of the chain.

        Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
        revs in ascending order and ``stopped`` is a bool indicating whether
        ``stoprev`` was hit.
        """
        chain = []

        # Alias to prevent attribute lookup in tight loop.
        index = self.index
        generaldelta = self._generaldelta

        iterrev = rev
        e = index[iterrev]
        while iterrev != e[3] and iterrev != stoprev:
            chain.append(iterrev)
            if generaldelta:
                iterrev = e[3]
            else:
                iterrev -= 1
            e = index[iterrev]

        if iterrev == stoprev:
            stopped = True
        else:
            chain.append(iterrev)
            stopped = False

        chain.reverse()
        return chain, stopped

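With generaldelta, each step follows the entry's base field (tuple position
3); without it, the delta base is implicitly the previous revision, so the
walk simply decrements. A self-contained sketch over a toy index holding only
each revision's base, the one field the walk consults (base == rev marks a
full snapshot):

    def deltachain(bases, rev, generaldelta):
        chain = []
        iterrev = rev
        while iterrev != bases[iterrev]:
            chain.append(iterrev)
            iterrev = bases[iterrev] if generaldelta else iterrev - 1
        chain.append(iterrev)
        chain.reverse()
        return chain

    # revs 0 and 1 are full snapshots; revs 2 and 3 are deltas based on 1
    bases = [0, 1, 1, 1]
    assert deltachain(bases, 3, generaldelta=True) == [1, 3]
    assert deltachain(bases, 3, generaldelta=False) == [1, 2, 3]
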
-    def flags(self, rev):
-        return self.index[rev][0] & 0xFFFF
-    def rawsize(self, rev):
-        """return the length of the uncompressed text for a given revision"""
-        l = self.index[rev][2]
-        if l >= 0:
-            return l
-
-        t = self.revision(self.node(rev))
-        return len(t)
-    size = rawsize
-
    def ancestors(self, revs, stoprev=0, inclusive=False):
        """Generate the ancestors of 'revs' in reverse topological order.
        Does not generate revs lower than stoprev.

        See the documentation for ancestor.lazyancestors for more details."""

        return ancestor.lazyancestors(self.parentrevs, revs, stoprev=stoprev,
                                      inclusive=inclusive)

    def descendants(self, revs):
        """Generate the descendants of 'revs' in revision order.

        Yield a sequence of revision numbers starting with a child of
        some rev in revs, i.e., each revision is *not* considered a
        descendant of itself. Results are ordered by revision number (a
        topological sort)."""
        first = min(revs)
        if first == nullrev:
            for i in self:
                yield i
            return

        seen = set(revs)
        for i in self.revs(start=first + 1):
            for x in self.parentrevs(i):
                if x != nullrev and x in seen:
                    seen.add(i)
                    yield i
                    break

    def findcommonmissing(self, common=None, heads=None):
        """Return a tuple of the ancestors of common and the ancestors of heads
        that are not ancestors of common. In revset terminology, we return the
        tuple:

          ::common, (::heads) - (::common)

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of node IDs. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullid."""
        if common is None:
            common = [nullid]
        if heads is None:
            heads = self.heads()

        common = [self.rev(n) for n in common]
        heads = [self.rev(n) for n in heads]

        # we want the ancestors, but inclusive
        class lazyset(object):
            def __init__(self, lazyvalues):
                self.addedvalues = set()
                self.lazyvalues = lazyvalues

            def __contains__(self, value):
                return value in self.addedvalues or value in self.lazyvalues

            def __iter__(self):
                added = self.addedvalues
                for r in added:
                    yield r
                for r in self.lazyvalues:
                    if r not in added:
                        yield r

            def add(self, value):
                self.addedvalues.add(value)

            def update(self, values):
                self.addedvalues.update(values)

        has = lazyset(self.ancestors(common))
        has.add(nullrev)
        has.update(common)

        # take all ancestors from heads that aren't in has
        missing = set()
        visit = collections.deque(r for r in heads if r not in has)
        while visit:
            r = visit.popleft()
            if r in missing:
                continue
            else:
                missing.add(r)
                for p in self.parentrevs(r):
                    if p not in has:
                        visit.append(p)
        missing = list(missing)
        missing.sort()
        return has, [self.node(r) for r in missing]

    def incrementalmissingrevs(self, common=None):
        """Return an object that can be used to incrementally compute the
        revision numbers of the ancestors of arbitrary sets that are not
        ancestors of common. This is an ancestor.incrementalmissingancestors
        object.

        'common' is a list of revision numbers. If common is not supplied, uses
        nullrev.
        """
        if common is None:
            common = [nullrev]

        return ancestor.incrementalmissingancestors(self.parentrevs, common)

    def findmissingrevs(self, common=None, heads=None):
        """Return the revision numbers of the ancestors of heads that
        are not ancestors of common.

        More specifically, return a list of revision numbers corresponding to
        nodes N such that every N satisfies the following constraints:

          1. N is an ancestor of some node in 'heads'
          2. N is not an ancestor of any node in 'common'

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of revision numbers. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullid."""
        if common is None:
            common = [nullrev]
        if heads is None:
            heads = self.headrevs()

        inc = self.incrementalmissingrevs(common=common)
        return inc.missingancestors(heads)

    def findmissing(self, common=None, heads=None):
        """Return the ancestors of heads that are not ancestors of common.

        More specifically, return a list of nodes N such that every N
        satisfies the following constraints:

          1. N is an ancestor of some node in 'heads'
          2. N is not an ancestor of any node in 'common'

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of node IDs. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullid."""
        if common is None:
            common = [nullid]
        if heads is None:
            heads = self.heads()

        common = [self.rev(n) for n in common]
        heads = [self.rev(n) for n in heads]

        inc = self.incrementalmissingrevs(common=common)
        return [self.node(r) for r in inc.missingancestors(heads)]

    def nodesbetween(self, roots=None, heads=None):
        """Return a topological path from 'roots' to 'heads'.

        Return a tuple (nodes, outroots, outheads) where 'nodes' is a
        topologically sorted list of all nodes N that satisfy both of
        these constraints:

          1. N is a descendant of some node in 'roots'
          2. N is an ancestor of some node in 'heads'

        Every node is considered to be both a descendant and an ancestor
        of itself, so every reachable node in 'roots' and 'heads' will be
        included in 'nodes'.

        'outroots' is the list of reachable nodes in 'roots', i.e., the
        subset of 'roots' that is returned in 'nodes'. Likewise,
        'outheads' is the subset of 'heads' that is also in 'nodes'.

        'roots' and 'heads' are both lists of node IDs. If 'roots' is
        unspecified, uses nullid as the only root. If 'heads' is
        unspecified, uses list of all of the revlog's heads."""
        nonodes = ([], [], [])
        if roots is not None:
            roots = list(roots)
            if not roots:
                return nonodes
            lowestrev = min([self.rev(n) for n in roots])
        else:
            roots = [nullid] # Everybody's a descendant of nullid
            lowestrev = nullrev
        if (lowestrev == nullrev) and (heads is None):
            # We want _all_ the nodes!
            return ([self.node(r) for r in self], [nullid], list(self.heads()))
        if heads is None:
            # All nodes are ancestors, so the latest ancestor is the last
            # node.
            highestrev = len(self) - 1
            # Set ancestors to None to signal that every node is an ancestor.
            ancestors = None
            # Set heads to an empty dictionary for later discovery of heads
            heads = {}
        else:
            heads = list(heads)
            if not heads:
                return nonodes
            ancestors = set()
            # Turn heads into a dictionary so we can remove 'fake' heads.
            # Also, later we will be using it to filter out the heads we can't
            # find from roots.
            heads = dict.fromkeys(heads, False)
            # Start at the top and keep marking parents until we're done.
            nodestotag = set(heads)
            # Remember where the top was so we can use it as a limit later.
            highestrev = max([self.rev(n) for n in nodestotag])
            while nodestotag:
                # grab a node to tag
                n = nodestotag.pop()
                # Never tag nullid
                if n == nullid:
                    continue
                # A node's revision number represents its place in a
                # topologically sorted list of nodes.
                r = self.rev(n)
                if r >= lowestrev:
                    if n not in ancestors:
                        # If we are possibly a descendant of one of the roots
                        # and we haven't already been marked as an ancestor
                        ancestors.add(n) # Mark as ancestor
                        # Add non-nullid parents to list of nodes to tag.
                        nodestotag.update([p for p in self.parents(n) if
                                           p != nullid])
                    elif n in heads: # We've seen it before, is it a fake head?
                        # So it is, real heads should not be the ancestors of
                        # any other heads.
                        heads.pop(n)
            if not ancestors:
                return nonodes
            # Now that we have our set of ancestors, we want to remove any
            # roots that are not ancestors.

            # If one of the roots was nullid, everything is included anyway.
            if lowestrev > nullrev:
                # But, since we weren't, let's recompute the lowest rev to not
                # include roots that aren't ancestors.

                # Filter out roots that aren't ancestors of heads
                roots = [n for n in roots if n in ancestors]
                # Recompute the lowest revision
                if roots:
                    lowestrev = min([self.rev(n) for n in roots])
                else:
                    # No more roots? Return empty list
                    return nonodes
            else:
                # We are descending from nullid, and don't need to care about
                # any other roots.
                lowestrev = nullrev
                roots = [nullid]
        # Transform our roots list into a set.
        descendants = set(roots)
        # Also, keep the original roots so we can filter out roots that aren't
        # 'real' roots (i.e. are descended from other roots).
        roots = descendants.copy()
        # Our topologically sorted list of output nodes.
        orderedout = []
        # Don't start at nullid since we don't want nullid in our output list,
        # and if nullid shows up in descendants, empty parents will look like
        # they're descendants.
        for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
            n = self.node(r)
            isdescendant = False
            if lowestrev == nullrev: # Everybody is a descendant of nullid
                isdescendant = True
            elif n in descendants:
                # n is already a descendant
                isdescendant = True
                # This check only needs to be done here because all the roots
                # will start being marked as descendants before the loop.
                if n in roots:
                    # If n was a root, check if it's a 'real' root.
                    p = tuple(self.parents(n))
                    # If any of its parents are descendants, it's not a root.
                    if (p[0] in descendants) or (p[1] in descendants):
                        roots.remove(n)
            else:
                p = tuple(self.parents(n))
                # A node is a descendant if either of its parents are
                # descendants. (We seeded the dependents list with the roots
                # up there, remember?)
                if (p[0] in descendants) or (p[1] in descendants):
                    descendants.add(n)
                    isdescendant = True
            if isdescendant and ((ancestors is None) or (n in ancestors)):
                # Only include nodes that are both descendants and ancestors.
                orderedout.append(n)
                if (ancestors is not None) and (n in heads):
                    # We're trying to figure out which heads are reachable
                    # from roots.
                    # Mark this head as having been reached
                    heads[n] = True
                elif ancestors is None:
                    # Otherwise, we're trying to discover the heads.
                    # Assume this is a head because if it isn't, the next step
                    # will eventually remove it.
                    heads[n] = True
                    # But, obviously its parents aren't.
                    for p in self.parents(n):
                        heads.pop(p, None)
        heads = [n for n, flag in heads.iteritems() if flag]
        roots = list(roots)
        assert orderedout
        assert roots
        assert heads
        return (orderedout, roots, heads)

    def headrevs(self):
        try:
            return self.index.headrevs()
        except AttributeError:
            return self._headrevs()

    def computephases(self, roots):
        return self.index.computephasesmapsets(roots)

    def _headrevs(self):
        count = len(self)
        if not count:
            return [nullrev]
        # we won't iter over filtered rev so nobody is a head at start
        ishead = [0] * (count + 1)
        index = self.index
        for r in self:
            ishead[r] = 1 # I may be a head
            e = index[r]
            ishead[e[5]] = ishead[e[6]] = 0 # my parents are not
        return [r for r, val in enumerate(ishead) if val]

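The pure Python fallback finds heads in one pass: mark each revision it visits
as a head, then clear the mark of anything that appears as a parent. The extra
slot at the end of ishead is where nullrev (-1) parents land, so they never
clear a real revision. A standalone sketch with explicit parent tuples (the
real code additionally skips filtered revisions):

    def headrevs(parentrevs):
        count = len(parentrevs)
        ishead = [1] * (count + 1)       # extra slot absorbs nullrev (-1)
        for r in range(count):
            p1, p2 = parentrevs[r]
            ishead[p1] = ishead[p2] = 0
        return [r for r in range(count) if ishead[r]]

    # 0 <- 1 <- 2 and 0 <- 3: revisions 2 and 3 are the heads
    assert headrevs([(-1, -1), (0, -1), (1, -1), (0, -1)]) == [2, 3]
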
    def heads(self, start=None, stop=None):
        """return the list of all nodes that have no children

        if start is specified, only heads that are descendants of
        start will be returned
        if stop is specified, it will consider all the revs from stop
        as if they had no children
        """
        if start is None and stop is None:
            if not len(self):
                return [nullid]
            return [self.node(r) for r in self.headrevs()]

        if start is None:
            start = nullid
        if stop is None:
            stop = []
        stoprevs = set([self.rev(n) for n in stop])
        startrev = self.rev(start)
        reachable = set((startrev,))
        heads = set((startrev,))

        parentrevs = self.parentrevs
        for r in self.revs(start=startrev + 1):
            for p in parentrevs(r):
                if p in reachable:
                    if r not in stoprevs:
                        reachable.add(r)
                        heads.add(r)
                    if p in heads and p not in stoprevs:
                        heads.remove(p)

        return [self.node(r) for r in heads]

    def children(self, node):
        """find the children of a given node"""
        c = []
        p = self.rev(node)
        for r in self.revs(start=p + 1):
            prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
            if prevs:
                for pr in prevs:
                    if pr == p:
                        c.append(self.node(r))
            elif p == nullrev:
                c.append(self.node(r))
        return c

    def descendant(self, start, end):
        if start == nullrev:
            return True
        for i in self.descendants([start]):
            if i == end:
                return True
            elif i > end:
                break
        return False

    def commonancestorsheads(self, a, b):
        """calculate all the heads of the common ancestors of nodes a and b"""
        a, b = self.rev(a), self.rev(b)
        try:
            ancs = self.index.commonancestorsheads(a, b)
        except (AttributeError, OverflowError): # C implementation failed
            ancs = ancestor.commonancestorsheads(self.parentrevs, a, b)
        return map(self.node, ancs)

    def isancestor(self, a, b):
        """return True if node a is an ancestor of node b

        The implementation of this is trivial but the use of
        commonancestorsheads is not."""
        return a in self.commonancestorsheads(a, b)

    def ancestor(self, a, b):
        """calculate the "best" common ancestor of nodes a and b"""

        a, b = self.rev(a), self.rev(b)
        try:
            ancs = self.index.ancestors(a, b)
        except (AttributeError, OverflowError):
            ancs = ancestor.ancestors(self.parentrevs, a, b)
        if ancs:
            # choose a consistent winner when there's a tie
            return min(map(self.node, ancs))
        return nullid

    def _match(self, id):
        if isinstance(id, int):
            # rev
            return self.node(id)
        if len(id) == 20:
            # possibly a binary node
            # odds of a binary node being all hex in ASCII are 1 in 10**25
            try:
                node = id
                self.rev(node) # quick search the index
                return node
            except LookupError:
                pass # may be partial hex id
        try:
            # str(rev)
            rev = int(id)
            if str(rev) != id:
                raise ValueError
            if rev < 0:
                rev = len(self) + rev
            if rev < 0 or rev >= len(self):
                raise ValueError
            return self.node(rev)
        except (ValueError, OverflowError):
            pass
        if len(id) == 40:
            try:
                # a full hex nodeid?
                node = bin(id)
                self.rev(node)
                return node
            except (TypeError, LookupError):
                pass

    def _partialmatch(self, id):
        try:
            n = self.index.partialmatch(id)
            if n and self.hasnode(n):
                return n
            return None
        except RevlogError:
            # parsers.c radix tree lookup gave multiple matches
            # fast path: for unfiltered changelog, radix tree is accurate
            if not getattr(self, 'filteredrevs', None):
                raise LookupError(id, self.indexfile,
                                  _('ambiguous identifier'))
            # fall through to slow path that filters hidden revisions
        except (AttributeError, ValueError):
            # we are pure python, or key was too short to search radix tree
            pass

        if id in self._pcache:
            return self._pcache[id]

        if len(id) < 40:
            try:
                # hex(node)[:...]
                l = len(id) // 2  # grab an even number of digits
                prefix = bin(id[:l * 2])
                nl = [e[7] for e in self.index if e[7].startswith(prefix)]
                nl = [n for n in nl if hex(n).startswith(id) and
                      self.hasnode(n)]
                if len(nl) > 0:
                    if len(nl) == 1:
                        self._pcache[id] = nl[0]
                        return nl[0]
                    raise LookupError(id, self.indexfile,
                                      _('ambiguous identifier'))
                return None
            except TypeError:
                pass

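    # Illustrative note on the ``e[7]`` access above (a sketch, not
    # upstream documentation): each index entry is the 8-tuple built in
    # _addrevision() below, so position 7 is the binary nodeid:
    #
    #   e = (offset_type(offset, flags),  # e[0]: offset and flags
    #        compressedlength,            # e[1]
    #        uncompressedlength,          # e[2]
    #        deltabase,                   # e[3], see deltaparent()
    #        linkrev,                     # e[4]
    #        p1rev,                       # e[5]
    #        p2rev,                       # e[6]
    #        node)                        # e[7]
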
    def lookup(self, id):
        """locate a node based on:
            - revision number or str(revision number)
            - nodeid or subset of hex nodeid
        """
        n = self._match(id)
        if n is not None:
            return n
        n = self._partialmatch(id)
        if n:
            return n

        raise LookupError(id, self.indexfile, _('no match found'))

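    # A minimal usage sketch (hypothetical values, not executed): all of
    # these resolve to the same binary nodeid, assuming ``rl`` is a revlog
    # whose first node's hex form starts with "1de3" unambiguously:
    #
    #   rl.lookup(0)            # integer revision number
    #   rl.lookup('0')          # str(revision number)
    #   rl.lookup(rl.node(0))   # 20-byte binary nodeid
    #   rl.lookup('1de3')       # unambiguous hex prefix
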
    def cmp(self, node, text):
        """compare text with a given file revision

        returns True if text is different than what is stored.
        """
        p1, p2 = self.parents(node)
        return hash(text, p1, p2) != node

    def _addchunk(self, offset, data):
        """Add a segment to the revlog cache.

        Accepts an absolute offset and the data that is at that location.
        """
        o, d = self._chunkcache
        # try to add to existing cache
        if o + len(d) == offset and len(d) + len(data) < _chunksize:
            self._chunkcache = o, d + data
        else:
            self._chunkcache = offset, data

    def _loadchunk(self, offset, length, df=None):
        """Load a segment of raw data from the revlog.

        Accepts an absolute offset, length to read, and an optional existing
        file handle to read from.

        If an existing file handle is passed, it will be seeked and the
        original seek position will NOT be restored.

        Returns a str or buffer of raw byte data.
        """
        if df is not None:
            closehandle = False
        else:
            if self._inline:
                df = self.opener(self.indexfile)
            else:
                df = self.opener(self.datafile)
            closehandle = True

        # Cache data both forward and backward around the requested
        # data, in a fixed size window. This helps speed up operations
        # involving reading the revlog backwards.
        cachesize = self._chunkcachesize
        realoffset = offset & ~(cachesize - 1)
        reallength = (((offset + length + cachesize) & ~(cachesize - 1))
                      - realoffset)
        df.seek(realoffset)
        d = df.read(reallength)
        if closehandle:
            df.close()
        self._addchunk(realoffset, d)
        if offset != realoffset or reallength != length:
            return util.buffer(d, offset - realoffset, length)
        return d

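    # A worked example of the window arithmetic above (illustrative
    # numbers only): with a power-of-two cachesize of 65536, a request for
    # offset=70000, length=100 is widened to an aligned window:
    #
    #   realoffset = 70000 & ~65535             # -> 65536
    #   reallength = ((70000 + 100 + 65536)
    #                 & ~65535) - 65536         # -> 65536 (one window)
    #
    # so the read covers [65536, 131072) and the caller gets back a buffer
    # view of just the 100 requested bytes.
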
    def _getchunk(self, offset, length, df=None):
        """Obtain a segment of raw data from the revlog.

        Accepts an absolute offset, length of bytes to obtain, and an
        optional file handle to the already-opened revlog. If the file
        handle is used, its original seek position will not be preserved.

        Requests for data may be returned from a cache.

        Returns a str or a buffer instance of raw byte data.
        """
        o, d = self._chunkcache
        l = len(d)

        # is it in the cache?
        cachestart = offset - o
        cacheend = cachestart + length
        if cachestart >= 0 and cacheend <= l:
            if cachestart == 0 and cacheend == l:
                return d # avoid a copy
            return util.buffer(d, cachestart, cacheend - cachestart)

        return self._loadchunk(offset, length, df=df)

    def _chunkraw(self, startrev, endrev, df=None):
        """Obtain a segment of raw data corresponding to a range of revisions.

        Accepts the start and end revisions and an optional already-open
        file handle to be used for reading. If the file handle is read, its
        seek position will not be preserved.

        Requests for data may be satisfied by a cache.

        Returns a 2-tuple of (offset, data) for the requested range of
        revisions. Offset is the integer offset from the beginning of the
        revlog and data is a str or buffer of the raw byte data.

        Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
        to determine where each revision's data begins and ends.
        """
        start = self.start(startrev)
        end = self.end(endrev)
        if self._inline:
            start += (startrev + 1) * self._io.size
            end += (endrev + 1) * self._io.size
        length = end - start

        return start, self._getchunk(start, length, df=df)

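    # Illustrative note on the inline adjustment above (a sketch, assuming
    # the usual inline layout): an inline revlog interleaves each index
    # entry with its data in the .i file, so the data of revision r sits
    # (r + 1) index entries into the file:
    #
    #   [header+entry 0][data 0][entry 1][data 1][entry 2][data 2]...
    #
    # Hence ``start(rev)``, which counts data bytes only, is shifted by
    # (rev + 1) * self._io.size to get a physical file offset.
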
    def _chunk(self, rev, df=None):
        """Obtain a single decompressed chunk for a revision.

        Accepts an integer revision and an optional already-open file handle
        to be used for reading. If used, the seek position of the file will not
        be preserved.

        Returns a str holding uncompressed data for the requested revision.
        """
        return decompress(self._chunkraw(rev, rev, df=df)[1])

    def _chunks(self, revs, df=None):
        """Obtain decompressed chunks for the specified revisions.

        Accepts an iterable of numeric revisions that are assumed to be in
        ascending order. Also accepts an optional already-open file handle
        to be used for reading. If used, the seek position of the file will
        not be preserved.

        This function is similar to calling ``self._chunk()`` multiple times,
        but is faster.

        Returns a list with decompressed data for each requested revision.
        """
        if not revs:
            return []
        start = self.start
        length = self.length
        inline = self._inline
        iosize = self._io.size
        buffer = util.buffer

        l = []
        ladd = l.append

        try:
            offset, data = self._chunkraw(revs[0], revs[-1], df=df)
        except OverflowError:
            # issue4215 - we can't cache a run of chunks greater than
            # 2G on Windows
            return [self._chunk(rev, df=df) for rev in revs]

        for rev in revs:
            chunkstart = start(rev)
            if inline:
                chunkstart += (rev + 1) * iosize
            chunklength = length(rev)
            ladd(decompress(buffer(data, chunkstart - offset, chunklength)))

        return l

    def _chunkclear(self):
        """Clear the raw chunk cache."""
        self._chunkcache = (0, '')

    def deltaparent(self, rev):
        """return deltaparent of the given revision"""
        base = self.index[rev][3]
        if base == rev:
            return nullrev
        elif self._generaldelta:
            return base
        else:
            return rev - 1

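    # Illustrative note (a sketch of the two storage modes, not upstream
    # documentation): in a generaldelta revlog the index field e[3] names
    # the delta base directly, so any earlier revision (typically a
    # parent) can serve as the base. In the classic layout e[3] only marks
    # the start of the chain and every delta is implicitly against rev - 1:
    #
    #   generaldelta: deltaparent(rev) == index[rev][3]
    #   classic:      deltaparent(rev) == rev - 1
    #   either:       deltaparent(rev) == nullrev for a stored fulltext
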
    def revdiff(self, rev1, rev2):
        """return or calculate a delta between two revisions"""
        if rev1 != nullrev and self.deltaparent(rev2) == rev1:
            return str(self._chunk(rev2))

        return mdiff.textdiff(self.revision(rev1),
                              self.revision(rev2))

    def revision(self, nodeorrev, _df=None):
        """return an uncompressed revision of a given node or revision
        number.

        _df is an existing file handle to read from. It is meant to only be
        used internally.
        """
        if isinstance(nodeorrev, int):
            rev = nodeorrev
            node = self.node(rev)
        else:
            node = nodeorrev
            rev = None

        cachedrev = None
        if node == nullid:
            return ""
        if self._cache:
            if self._cache[0] == node:
                return self._cache[2]
            cachedrev = self._cache[1]

        # look up what we need to read
        text = None
        if rev is None:
            rev = self.rev(node)

        # check rev flags
        if self.flags(rev) & ~REVIDX_KNOWN_FLAGS:
            raise RevlogError(_('incompatible revision flag %x') %
                              (self.flags(rev) & ~REVIDX_KNOWN_FLAGS))

        chain, stopped = self._deltachain(rev, stoprev=cachedrev)
        if stopped:
            text = self._cache[2]

        # drop cache to save memory
        self._cache = None

        bins = self._chunks(chain, df=_df)
        if text is None:
            text = str(bins[0])
            bins = bins[1:]

        text = mdiff.patches(text, bins)

        text = self._checkhash(text, node, rev)

        self._cache = (node, rev, text)
        return text

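    # A minimal sketch of the reconstruction above (illustrative only):
    # for a delta chain, _chunks() returns the decompressed fulltext of
    # the base followed by the deltas, and the revision is rebuilt by
    # folding the deltas over the base:
    #
    #   chain, stopped = self._deltachain(rev)   # e.g. [10, 12, 15]
    #   bins = self._chunks(chain)               # [full10, delta12, delta15]
    #   text = mdiff.patches(str(bins[0]), bins[1:])
    #
    # When ``stopped`` is true, the walk hit the cached revision and the
    # cached text is the starting point instead of a stored fulltext.
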
    def hash(self, text, p1, p2):
        """Compute a node hash.

        Available as a function so that subclasses can replace the hash
        as needed.
        """
        return hash(text, p1, p2)

    def _checkhash(self, text, node, rev):
        p1, p2 = self.parents(node)
        self.checkhash(text, p1, p2, node, rev)
        return text

    def checkhash(self, text, p1, p2, node, rev=None):
        if node != self.hash(text, p1, p2):
            revornode = rev
            if revornode is None:
                revornode = templatefilters.short(hex(node))
            raise RevlogError(_("integrity check failed on %s:%s")
                              % (self.indexfile, revornode))

    def checkinlinesize(self, tr, fp=None):
        """Check if the revlog is too big for inline and convert if so.

        This should be called after revisions are added to the revlog. If the
        revlog has grown too large to be an inline revlog, it will convert it
        to use multiple index and data files.
        """
        if not self._inline or (self.start(-2) + self.length(-2)) < _maxinline:
            return

        trinfo = tr.find(self.indexfile)
        if trinfo is None:
            raise RevlogError(_("%s not found in the transaction")
                              % self.indexfile)

        trindex = trinfo[2]
        if trindex is not None:
            dataoff = self.start(trindex)
        else:
            # revlog was stripped at start of transaction, use all leftover
            # data
            trindex = len(self) - 1
            dataoff = self.end(-2)

        tr.add(self.datafile, dataoff)

        if fp:
            fp.flush()
            fp.close()

        df = self.opener(self.datafile, 'w')
        try:
            for r in self:
                df.write(self._chunkraw(r, r)[1])
        finally:
            df.close()

        fp = self.opener(self.indexfile, 'w', atomictemp=True,
                         checkambig=self._checkambig)
        self.version &= ~(REVLOGNGINLINEDATA)
        self._inline = False
        for i in self:
            e = self._io.packentry(self.index[i], self.node, self.version, i)
            fp.write(e)

        # if we don't call close, the temp file will never replace the
        # real index
        fp.close()

        tr.replace(self.indexfile, trindex * self._io.size)
        self._chunkclear()

    def addrevision(self, text, transaction, link, p1, p2, cachedelta=None,
                    node=None):
        """add a revision to the log

        text - the revision data to add
        transaction - the transaction object used for rollback
        link - the linkrev data to add
        p1, p2 - the parent nodeids of the revision
        cachedelta - an optional precomputed delta
        node - nodeid of revision; typically node is not specified, and it is
            computed by default as hash(text, p1, p2), however subclasses
            might use a different hashing method (and override checkhash()
            in such a case)
        """
        if link == nullrev:
            raise RevlogError(_("attempted to add linkrev -1 to %s")
                              % self.indexfile)

        if len(text) > _maxentrysize:
            raise RevlogError(
                _("%s: size of %d bytes exceeds maximum revlog storage of 2GiB")
                % (self.indexfile, len(text)))

        node = node or self.hash(text, p1, p2)
        if node in self.nodemap:
            return node

        dfh = None
        if not self._inline:
            dfh = self.opener(self.datafile, "a+")
        ifh = self.opener(self.indexfile, "a+", checkambig=self._checkambig)
        try:
            return self._addrevision(node, text, transaction, link, p1, p2,
                                     REVIDX_DEFAULT_FLAGS, cachedelta, ifh, dfh)
        finally:
            if dfh:
                dfh.close()
            ifh.close()

    def compress(self, text):
        """generate a possibly-compressed representation of text"""
        if not text:
            return ("", text)
        l = len(text)
        bin = None
        if l < 44:
            pass
        elif l > 1000000:
            # zlib makes an internal copy, thus doubling memory usage for
            # large files, so let's do this in pieces
            z = zlib.compressobj()
            p = []
            pos = 0
            while pos < l:
                pos2 = pos + 2**20
                p.append(z.compress(text[pos:pos2]))
                pos = pos2
            p.append(z.flush())
            if sum(map(len, p)) < l:
                bin = "".join(p)
        else:
            bin = _compress(text)
        if bin is None or len(bin) > l:
            if text[0] == '\0':
                return ("", text)
            return ('u', text)
        return ("", bin)

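    # Illustrative summary of the return convention above (a sketch, not
    # upstream documentation): compress() returns a (header, data) pair
    # that is concatenated on disk:
    #
    #   ("", "")        - empty input stored as-is
    #   ("", zlibdata)  - compressed; zlib output begins with 'x'
    #   ('u', text)     - stored uncompressed, 'u' marks the raw text
    #   ("", text)      - raw text beginning with '\0' needs no marker
    #
    # Inputs shorter than 44 bytes skip zlib entirely; at that size the
    # compressed form would not be smaller.
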
    def _isgooddelta(self, d, textlen):
        """Returns True if the given delta is good. Good means that it is
        within the disk span, disk size, and chain length bounds that we
        know to be performant."""
        if d is None:
            return False

        # - 'dist' is the distance from the base revision -- bounding it limits
        #   the amount of I/O we need to do.
        # - 'compresseddeltalen' is the sum of the total size of deltas we need
        #   to apply -- bounding it limits the amount of CPU we consume.
        dist, l, data, base, chainbase, chainlen, compresseddeltalen = d
        if (dist > textlen * 4 or l > textlen or
            compresseddeltalen > textlen * 2 or
            (self._maxchainlen and chainlen > self._maxchainlen)):
            return False

        return True

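    # A worked example of the bounds above (illustrative numbers only):
    # for a revision whose fulltext is textlen = 1000 bytes, a candidate
    # delta is rejected when any of the following hold:
    #
    #   dist > 4000                  # would read too far back on disk
    #   l > 1000                     # delta bigger than storing a fulltext
    #   compresseddeltalen > 2000    # chain too expensive to apply
    #   chainlen > self._maxchainlen # if a chain-length cap is configured
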
    def _addrevision(self, node, text, transaction, link, p1, p2, flags,
                     cachedelta, ifh, dfh, alwayscache=False):
        """internal function to add revisions to the log

        see addrevision for argument descriptions.
        invariants:
        - text is optional (can be None); if not set, cachedelta must be set.
          if both are set, they must correspond to each other.
        """
        btext = [text]
        def buildtext():
            if btext[0] is not None:
                return btext[0]
            baserev = cachedelta[0]
            delta = cachedelta[1]
            # special case deltas which replace entire base; no need to decode
            # base revision. this neatly avoids censored bases, which throw
            # when they're decoded.
            hlen = struct.calcsize(">lll")
            if delta[:hlen] == mdiff.replacediffheader(self.rawsize(baserev),
                                                       len(delta) - hlen):
                btext[0] = delta[hlen:]
            else:
                if self._inline:
                    fh = ifh
                else:
                    fh = dfh
                basetext = self.revision(self.node(baserev), _df=fh)
                btext[0] = mdiff.patch(basetext, delta)
            try:
                self.checkhash(btext[0], p1, p2, node)
                if flags & REVIDX_ISCENSORED:
                    raise RevlogError(_('node %s is not censored') % node)
            except CensoredNodeError:
                # must pass the censored index flag to add censored revisions
                if not flags & REVIDX_ISCENSORED:
                    raise
            return btext[0]

        def builddelta(rev):
            # can we use the cached delta?
            if cachedelta and cachedelta[0] == rev:
                delta = cachedelta[1]
            else:
                t = buildtext()
                if self.iscensored(rev):
                    # deltas based on a censored revision must replace the
                    # full content in one patch, so delta works everywhere
                    header = mdiff.replacediffheader(self.rawsize(rev), len(t))
                    delta = header + t
                else:
                    if self._inline:
                        fh = ifh
                    else:
                        fh = dfh
                    ptext = self.revision(self.node(rev), _df=fh)
                    delta = mdiff.textdiff(ptext, t)
            header, data = self.compress(delta)
            deltalen = len(header) + len(data)
            chainbase = self.chainbase(rev)
            dist = deltalen + offset - self.start(chainbase)
            if self._generaldelta:
                base = rev
            else:
                base = chainbase
            chainlen, compresseddeltalen = self._chaininfo(rev)
            chainlen += 1
            compresseddeltalen += deltalen
            return (dist, deltalen, (header, data), base,
                    chainbase, chainlen, compresseddeltalen)

        curr = len(self)
        prev = curr - 1
        offset = self.end(prev)
        delta = None
        p1r, p2r = self.rev(p1), self.rev(p2)

        # full versions are inserted when the needed deltas
        # become comparable to the uncompressed text
        if text is None:
            textlen = mdiff.patchedsize(self.rawsize(cachedelta[0]),
                                        cachedelta[1])
        else:
            textlen = len(text)

        # should we try to build a delta?
        if prev != nullrev and self.storedeltachains:
            tested = set()
            # This condition is true most of the time when processing
            # changegroup data into a generaldelta repo. The only time it
            # isn't true is if this is the first revision in a delta chain
            # or if ``format.generaldelta=true`` disabled ``lazydeltabase``.
            if cachedelta and self._generaldelta and self._lazydeltabase:
                # Assume what we received from the server is a good choice;
                # builddelta will reuse the cache
                candidatedelta = builddelta(cachedelta[0])
                tested.add(cachedelta[0])
                if self._isgooddelta(candidatedelta, textlen):
                    delta = candidatedelta
            if delta is None and self._generaldelta:
                # exclude any already lazily tested base
                parents = [p for p in (p1r, p2r)
                           if p != nullrev and p not in tested]
                if parents and not self._aggressivemergedeltas:
                    # Pick whichever parent is closer to us (to minimize the
                    # chance of having to build a fulltext).
                    parents = [max(parents)]
                tested.update(parents)
                pdeltas = []
                for p in parents:
                    pd = builddelta(p)
                    if self._isgooddelta(pd, textlen):
                        pdeltas.append(pd)
                if pdeltas:
                    delta = min(pdeltas, key=lambda x: x[1])
            if delta is None and prev not in tested:
                # other approaches failed; try against prev to hopefully
                # save us a fulltext.
                candidatedelta = builddelta(prev)
                if self._isgooddelta(candidatedelta, textlen):
                    delta = candidatedelta
        if delta is not None:
            dist, l, data, base, chainbase, chainlen, compresseddeltalen = delta
        else:
            text = buildtext()
            data = self.compress(text)
            l = len(data[1]) + len(data[0])
            base = chainbase = curr

        e = (offset_type(offset, flags), l, textlen,
             base, link, p1r, p2r, node)
        self.index.insert(-1, e)
        self.nodemap[node] = curr

        entry = self._io.packentry(e, self.node, self.version, curr)
        self._writeentry(transaction, ifh, dfh, entry, data, link, offset)

        if alwayscache and text is None:
            text = buildtext()

        if type(text) == str: # only accept immutable objects
            self._cache = (node, curr, text)
        self._chainbasecache[curr] = chainbase
        return node

    def _writeentry(self, transaction, ifh, dfh, entry, data, link, offset):
        # Files opened in a+ mode have inconsistent behavior on various
        # platforms. Windows requires that a file positioning call be made
        # when the file handle transitions between reads and writes. See
        # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
        # platforms, Python or the platform itself can be buggy. Some versions
        # of Solaris have been observed to not append at the end of the file
        # if the file was seeked to before the end. See issue4943 for more.
        #
        # We work around this issue by inserting a seek() before writing.
        # Note: This is likely not necessary on Python 3.
        ifh.seek(0, os.SEEK_END)
        if dfh:
            dfh.seek(0, os.SEEK_END)

        curr = len(self) - 1
        if not self._inline:
            transaction.add(self.datafile, offset)
            transaction.add(self.indexfile, curr * len(entry))
            if data[0]:
                dfh.write(data[0])
            dfh.write(data[1])
            ifh.write(entry)
        else:
            offset += curr * self._io.size
            transaction.add(self.indexfile, offset, curr)
            ifh.write(entry)
            ifh.write(data[0])
            ifh.write(data[1])
            self.checkinlinesize(transaction, ifh)

    def addgroup(self, cg, linkmapper, transaction, addrevisioncb=None):
        """
        add a delta group

        given a set of deltas, add them to the revision log. the
        first delta is against its parent, which should be in our
        log, the rest are against the previous delta.

        If ``addrevisioncb`` is defined, it will be called with arguments of
        this revlog and the node that was added.
        """

        # track the base of the current delta log
        content = []
        node = None

        r = len(self)
        end = 0
        if r:
            end = self.end(r - 1)
        ifh = self.opener(self.indexfile, "a+", checkambig=self._checkambig)
        isize = r * self._io.size
        if self._inline:
            transaction.add(self.indexfile, end + isize, r)
            dfh = None
        else:
            transaction.add(self.indexfile, isize, r)
            transaction.add(self.datafile, end)
            dfh = self.opener(self.datafile, "a+")
        def flush():
            if dfh:
                dfh.flush()
            ifh.flush()
        try:
            # loop through our set of deltas
            chain = None
            for chunkdata in iter(lambda: cg.deltachunk(chain), {}):
                node = chunkdata['node']
                p1 = chunkdata['p1']
                p2 = chunkdata['p2']
                cs = chunkdata['cs']
                deltabase = chunkdata['deltabase']
                delta = chunkdata['delta']
                flags = chunkdata['flags'] or REVIDX_DEFAULT_FLAGS

                content.append(node)

                link = linkmapper(cs)
                if node in self.nodemap:
                    # this can happen if two branches make the same change
                    chain = node
                    continue

                for p in (p1, p2):
                    if p not in self.nodemap:
                        raise LookupError(p, self.indexfile,
                                          _('unknown parent'))

                if deltabase not in self.nodemap:
                    raise LookupError(deltabase, self.indexfile,
                                      _('unknown delta base'))

                baserev = self.rev(deltabase)

                if baserev != nullrev and self.iscensored(baserev):
                    # if base is censored, delta must be full replacement in a
                    # single patch operation
                    hlen = struct.calcsize(">lll")
                    oldlen = self.rawsize(baserev)
                    newlen = len(delta) - hlen
                    if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
                        raise error.CensoredBaseError(self.indexfile,
                                                      self.node(baserev))

                if not flags and self._peek_iscensored(baserev, delta, flush):
                    flags |= REVIDX_ISCENSORED

                # We assume consumers of addrevisioncb will want to retrieve
                # the added revision, which will require a call to
                # revision(). revision() will fast path if there is a cache
                # hit. So, we tell _addrevision() to always cache in this case.
                chain = self._addrevision(node, None, transaction, link,
                                          p1, p2, flags, (baserev, delta),
                                          ifh, dfh,
                                          alwayscache=bool(addrevisioncb))

                if addrevisioncb:
                    addrevisioncb(self, chain)

                if not dfh and not self._inline:
                    # addrevision switched from inline to conventional
                    # reopen the index
                    ifh.close()
                    dfh = self.opener(self.datafile, "a+")
                    ifh = self.opener(self.indexfile, "a+",
                                      checkambig=self._checkambig)
        finally:
            if dfh:
                dfh.close()
            ifh.close()

        return content

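    # Illustrative sketch of the per-chunk dict consumed above (keys taken
    # from the loop body; values are hypothetical):
    #
    #   chunkdata = {
    #       'node': node,           # 20-byte nodeid of the new revision
    #       'p1': p1, 'p2': p2,     # parent nodeids
    #       'cs': cs,               # changeset node, mapped via linkmapper
    #       'deltabase': deltabase, # nodeid the delta applies against
    #       'delta': delta,         # the binary delta payload
    #       'flags': flags,         # revision index flags, may be falsy
    #   }
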
    def iscensored(self, rev):
        """Check if a file revision is censored."""
        return False

    def _peek_iscensored(self, baserev, delta, flush):
        """Quickly check if a delta produces a censored revision."""
        return False

    def getstrippoint(self, minlink):
        """find the minimum rev that must be stripped to strip the linkrev

        Returns a tuple containing the minimum rev and a set of all revs that
        have linkrevs that will be broken by this strip.
        """
        brokenrevs = set()
        strippoint = len(self)

        heads = {}
        futurelargelinkrevs = set()
        for head in self.headrevs():
            headlinkrev = self.linkrev(head)
            heads[head] = headlinkrev
            if headlinkrev >= minlink:
                futurelargelinkrevs.add(headlinkrev)

        # This algorithm involves walking down the rev graph, starting at the
        # heads. Since the revs are topologically sorted according to linkrev,
        # once all head linkrevs are below the minlink, we know there are
        # no more revs that could have a linkrev greater than minlink.
        # So we can stop walking.
        while futurelargelinkrevs:
            strippoint -= 1
            linkrev = heads.pop(strippoint)

            if linkrev < minlink:
                brokenrevs.add(strippoint)
            else:
                futurelargelinkrevs.remove(linkrev)

            for p in self.parentrevs(strippoint):
                if p != nullrev:
                    plinkrev = self.linkrev(p)
                    heads[p] = plinkrev
                    if plinkrev >= minlink:
                        futurelargelinkrevs.add(plinkrev)

        return strippoint, brokenrevs

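    # A worked example (illustrative numbers only): in a linear revlog of
    # five revisions with linkrevs [0, 1, 2, 3, 4], getstrippoint(3) walks
    # back from the head until no linkrev >= 3 remains:
    #
    #   rl.getstrippoint(3)   # -> (3, set()), i.e. truncate at rev 3
    #
    # brokenrevs is only non-empty when linkrevs are not monotonic, e.g.
    # when a rev below the strip point links to a changeset being stripped.
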
    def strip(self, minlink, transaction):
        """truncate the revlog on the first revision with a linkrev >= minlink

        This function is called when we're stripping revision minlink and
        its descendants from the repository.

        We have to remove all revisions with linkrev >= minlink, because
        the equivalent changelog revisions will be renumbered after the
        strip.

        So we truncate the revlog on the first of these revisions, and
        trust that the caller has saved the revisions that shouldn't be
        removed and that it'll re-add them after this truncation.
        """
        if len(self) == 0:
            return

        rev, _ = self.getstrippoint(minlink)
        if rev == len(self):
            return

        # first truncate the files on disk
        end = self.start(rev)
        if not self._inline:
            transaction.add(self.datafile, end)
            end = rev * self._io.size
        else:
            end += rev * self._io.size

        transaction.add(self.indexfile, end)

        # then reset internal state in memory to forget those revisions
        self._cache = None
        self._chaininfocache = {}
        self._chunkclear()
        for x in xrange(rev, len(self)):
            del self.nodemap[self.node(x)]

        del self.index[rev:-1]

    def checksize(self):
        expected = 0
        if len(self):
            expected = max(0, self.end(len(self) - 1))

        try:
            f = self.opener(self.datafile)
            f.seek(0, 2)
            actual = f.tell()
            f.close()
            dd = actual - expected
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise
            dd = 0

        try:
            f = self.opener(self.indexfile)
            f.seek(0, 2)
            actual = f.tell()
            f.close()
            s = self._io.size
            i = max(0, actual // s)
            di = actual - (i * s)
            if self._inline:
                databytes = 0
                for r in self:
                    databytes += max(0, self.length(r))
                dd = 0
                di = actual - len(self) * s - databytes
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise
            di = 0

        return (dd, di)

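    # Illustrative note (a sketch, not upstream documentation): checksize()
    # reports how far the on-disk files diverge from what the index implies:
    #
    #   dd = actual data-file size minus the end of the last revision
    #   di = bytes left over in the index file after whole index entries
    #
    # A healthy revlog returns (0, 0); non-zero values suggest a truncated
    # file or trailing garbage.
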
    def files(self):
        res = [self.indexfile]
        if not self._inline:
            res.append(self.datafile)
        return res