##// END OF EJS Templates
addrevision: only use the incoming base if it is a good delta (issue4975)...
Pierre-Yves David -
r27180:8e7db961 default
parent child Browse files
Show More
@@ -1,1738 +1,1743 b''
1 # revlog.py - storage back-end for mercurial
1 # revlog.py - storage back-end for mercurial
2 #
2 #
3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 #
4 #
5 # This software may be used and distributed according to the terms of the
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
6 # GNU General Public License version 2 or any later version.
7
7
8 """Storage back-end for Mercurial.
8 """Storage back-end for Mercurial.
9
9
10 This provides efficient delta storage with O(1) retrieve and append
10 This provides efficient delta storage with O(1) retrieve and append
11 and O(changes) merge between branches.
11 and O(changes) merge between branches.
12 """
12 """
13
13
14 # import stuff from node for others to import from revlog
14 # import stuff from node for others to import from revlog
15 import collections
15 import collections
16 from node import bin, hex, nullid, nullrev
16 from node import bin, hex, nullid, nullrev
17 from i18n import _
17 from i18n import _
18 import ancestor, mdiff, parsers, error, util, templatefilters
18 import ancestor, mdiff, parsers, error, util, templatefilters
19 import struct, zlib, errno
19 import struct, zlib, errno
20
20
21 _pack = struct.pack
21 _pack = struct.pack
22 _unpack = struct.unpack
22 _unpack = struct.unpack
23 _compress = zlib.compress
23 _compress = zlib.compress
24 _decompress = zlib.decompress
24 _decompress = zlib.decompress
25 _sha = util.sha1
25 _sha = util.sha1
26
26
27 # revlog header flags
27 # revlog header flags
28 REVLOGV0 = 0
28 REVLOGV0 = 0
29 REVLOGNG = 1
29 REVLOGNG = 1
30 REVLOGNGINLINEDATA = (1 << 16)
30 REVLOGNGINLINEDATA = (1 << 16)
31 REVLOGGENERALDELTA = (1 << 17)
31 REVLOGGENERALDELTA = (1 << 17)
32 REVLOG_DEFAULT_FLAGS = REVLOGNGINLINEDATA
32 REVLOG_DEFAULT_FLAGS = REVLOGNGINLINEDATA
33 REVLOG_DEFAULT_FORMAT = REVLOGNG
33 REVLOG_DEFAULT_FORMAT = REVLOGNG
34 REVLOG_DEFAULT_VERSION = REVLOG_DEFAULT_FORMAT | REVLOG_DEFAULT_FLAGS
34 REVLOG_DEFAULT_VERSION = REVLOG_DEFAULT_FORMAT | REVLOG_DEFAULT_FLAGS
35 REVLOGNG_FLAGS = REVLOGNGINLINEDATA | REVLOGGENERALDELTA
35 REVLOGNG_FLAGS = REVLOGNGINLINEDATA | REVLOGGENERALDELTA
36
36
37 # revlog index flags
37 # revlog index flags
38 REVIDX_ISCENSORED = (1 << 15) # revision has censor metadata, must be verified
38 REVIDX_ISCENSORED = (1 << 15) # revision has censor metadata, must be verified
39 REVIDX_DEFAULT_FLAGS = 0
39 REVIDX_DEFAULT_FLAGS = 0
40 REVIDX_KNOWN_FLAGS = REVIDX_ISCENSORED
40 REVIDX_KNOWN_FLAGS = REVIDX_ISCENSORED
41
41
42 # max size of revlog with inline data
42 # max size of revlog with inline data
43 _maxinline = 131072
43 _maxinline = 131072
44 _chunksize = 1048576
44 _chunksize = 1048576
45
45
46 RevlogError = error.RevlogError
46 RevlogError = error.RevlogError
47 LookupError = error.LookupError
47 LookupError = error.LookupError
48 CensoredNodeError = error.CensoredNodeError
48 CensoredNodeError = error.CensoredNodeError
49
49
50 def getoffset(q):
50 def getoffset(q):
51 return int(q >> 16)
51 return int(q >> 16)
52
52
53 def gettype(q):
53 def gettype(q):
54 return int(q & 0xFFFF)
54 return int(q & 0xFFFF)
55
55
56 def offset_type(offset, type):
56 def offset_type(offset, type):
57 return long(long(offset) << 16 | type)
57 return long(long(offset) << 16 | type)
58
58
59 _nullhash = _sha(nullid)
59 _nullhash = _sha(nullid)
60
60
61 def hash(text, p1, p2):
61 def hash(text, p1, p2):
62 """generate a hash from the given text and its parent hashes
62 """generate a hash from the given text and its parent hashes
63
63
64 This hash combines both the current file contents and its history
64 This hash combines both the current file contents and its history
65 in a manner that makes it easy to distinguish nodes with the same
65 in a manner that makes it easy to distinguish nodes with the same
66 content in the revision graph.
66 content in the revision graph.
67 """
67 """
68 # As of now, if one of the parent node is null, p2 is null
68 # As of now, if one of the parent node is null, p2 is null
69 if p2 == nullid:
69 if p2 == nullid:
70 # deep copy of a hash is faster than creating one
70 # deep copy of a hash is faster than creating one
71 s = _nullhash.copy()
71 s = _nullhash.copy()
72 s.update(p1)
72 s.update(p1)
73 else:
73 else:
74 # none of the parent nodes are nullid
74 # none of the parent nodes are nullid
75 l = [p1, p2]
75 l = [p1, p2]
76 l.sort()
76 l.sort()
77 s = _sha(l[0])
77 s = _sha(l[0])
78 s.update(l[1])
78 s.update(l[1])
79 s.update(text)
79 s.update(text)
80 return s.digest()
80 return s.digest()
81
81
82 def decompress(bin):
82 def decompress(bin):
83 """ decompress the given input """
83 """ decompress the given input """
84 if not bin:
84 if not bin:
85 return bin
85 return bin
86 t = bin[0]
86 t = bin[0]
87 if t == '\0':
87 if t == '\0':
88 return bin
88 return bin
89 if t == 'x':
89 if t == 'x':
90 try:
90 try:
91 return _decompress(bin)
91 return _decompress(bin)
92 except zlib.error as e:
92 except zlib.error as e:
93 raise RevlogError(_("revlog decompress error: %s") % str(e))
93 raise RevlogError(_("revlog decompress error: %s") % str(e))
94 if t == 'u':
94 if t == 'u':
95 return bin[1:]
95 return bin[1:]
96 raise RevlogError(_("unknown compression type %r") % t)
96 raise RevlogError(_("unknown compression type %r") % t)
97
97
98 # index v0:
98 # index v0:
99 # 4 bytes: offset
99 # 4 bytes: offset
100 # 4 bytes: compressed length
100 # 4 bytes: compressed length
101 # 4 bytes: base rev
101 # 4 bytes: base rev
102 # 4 bytes: link rev
102 # 4 bytes: link rev
103 # 20 bytes: parent 1 nodeid
103 # 20 bytes: parent 1 nodeid
104 # 20 bytes: parent 2 nodeid
104 # 20 bytes: parent 2 nodeid
105 # 20 bytes: nodeid
105 # 20 bytes: nodeid
106 indexformatv0 = ">4l20s20s20s"
106 indexformatv0 = ">4l20s20s20s"
107
107
108 class revlogoldio(object):
108 class revlogoldio(object):
109 def __init__(self):
109 def __init__(self):
110 self.size = struct.calcsize(indexformatv0)
110 self.size = struct.calcsize(indexformatv0)
111
111
112 def parseindex(self, data, inline):
112 def parseindex(self, data, inline):
113 s = self.size
113 s = self.size
114 index = []
114 index = []
115 nodemap = {nullid: nullrev}
115 nodemap = {nullid: nullrev}
116 n = off = 0
116 n = off = 0
117 l = len(data)
117 l = len(data)
118 while off + s <= l:
118 while off + s <= l:
119 cur = data[off:off + s]
119 cur = data[off:off + s]
120 off += s
120 off += s
121 e = _unpack(indexformatv0, cur)
121 e = _unpack(indexformatv0, cur)
122 # transform to revlogv1 format
122 # transform to revlogv1 format
123 e2 = (offset_type(e[0], 0), e[1], -1, e[2], e[3],
123 e2 = (offset_type(e[0], 0), e[1], -1, e[2], e[3],
124 nodemap.get(e[4], nullrev), nodemap.get(e[5], nullrev), e[6])
124 nodemap.get(e[4], nullrev), nodemap.get(e[5], nullrev), e[6])
125 index.append(e2)
125 index.append(e2)
126 nodemap[e[6]] = n
126 nodemap[e[6]] = n
127 n += 1
127 n += 1
128
128
129 # add the magic null revision at -1
129 # add the magic null revision at -1
130 index.append((0, 0, 0, -1, -1, -1, -1, nullid))
130 index.append((0, 0, 0, -1, -1, -1, -1, nullid))
131
131
132 return index, nodemap, None
132 return index, nodemap, None
133
133
134 def packentry(self, entry, node, version, rev):
134 def packentry(self, entry, node, version, rev):
135 if gettype(entry[0]):
135 if gettype(entry[0]):
136 raise RevlogError(_("index entry flags need RevlogNG"))
136 raise RevlogError(_("index entry flags need RevlogNG"))
137 e2 = (getoffset(entry[0]), entry[1], entry[3], entry[4],
137 e2 = (getoffset(entry[0]), entry[1], entry[3], entry[4],
138 node(entry[5]), node(entry[6]), entry[7])
138 node(entry[5]), node(entry[6]), entry[7])
139 return _pack(indexformatv0, *e2)
139 return _pack(indexformatv0, *e2)
140
140
141 # index ng:
141 # index ng:
142 # 6 bytes: offset
142 # 6 bytes: offset
143 # 2 bytes: flags
143 # 2 bytes: flags
144 # 4 bytes: compressed length
144 # 4 bytes: compressed length
145 # 4 bytes: uncompressed length
145 # 4 bytes: uncompressed length
146 # 4 bytes: base rev
146 # 4 bytes: base rev
147 # 4 bytes: link rev
147 # 4 bytes: link rev
148 # 4 bytes: parent 1 rev
148 # 4 bytes: parent 1 rev
149 # 4 bytes: parent 2 rev
149 # 4 bytes: parent 2 rev
150 # 32 bytes: nodeid
150 # 32 bytes: nodeid
151 indexformatng = ">Qiiiiii20s12x"
151 indexformatng = ">Qiiiiii20s12x"
152 versionformat = ">I"
152 versionformat = ">I"
153
153
154 # corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
154 # corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
155 # signed integer)
155 # signed integer)
156 _maxentrysize = 0x7fffffff
156 _maxentrysize = 0x7fffffff
157
157
158 class revlogio(object):
158 class revlogio(object):
159 def __init__(self):
159 def __init__(self):
160 self.size = struct.calcsize(indexformatng)
160 self.size = struct.calcsize(indexformatng)
161
161
162 def parseindex(self, data, inline):
162 def parseindex(self, data, inline):
163 # call the C implementation to parse the index data
163 # call the C implementation to parse the index data
164 index, cache = parsers.parse_index2(data, inline)
164 index, cache = parsers.parse_index2(data, inline)
165 return index, getattr(index, 'nodemap', None), cache
165 return index, getattr(index, 'nodemap', None), cache
166
166
167 def packentry(self, entry, node, version, rev):
167 def packentry(self, entry, node, version, rev):
168 p = _pack(indexformatng, *entry)
168 p = _pack(indexformatng, *entry)
169 if rev == 0:
169 if rev == 0:
170 p = _pack(versionformat, version) + p[4:]
170 p = _pack(versionformat, version) + p[4:]
171 return p
171 return p
172
172
173 class revlog(object):
173 class revlog(object):
174 """
174 """
175 the underlying revision storage object
175 the underlying revision storage object
176
176
177 A revlog consists of two parts, an index and the revision data.
177 A revlog consists of two parts, an index and the revision data.
178
178
179 The index is a file with a fixed record size containing
179 The index is a file with a fixed record size containing
180 information on each revision, including its nodeid (hash), the
180 information on each revision, including its nodeid (hash), the
181 nodeids of its parents, the position and offset of its data within
181 nodeids of its parents, the position and offset of its data within
182 the data file, and the revision it's based on. Finally, each entry
182 the data file, and the revision it's based on. Finally, each entry
183 contains a linkrev entry that can serve as a pointer to external
183 contains a linkrev entry that can serve as a pointer to external
184 data.
184 data.
185
185
186 The revision data itself is a linear collection of data chunks.
186 The revision data itself is a linear collection of data chunks.
187 Each chunk represents a revision and is usually represented as a
187 Each chunk represents a revision and is usually represented as a
188 delta against the previous chunk. To bound lookup time, runs of
188 delta against the previous chunk. To bound lookup time, runs of
189 deltas are limited to about 2 times the length of the original
189 deltas are limited to about 2 times the length of the original
190 version data. This makes retrieval of a version proportional to
190 version data. This makes retrieval of a version proportional to
191 its size, or O(1) relative to the number of revisions.
191 its size, or O(1) relative to the number of revisions.
192
192
193 Both pieces of the revlog are written to in an append-only
193 Both pieces of the revlog are written to in an append-only
194 fashion, which means we never need to rewrite a file to insert or
194 fashion, which means we never need to rewrite a file to insert or
195 remove data, and can use some simple techniques to avoid the need
195 remove data, and can use some simple techniques to avoid the need
196 for locking while reading.
196 for locking while reading.
197 """
197 """
198 def __init__(self, opener, indexfile):
198 def __init__(self, opener, indexfile):
199 """
199 """
200 create a revlog object
200 create a revlog object
201
201
202 opener is a function that abstracts the file opening operation
202 opener is a function that abstracts the file opening operation
203 and can be used to implement COW semantics or the like.
203 and can be used to implement COW semantics or the like.
204 """
204 """
205 self.indexfile = indexfile
205 self.indexfile = indexfile
206 self.datafile = indexfile[:-2] + ".d"
206 self.datafile = indexfile[:-2] + ".d"
207 self.opener = opener
207 self.opener = opener
208 # 3-tuple of (node, rev, text) for a raw revision.
208 # 3-tuple of (node, rev, text) for a raw revision.
209 self._cache = None
209 self._cache = None
210 # 2-tuple of (rev, baserev) defining the base revision the delta chain
210 # 2-tuple of (rev, baserev) defining the base revision the delta chain
211 # begins at for a revision.
211 # begins at for a revision.
212 self._basecache = None
212 self._basecache = None
213 # 2-tuple of (offset, data) of raw data from the revlog at an offset.
213 # 2-tuple of (offset, data) of raw data from the revlog at an offset.
214 self._chunkcache = (0, '')
214 self._chunkcache = (0, '')
215 # How much data to read and cache into the raw revlog data cache.
215 # How much data to read and cache into the raw revlog data cache.
216 self._chunkcachesize = 65536
216 self._chunkcachesize = 65536
217 self._maxchainlen = None
217 self._maxchainlen = None
218 self._aggressivemergedeltas = False
218 self._aggressivemergedeltas = False
219 self.index = []
219 self.index = []
220 # Mapping of partial identifiers to full nodes.
220 # Mapping of partial identifiers to full nodes.
221 self._pcache = {}
221 self._pcache = {}
222 # Mapping of revision integer to full node.
222 # Mapping of revision integer to full node.
223 self._nodecache = {nullid: nullrev}
223 self._nodecache = {nullid: nullrev}
224 self._nodepos = None
224 self._nodepos = None
225
225
226 v = REVLOG_DEFAULT_VERSION
226 v = REVLOG_DEFAULT_VERSION
227 opts = getattr(opener, 'options', None)
227 opts = getattr(opener, 'options', None)
228 if opts is not None:
228 if opts is not None:
229 if 'revlogv1' in opts:
229 if 'revlogv1' in opts:
230 if 'generaldelta' in opts:
230 if 'generaldelta' in opts:
231 v |= REVLOGGENERALDELTA
231 v |= REVLOGGENERALDELTA
232 else:
232 else:
233 v = 0
233 v = 0
234 if 'chunkcachesize' in opts:
234 if 'chunkcachesize' in opts:
235 self._chunkcachesize = opts['chunkcachesize']
235 self._chunkcachesize = opts['chunkcachesize']
236 if 'maxchainlen' in opts:
236 if 'maxchainlen' in opts:
237 self._maxchainlen = opts['maxchainlen']
237 self._maxchainlen = opts['maxchainlen']
238 if 'aggressivemergedeltas' in opts:
238 if 'aggressivemergedeltas' in opts:
239 self._aggressivemergedeltas = opts['aggressivemergedeltas']
239 self._aggressivemergedeltas = opts['aggressivemergedeltas']
240 self._lazydeltabase = bool(opts.get('lazydeltabase', False))
240 self._lazydeltabase = bool(opts.get('lazydeltabase', False))
241
241
242 if self._chunkcachesize <= 0:
242 if self._chunkcachesize <= 0:
243 raise RevlogError(_('revlog chunk cache size %r is not greater '
243 raise RevlogError(_('revlog chunk cache size %r is not greater '
244 'than 0') % self._chunkcachesize)
244 'than 0') % self._chunkcachesize)
245 elif self._chunkcachesize & (self._chunkcachesize - 1):
245 elif self._chunkcachesize & (self._chunkcachesize - 1):
246 raise RevlogError(_('revlog chunk cache size %r is not a power '
246 raise RevlogError(_('revlog chunk cache size %r is not a power '
247 'of 2') % self._chunkcachesize)
247 'of 2') % self._chunkcachesize)
248
248
249 indexdata = ''
249 indexdata = ''
250 self._initempty = True
250 self._initempty = True
251 try:
251 try:
252 f = self.opener(self.indexfile)
252 f = self.opener(self.indexfile)
253 indexdata = f.read()
253 indexdata = f.read()
254 f.close()
254 f.close()
255 if len(indexdata) > 0:
255 if len(indexdata) > 0:
256 v = struct.unpack(versionformat, indexdata[:4])[0]
256 v = struct.unpack(versionformat, indexdata[:4])[0]
257 self._initempty = False
257 self._initempty = False
258 except IOError as inst:
258 except IOError as inst:
259 if inst.errno != errno.ENOENT:
259 if inst.errno != errno.ENOENT:
260 raise
260 raise
261
261
262 self.version = v
262 self.version = v
263 self._inline = v & REVLOGNGINLINEDATA
263 self._inline = v & REVLOGNGINLINEDATA
264 self._generaldelta = v & REVLOGGENERALDELTA
264 self._generaldelta = v & REVLOGGENERALDELTA
265 flags = v & ~0xFFFF
265 flags = v & ~0xFFFF
266 fmt = v & 0xFFFF
266 fmt = v & 0xFFFF
267 if fmt == REVLOGV0 and flags:
267 if fmt == REVLOGV0 and flags:
268 raise RevlogError(_("index %s unknown flags %#04x for format v0")
268 raise RevlogError(_("index %s unknown flags %#04x for format v0")
269 % (self.indexfile, flags >> 16))
269 % (self.indexfile, flags >> 16))
270 elif fmt == REVLOGNG and flags & ~REVLOGNG_FLAGS:
270 elif fmt == REVLOGNG and flags & ~REVLOGNG_FLAGS:
271 raise RevlogError(_("index %s unknown flags %#04x for revlogng")
271 raise RevlogError(_("index %s unknown flags %#04x for revlogng")
272 % (self.indexfile, flags >> 16))
272 % (self.indexfile, flags >> 16))
273 elif fmt > REVLOGNG:
273 elif fmt > REVLOGNG:
274 raise RevlogError(_("index %s unknown format %d")
274 raise RevlogError(_("index %s unknown format %d")
275 % (self.indexfile, fmt))
275 % (self.indexfile, fmt))
276
276
277 self._io = revlogio()
277 self._io = revlogio()
278 if self.version == REVLOGV0:
278 if self.version == REVLOGV0:
279 self._io = revlogoldio()
279 self._io = revlogoldio()
280 try:
280 try:
281 d = self._io.parseindex(indexdata, self._inline)
281 d = self._io.parseindex(indexdata, self._inline)
282 except (ValueError, IndexError):
282 except (ValueError, IndexError):
283 raise RevlogError(_("index %s is corrupted") % (self.indexfile))
283 raise RevlogError(_("index %s is corrupted") % (self.indexfile))
284 self.index, nodemap, self._chunkcache = d
284 self.index, nodemap, self._chunkcache = d
285 if nodemap is not None:
285 if nodemap is not None:
286 self.nodemap = self._nodecache = nodemap
286 self.nodemap = self._nodecache = nodemap
287 if not self._chunkcache:
287 if not self._chunkcache:
288 self._chunkclear()
288 self._chunkclear()
289 # revnum -> (chain-length, sum-delta-length)
289 # revnum -> (chain-length, sum-delta-length)
290 self._chaininfocache = {}
290 self._chaininfocache = {}
291
291
292 def tip(self):
292 def tip(self):
293 return self.node(len(self.index) - 2)
293 return self.node(len(self.index) - 2)
294 def __contains__(self, rev):
294 def __contains__(self, rev):
295 return 0 <= rev < len(self)
295 return 0 <= rev < len(self)
296 def __len__(self):
296 def __len__(self):
297 return len(self.index) - 1
297 return len(self.index) - 1
298 def __iter__(self):
298 def __iter__(self):
299 return iter(xrange(len(self)))
299 return iter(xrange(len(self)))
300 def revs(self, start=0, stop=None):
300 def revs(self, start=0, stop=None):
301 """iterate over all rev in this revlog (from start to stop)"""
301 """iterate over all rev in this revlog (from start to stop)"""
302 step = 1
302 step = 1
303 if stop is not None:
303 if stop is not None:
304 if start > stop:
304 if start > stop:
305 step = -1
305 step = -1
306 stop += step
306 stop += step
307 else:
307 else:
308 stop = len(self)
308 stop = len(self)
309 return xrange(start, stop, step)
309 return xrange(start, stop, step)
310
310
311 @util.propertycache
311 @util.propertycache
312 def nodemap(self):
312 def nodemap(self):
313 self.rev(self.node(0))
313 self.rev(self.node(0))
314 return self._nodecache
314 return self._nodecache
315
315
316 def hasnode(self, node):
316 def hasnode(self, node):
317 try:
317 try:
318 self.rev(node)
318 self.rev(node)
319 return True
319 return True
320 except KeyError:
320 except KeyError:
321 return False
321 return False
322
322
323 def clearcaches(self):
323 def clearcaches(self):
324 try:
324 try:
325 self._nodecache.clearcaches()
325 self._nodecache.clearcaches()
326 except AttributeError:
326 except AttributeError:
327 self._nodecache = {nullid: nullrev}
327 self._nodecache = {nullid: nullrev}
328 self._nodepos = None
328 self._nodepos = None
329
329
330 def rev(self, node):
330 def rev(self, node):
331 try:
331 try:
332 return self._nodecache[node]
332 return self._nodecache[node]
333 except TypeError:
333 except TypeError:
334 raise
334 raise
335 except RevlogError:
335 except RevlogError:
336 # parsers.c radix tree lookup failed
336 # parsers.c radix tree lookup failed
337 raise LookupError(node, self.indexfile, _('no node'))
337 raise LookupError(node, self.indexfile, _('no node'))
338 except KeyError:
338 except KeyError:
339 # pure python cache lookup failed
339 # pure python cache lookup failed
340 n = self._nodecache
340 n = self._nodecache
341 i = self.index
341 i = self.index
342 p = self._nodepos
342 p = self._nodepos
343 if p is None:
343 if p is None:
344 p = len(i) - 2
344 p = len(i) - 2
345 for r in xrange(p, -1, -1):
345 for r in xrange(p, -1, -1):
346 v = i[r][7]
346 v = i[r][7]
347 n[v] = r
347 n[v] = r
348 if v == node:
348 if v == node:
349 self._nodepos = r - 1
349 self._nodepos = r - 1
350 return r
350 return r
351 raise LookupError(node, self.indexfile, _('no node'))
351 raise LookupError(node, self.indexfile, _('no node'))
352
352
353 def node(self, rev):
353 def node(self, rev):
354 return self.index[rev][7]
354 return self.index[rev][7]
355 def linkrev(self, rev):
355 def linkrev(self, rev):
356 return self.index[rev][4]
356 return self.index[rev][4]
357 def parents(self, node):
357 def parents(self, node):
358 i = self.index
358 i = self.index
359 d = i[self.rev(node)]
359 d = i[self.rev(node)]
360 return i[d[5]][7], i[d[6]][7] # map revisions to nodes inline
360 return i[d[5]][7], i[d[6]][7] # map revisions to nodes inline
361 def parentrevs(self, rev):
361 def parentrevs(self, rev):
362 return self.index[rev][5:7]
362 return self.index[rev][5:7]
363 def start(self, rev):
363 def start(self, rev):
364 return int(self.index[rev][0] >> 16)
364 return int(self.index[rev][0] >> 16)
365 def end(self, rev):
365 def end(self, rev):
366 return self.start(rev) + self.length(rev)
366 return self.start(rev) + self.length(rev)
367 def length(self, rev):
367 def length(self, rev):
368 return self.index[rev][1]
368 return self.index[rev][1]
369 def chainbase(self, rev):
369 def chainbase(self, rev):
370 index = self.index
370 index = self.index
371 base = index[rev][3]
371 base = index[rev][3]
372 while base != rev:
372 while base != rev:
373 rev = base
373 rev = base
374 base = index[rev][3]
374 base = index[rev][3]
375 return base
375 return base
376 def chainlen(self, rev):
376 def chainlen(self, rev):
377 return self._chaininfo(rev)[0]
377 return self._chaininfo(rev)[0]
378
378
379 def _chaininfo(self, rev):
379 def _chaininfo(self, rev):
380 chaininfocache = self._chaininfocache
380 chaininfocache = self._chaininfocache
381 if rev in chaininfocache:
381 if rev in chaininfocache:
382 return chaininfocache[rev]
382 return chaininfocache[rev]
383 index = self.index
383 index = self.index
384 generaldelta = self._generaldelta
384 generaldelta = self._generaldelta
385 iterrev = rev
385 iterrev = rev
386 e = index[iterrev]
386 e = index[iterrev]
387 clen = 0
387 clen = 0
388 compresseddeltalen = 0
388 compresseddeltalen = 0
389 while iterrev != e[3]:
389 while iterrev != e[3]:
390 clen += 1
390 clen += 1
391 compresseddeltalen += e[1]
391 compresseddeltalen += e[1]
392 if generaldelta:
392 if generaldelta:
393 iterrev = e[3]
393 iterrev = e[3]
394 else:
394 else:
395 iterrev -= 1
395 iterrev -= 1
396 if iterrev in chaininfocache:
396 if iterrev in chaininfocache:
397 t = chaininfocache[iterrev]
397 t = chaininfocache[iterrev]
398 clen += t[0]
398 clen += t[0]
399 compresseddeltalen += t[1]
399 compresseddeltalen += t[1]
400 break
400 break
401 e = index[iterrev]
401 e = index[iterrev]
402 else:
402 else:
403 # Add text length of base since decompressing that also takes
403 # Add text length of base since decompressing that also takes
404 # work. For cache hits the length is already included.
404 # work. For cache hits the length is already included.
405 compresseddeltalen += e[1]
405 compresseddeltalen += e[1]
406 r = (clen, compresseddeltalen)
406 r = (clen, compresseddeltalen)
407 chaininfocache[rev] = r
407 chaininfocache[rev] = r
408 return r
408 return r
409
409
410 def flags(self, rev):
410 def flags(self, rev):
411 return self.index[rev][0] & 0xFFFF
411 return self.index[rev][0] & 0xFFFF
412 def rawsize(self, rev):
412 def rawsize(self, rev):
413 """return the length of the uncompressed text for a given revision"""
413 """return the length of the uncompressed text for a given revision"""
414 l = self.index[rev][2]
414 l = self.index[rev][2]
415 if l >= 0:
415 if l >= 0:
416 return l
416 return l
417
417
418 t = self.revision(self.node(rev))
418 t = self.revision(self.node(rev))
419 return len(t)
419 return len(t)
420 size = rawsize
420 size = rawsize
421
421
422 def ancestors(self, revs, stoprev=0, inclusive=False):
422 def ancestors(self, revs, stoprev=0, inclusive=False):
423 """Generate the ancestors of 'revs' in reverse topological order.
423 """Generate the ancestors of 'revs' in reverse topological order.
424 Does not generate revs lower than stoprev.
424 Does not generate revs lower than stoprev.
425
425
426 See the documentation for ancestor.lazyancestors for more details."""
426 See the documentation for ancestor.lazyancestors for more details."""
427
427
428 return ancestor.lazyancestors(self.parentrevs, revs, stoprev=stoprev,
428 return ancestor.lazyancestors(self.parentrevs, revs, stoprev=stoprev,
429 inclusive=inclusive)
429 inclusive=inclusive)
430
430
431 def descendants(self, revs):
431 def descendants(self, revs):
432 """Generate the descendants of 'revs' in revision order.
432 """Generate the descendants of 'revs' in revision order.
433
433
434 Yield a sequence of revision numbers starting with a child of
434 Yield a sequence of revision numbers starting with a child of
435 some rev in revs, i.e., each revision is *not* considered a
435 some rev in revs, i.e., each revision is *not* considered a
436 descendant of itself. Results are ordered by revision number (a
436 descendant of itself. Results are ordered by revision number (a
437 topological sort)."""
437 topological sort)."""
438 first = min(revs)
438 first = min(revs)
439 if first == nullrev:
439 if first == nullrev:
440 for i in self:
440 for i in self:
441 yield i
441 yield i
442 return
442 return
443
443
444 seen = set(revs)
444 seen = set(revs)
445 for i in self.revs(start=first + 1):
445 for i in self.revs(start=first + 1):
446 for x in self.parentrevs(i):
446 for x in self.parentrevs(i):
447 if x != nullrev and x in seen:
447 if x != nullrev and x in seen:
448 seen.add(i)
448 seen.add(i)
449 yield i
449 yield i
450 break
450 break
451
451
452 def findcommonmissing(self, common=None, heads=None):
452 def findcommonmissing(self, common=None, heads=None):
453 """Return a tuple of the ancestors of common and the ancestors of heads
453 """Return a tuple of the ancestors of common and the ancestors of heads
454 that are not ancestors of common. In revset terminology, we return the
454 that are not ancestors of common. In revset terminology, we return the
455 tuple:
455 tuple:
456
456
457 ::common, (::heads) - (::common)
457 ::common, (::heads) - (::common)
458
458
459 The list is sorted by revision number, meaning it is
459 The list is sorted by revision number, meaning it is
460 topologically sorted.
460 topologically sorted.
461
461
462 'heads' and 'common' are both lists of node IDs. If heads is
462 'heads' and 'common' are both lists of node IDs. If heads is
463 not supplied, uses all of the revlog's heads. If common is not
463 not supplied, uses all of the revlog's heads. If common is not
464 supplied, uses nullid."""
464 supplied, uses nullid."""
465 if common is None:
465 if common is None:
466 common = [nullid]
466 common = [nullid]
467 if heads is None:
467 if heads is None:
468 heads = self.heads()
468 heads = self.heads()
469
469
470 common = [self.rev(n) for n in common]
470 common = [self.rev(n) for n in common]
471 heads = [self.rev(n) for n in heads]
471 heads = [self.rev(n) for n in heads]
472
472
473 # we want the ancestors, but inclusive
473 # we want the ancestors, but inclusive
474 class lazyset(object):
474 class lazyset(object):
475 def __init__(self, lazyvalues):
475 def __init__(self, lazyvalues):
476 self.addedvalues = set()
476 self.addedvalues = set()
477 self.lazyvalues = lazyvalues
477 self.lazyvalues = lazyvalues
478
478
479 def __contains__(self, value):
479 def __contains__(self, value):
480 return value in self.addedvalues or value in self.lazyvalues
480 return value in self.addedvalues or value in self.lazyvalues
481
481
482 def __iter__(self):
482 def __iter__(self):
483 added = self.addedvalues
483 added = self.addedvalues
484 for r in added:
484 for r in added:
485 yield r
485 yield r
486 for r in self.lazyvalues:
486 for r in self.lazyvalues:
487 if not r in added:
487 if not r in added:
488 yield r
488 yield r
489
489
490 def add(self, value):
490 def add(self, value):
491 self.addedvalues.add(value)
491 self.addedvalues.add(value)
492
492
493 def update(self, values):
493 def update(self, values):
494 self.addedvalues.update(values)
494 self.addedvalues.update(values)
495
495
496 has = lazyset(self.ancestors(common))
496 has = lazyset(self.ancestors(common))
497 has.add(nullrev)
497 has.add(nullrev)
498 has.update(common)
498 has.update(common)
499
499
500 # take all ancestors from heads that aren't in has
500 # take all ancestors from heads that aren't in has
501 missing = set()
501 missing = set()
502 visit = collections.deque(r for r in heads if r not in has)
502 visit = collections.deque(r for r in heads if r not in has)
503 while visit:
503 while visit:
504 r = visit.popleft()
504 r = visit.popleft()
505 if r in missing:
505 if r in missing:
506 continue
506 continue
507 else:
507 else:
508 missing.add(r)
508 missing.add(r)
509 for p in self.parentrevs(r):
509 for p in self.parentrevs(r):
510 if p not in has:
510 if p not in has:
511 visit.append(p)
511 visit.append(p)
512 missing = list(missing)
512 missing = list(missing)
513 missing.sort()
513 missing.sort()
514 return has, [self.node(r) for r in missing]
514 return has, [self.node(r) for r in missing]
515
515
516 def incrementalmissingrevs(self, common=None):
516 def incrementalmissingrevs(self, common=None):
517 """Return an object that can be used to incrementally compute the
517 """Return an object that can be used to incrementally compute the
518 revision numbers of the ancestors of arbitrary sets that are not
518 revision numbers of the ancestors of arbitrary sets that are not
519 ancestors of common. This is an ancestor.incrementalmissingancestors
519 ancestors of common. This is an ancestor.incrementalmissingancestors
520 object.
520 object.
521
521
522 'common' is a list of revision numbers. If common is not supplied, uses
522 'common' is a list of revision numbers. If common is not supplied, uses
523 nullrev.
523 nullrev.
524 """
524 """
525 if common is None:
525 if common is None:
526 common = [nullrev]
526 common = [nullrev]
527
527
528 return ancestor.incrementalmissingancestors(self.parentrevs, common)
528 return ancestor.incrementalmissingancestors(self.parentrevs, common)
529
529
530 def findmissingrevs(self, common=None, heads=None):
530 def findmissingrevs(self, common=None, heads=None):
531 """Return the revision numbers of the ancestors of heads that
531 """Return the revision numbers of the ancestors of heads that
532 are not ancestors of common.
532 are not ancestors of common.
533
533
534 More specifically, return a list of revision numbers corresponding to
534 More specifically, return a list of revision numbers corresponding to
535 nodes N such that every N satisfies the following constraints:
535 nodes N such that every N satisfies the following constraints:
536
536
537 1. N is an ancestor of some node in 'heads'
537 1. N is an ancestor of some node in 'heads'
538 2. N is not an ancestor of any node in 'common'
538 2. N is not an ancestor of any node in 'common'
539
539
540 The list is sorted by revision number, meaning it is
540 The list is sorted by revision number, meaning it is
541 topologically sorted.
541 topologically sorted.
542
542
543 'heads' and 'common' are both lists of revision numbers. If heads is
543 'heads' and 'common' are both lists of revision numbers. If heads is
544 not supplied, uses all of the revlog's heads. If common is not
544 not supplied, uses all of the revlog's heads. If common is not
545 supplied, uses nullid."""
545 supplied, uses nullid."""
546 if common is None:
546 if common is None:
547 common = [nullrev]
547 common = [nullrev]
548 if heads is None:
548 if heads is None:
549 heads = self.headrevs()
549 heads = self.headrevs()
550
550
551 inc = self.incrementalmissingrevs(common=common)
551 inc = self.incrementalmissingrevs(common=common)
552 return inc.missingancestors(heads)
552 return inc.missingancestors(heads)
553
553
554 def findmissing(self, common=None, heads=None):
554 def findmissing(self, common=None, heads=None):
555 """Return the ancestors of heads that are not ancestors of common.
555 """Return the ancestors of heads that are not ancestors of common.
556
556
557 More specifically, return a list of nodes N such that every N
557 More specifically, return a list of nodes N such that every N
558 satisfies the following constraints:
558 satisfies the following constraints:
559
559
560 1. N is an ancestor of some node in 'heads'
560 1. N is an ancestor of some node in 'heads'
561 2. N is not an ancestor of any node in 'common'
561 2. N is not an ancestor of any node in 'common'
562
562
563 The list is sorted by revision number, meaning it is
563 The list is sorted by revision number, meaning it is
564 topologically sorted.
564 topologically sorted.
565
565
566 'heads' and 'common' are both lists of node IDs. If heads is
566 'heads' and 'common' are both lists of node IDs. If heads is
567 not supplied, uses all of the revlog's heads. If common is not
567 not supplied, uses all of the revlog's heads. If common is not
568 supplied, uses nullid."""
568 supplied, uses nullid."""
569 if common is None:
569 if common is None:
570 common = [nullid]
570 common = [nullid]
571 if heads is None:
571 if heads is None:
572 heads = self.heads()
572 heads = self.heads()
573
573
574 common = [self.rev(n) for n in common]
574 common = [self.rev(n) for n in common]
575 heads = [self.rev(n) for n in heads]
575 heads = [self.rev(n) for n in heads]
576
576
577 inc = self.incrementalmissingrevs(common=common)
577 inc = self.incrementalmissingrevs(common=common)
578 return [self.node(r) for r in inc.missingancestors(heads)]
578 return [self.node(r) for r in inc.missingancestors(heads)]
579
579
580 def nodesbetween(self, roots=None, heads=None):
580 def nodesbetween(self, roots=None, heads=None):
581 """Return a topological path from 'roots' to 'heads'.
581 """Return a topological path from 'roots' to 'heads'.
582
582
583 Return a tuple (nodes, outroots, outheads) where 'nodes' is a
583 Return a tuple (nodes, outroots, outheads) where 'nodes' is a
584 topologically sorted list of all nodes N that satisfy both of
584 topologically sorted list of all nodes N that satisfy both of
585 these constraints:
585 these constraints:
586
586
587 1. N is a descendant of some node in 'roots'
587 1. N is a descendant of some node in 'roots'
588 2. N is an ancestor of some node in 'heads'
588 2. N is an ancestor of some node in 'heads'
589
589
590 Every node is considered to be both a descendant and an ancestor
590 Every node is considered to be both a descendant and an ancestor
591 of itself, so every reachable node in 'roots' and 'heads' will be
591 of itself, so every reachable node in 'roots' and 'heads' will be
592 included in 'nodes'.
592 included in 'nodes'.
593
593
594 'outroots' is the list of reachable nodes in 'roots', i.e., the
594 'outroots' is the list of reachable nodes in 'roots', i.e., the
595 subset of 'roots' that is returned in 'nodes'. Likewise,
595 subset of 'roots' that is returned in 'nodes'. Likewise,
596 'outheads' is the subset of 'heads' that is also in 'nodes'.
596 'outheads' is the subset of 'heads' that is also in 'nodes'.
597
597
598 'roots' and 'heads' are both lists of node IDs. If 'roots' is
598 'roots' and 'heads' are both lists of node IDs. If 'roots' is
599 unspecified, uses nullid as the only root. If 'heads' is
599 unspecified, uses nullid as the only root. If 'heads' is
600 unspecified, uses list of all of the revlog's heads."""
600 unspecified, uses list of all of the revlog's heads."""
601 nonodes = ([], [], [])
601 nonodes = ([], [], [])
602 if roots is not None:
602 if roots is not None:
603 roots = list(roots)
603 roots = list(roots)
604 if not roots:
604 if not roots:
605 return nonodes
605 return nonodes
606 lowestrev = min([self.rev(n) for n in roots])
606 lowestrev = min([self.rev(n) for n in roots])
607 else:
607 else:
608 roots = [nullid] # Everybody's a descendant of nullid
608 roots = [nullid] # Everybody's a descendant of nullid
609 lowestrev = nullrev
609 lowestrev = nullrev
610 if (lowestrev == nullrev) and (heads is None):
610 if (lowestrev == nullrev) and (heads is None):
611 # We want _all_ the nodes!
611 # We want _all_ the nodes!
612 return ([self.node(r) for r in self], [nullid], list(self.heads()))
612 return ([self.node(r) for r in self], [nullid], list(self.heads()))
613 if heads is None:
613 if heads is None:
614 # All nodes are ancestors, so the latest ancestor is the last
614 # All nodes are ancestors, so the latest ancestor is the last
615 # node.
615 # node.
616 highestrev = len(self) - 1
616 highestrev = len(self) - 1
617 # Set ancestors to None to signal that every node is an ancestor.
617 # Set ancestors to None to signal that every node is an ancestor.
618 ancestors = None
618 ancestors = None
619 # Set heads to an empty dictionary for later discovery of heads
619 # Set heads to an empty dictionary for later discovery of heads
620 heads = {}
620 heads = {}
621 else:
621 else:
622 heads = list(heads)
622 heads = list(heads)
623 if not heads:
623 if not heads:
624 return nonodes
624 return nonodes
625 ancestors = set()
625 ancestors = set()
626 # Turn heads into a dictionary so we can remove 'fake' heads.
626 # Turn heads into a dictionary so we can remove 'fake' heads.
627 # Also, later we will be using it to filter out the heads we can't
627 # Also, later we will be using it to filter out the heads we can't
628 # find from roots.
628 # find from roots.
629 heads = dict.fromkeys(heads, False)
629 heads = dict.fromkeys(heads, False)
630 # Start at the top and keep marking parents until we're done.
630 # Start at the top and keep marking parents until we're done.
631 nodestotag = set(heads)
631 nodestotag = set(heads)
632 # Remember where the top was so we can use it as a limit later.
632 # Remember where the top was so we can use it as a limit later.
633 highestrev = max([self.rev(n) for n in nodestotag])
633 highestrev = max([self.rev(n) for n in nodestotag])
634 while nodestotag:
634 while nodestotag:
635 # grab a node to tag
635 # grab a node to tag
636 n = nodestotag.pop()
636 n = nodestotag.pop()
637 # Never tag nullid
637 # Never tag nullid
638 if n == nullid:
638 if n == nullid:
639 continue
639 continue
640 # A node's revision number represents its place in a
640 # A node's revision number represents its place in a
641 # topologically sorted list of nodes.
641 # topologically sorted list of nodes.
642 r = self.rev(n)
642 r = self.rev(n)
643 if r >= lowestrev:
643 if r >= lowestrev:
644 if n not in ancestors:
644 if n not in ancestors:
645 # If we are possibly a descendant of one of the roots
645 # If we are possibly a descendant of one of the roots
646 # and we haven't already been marked as an ancestor
646 # and we haven't already been marked as an ancestor
647 ancestors.add(n) # Mark as ancestor
647 ancestors.add(n) # Mark as ancestor
648 # Add non-nullid parents to list of nodes to tag.
648 # Add non-nullid parents to list of nodes to tag.
649 nodestotag.update([p for p in self.parents(n) if
649 nodestotag.update([p for p in self.parents(n) if
650 p != nullid])
650 p != nullid])
651 elif n in heads: # We've seen it before, is it a fake head?
651 elif n in heads: # We've seen it before, is it a fake head?
652 # So it is, real heads should not be the ancestors of
652 # So it is, real heads should not be the ancestors of
653 # any other heads.
653 # any other heads.
654 heads.pop(n)
654 heads.pop(n)
655 if not ancestors:
655 if not ancestors:
656 return nonodes
656 return nonodes
657 # Now that we have our set of ancestors, we want to remove any
657 # Now that we have our set of ancestors, we want to remove any
658 # roots that are not ancestors.
658 # roots that are not ancestors.
659
659
660 # If one of the roots was nullid, everything is included anyway.
660 # If one of the roots was nullid, everything is included anyway.
661 if lowestrev > nullrev:
661 if lowestrev > nullrev:
662 # But, since we weren't, let's recompute the lowest rev to not
662 # But, since we weren't, let's recompute the lowest rev to not
663 # include roots that aren't ancestors.
663 # include roots that aren't ancestors.
664
664
665 # Filter out roots that aren't ancestors of heads
665 # Filter out roots that aren't ancestors of heads
666 roots = [n for n in roots if n in ancestors]
666 roots = [n for n in roots if n in ancestors]
667 # Recompute the lowest revision
667 # Recompute the lowest revision
668 if roots:
668 if roots:
669 lowestrev = min([self.rev(n) for n in roots])
669 lowestrev = min([self.rev(n) for n in roots])
670 else:
670 else:
671 # No more roots? Return empty list
671 # No more roots? Return empty list
672 return nonodes
672 return nonodes
673 else:
673 else:
674 # We are descending from nullid, and don't need to care about
674 # We are descending from nullid, and don't need to care about
675 # any other roots.
675 # any other roots.
676 lowestrev = nullrev
676 lowestrev = nullrev
677 roots = [nullid]
677 roots = [nullid]
678 # Transform our roots list into a set.
678 # Transform our roots list into a set.
679 descendants = set(roots)
679 descendants = set(roots)
680 # Also, keep the original roots so we can filter out roots that aren't
680 # Also, keep the original roots so we can filter out roots that aren't
681 # 'real' roots (i.e. are descended from other roots).
681 # 'real' roots (i.e. are descended from other roots).
682 roots = descendants.copy()
682 roots = descendants.copy()
683 # Our topologically sorted list of output nodes.
683 # Our topologically sorted list of output nodes.
684 orderedout = []
684 orderedout = []
685 # Don't start at nullid since we don't want nullid in our output list,
685 # Don't start at nullid since we don't want nullid in our output list,
686 # and if nullid shows up in descendants, empty parents will look like
686 # and if nullid shows up in descendants, empty parents will look like
687 # they're descendants.
687 # they're descendants.
688 for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
688 for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
689 n = self.node(r)
689 n = self.node(r)
690 isdescendant = False
690 isdescendant = False
691 if lowestrev == nullrev: # Everybody is a descendant of nullid
691 if lowestrev == nullrev: # Everybody is a descendant of nullid
692 isdescendant = True
692 isdescendant = True
693 elif n in descendants:
693 elif n in descendants:
694 # n is already a descendant
694 # n is already a descendant
695 isdescendant = True
695 isdescendant = True
696 # This check only needs to be done here because all the roots
696 # This check only needs to be done here because all the roots
697 # will start being marked is descendants before the loop.
697 # will start being marked is descendants before the loop.
698 if n in roots:
698 if n in roots:
699 # If n was a root, check if it's a 'real' root.
699 # If n was a root, check if it's a 'real' root.
700 p = tuple(self.parents(n))
700 p = tuple(self.parents(n))
701 # If any of its parents are descendants, it's not a root.
701 # If any of its parents are descendants, it's not a root.
702 if (p[0] in descendants) or (p[1] in descendants):
702 if (p[0] in descendants) or (p[1] in descendants):
703 roots.remove(n)
703 roots.remove(n)
704 else:
704 else:
705 p = tuple(self.parents(n))
705 p = tuple(self.parents(n))
706 # A node is a descendant if either of its parents are
706 # A node is a descendant if either of its parents are
707 # descendants. (We seeded the dependents list with the roots
707 # descendants. (We seeded the dependents list with the roots
708 # up there, remember?)
708 # up there, remember?)
709 if (p[0] in descendants) or (p[1] in descendants):
709 if (p[0] in descendants) or (p[1] in descendants):
710 descendants.add(n)
710 descendants.add(n)
711 isdescendant = True
711 isdescendant = True
712 if isdescendant and ((ancestors is None) or (n in ancestors)):
712 if isdescendant and ((ancestors is None) or (n in ancestors)):
713 # Only include nodes that are both descendants and ancestors.
713 # Only include nodes that are both descendants and ancestors.
714 orderedout.append(n)
714 orderedout.append(n)
715 if (ancestors is not None) and (n in heads):
715 if (ancestors is not None) and (n in heads):
716 # We're trying to figure out which heads are reachable
716 # We're trying to figure out which heads are reachable
717 # from roots.
717 # from roots.
718 # Mark this head as having been reached
718 # Mark this head as having been reached
719 heads[n] = True
719 heads[n] = True
720 elif ancestors is None:
720 elif ancestors is None:
721 # Otherwise, we're trying to discover the heads.
721 # Otherwise, we're trying to discover the heads.
722 # Assume this is a head because if it isn't, the next step
722 # Assume this is a head because if it isn't, the next step
723 # will eventually remove it.
723 # will eventually remove it.
724 heads[n] = True
724 heads[n] = True
725 # But, obviously its parents aren't.
725 # But, obviously its parents aren't.
726 for p in self.parents(n):
726 for p in self.parents(n):
727 heads.pop(p, None)
727 heads.pop(p, None)
728 heads = [n for n, flag in heads.iteritems() if flag]
728 heads = [n for n, flag in heads.iteritems() if flag]
729 roots = list(roots)
729 roots = list(roots)
730 assert orderedout
730 assert orderedout
731 assert roots
731 assert roots
732 assert heads
732 assert heads
733 return (orderedout, roots, heads)
733 return (orderedout, roots, heads)
734
734
735 def headrevs(self):
735 def headrevs(self):
736 try:
736 try:
737 return self.index.headrevs()
737 return self.index.headrevs()
738 except AttributeError:
738 except AttributeError:
739 return self._headrevs()
739 return self._headrevs()
740
740
741 def computephases(self, roots):
741 def computephases(self, roots):
742 return self.index.computephasesmapsets(roots)
742 return self.index.computephasesmapsets(roots)
743
743
744 def _headrevs(self):
744 def _headrevs(self):
745 count = len(self)
745 count = len(self)
746 if not count:
746 if not count:
747 return [nullrev]
747 return [nullrev]
748 # we won't iter over filtered rev so nobody is a head at start
748 # we won't iter over filtered rev so nobody is a head at start
749 ishead = [0] * (count + 1)
749 ishead = [0] * (count + 1)
750 index = self.index
750 index = self.index
751 for r in self:
751 for r in self:
752 ishead[r] = 1 # I may be an head
752 ishead[r] = 1 # I may be an head
753 e = index[r]
753 e = index[r]
754 ishead[e[5]] = ishead[e[6]] = 0 # my parent are not
754 ishead[e[5]] = ishead[e[6]] = 0 # my parent are not
755 return [r for r, val in enumerate(ishead) if val]
755 return [r for r, val in enumerate(ishead) if val]
756
756
757 def heads(self, start=None, stop=None):
757 def heads(self, start=None, stop=None):
758 """return the list of all nodes that have no children
758 """return the list of all nodes that have no children
759
759
760 if start is specified, only heads that are descendants of
760 if start is specified, only heads that are descendants of
761 start will be returned
761 start will be returned
762 if stop is specified, it will consider all the revs from stop
762 if stop is specified, it will consider all the revs from stop
763 as if they had no children
763 as if they had no children
764 """
764 """
765 if start is None and stop is None:
765 if start is None and stop is None:
766 if not len(self):
766 if not len(self):
767 return [nullid]
767 return [nullid]
768 return [self.node(r) for r in self.headrevs()]
768 return [self.node(r) for r in self.headrevs()]
769
769
770 if start is None:
770 if start is None:
771 start = nullid
771 start = nullid
772 if stop is None:
772 if stop is None:
773 stop = []
773 stop = []
774 stoprevs = set([self.rev(n) for n in stop])
774 stoprevs = set([self.rev(n) for n in stop])
775 startrev = self.rev(start)
775 startrev = self.rev(start)
776 reachable = set((startrev,))
776 reachable = set((startrev,))
777 heads = set((startrev,))
777 heads = set((startrev,))
778
778
779 parentrevs = self.parentrevs
779 parentrevs = self.parentrevs
780 for r in self.revs(start=startrev + 1):
780 for r in self.revs(start=startrev + 1):
781 for p in parentrevs(r):
781 for p in parentrevs(r):
782 if p in reachable:
782 if p in reachable:
783 if r not in stoprevs:
783 if r not in stoprevs:
784 reachable.add(r)
784 reachable.add(r)
785 heads.add(r)
785 heads.add(r)
786 if p in heads and p not in stoprevs:
786 if p in heads and p not in stoprevs:
787 heads.remove(p)
787 heads.remove(p)
788
788
789 return [self.node(r) for r in heads]
789 return [self.node(r) for r in heads]
790
790
791 def children(self, node):
791 def children(self, node):
792 """find the children of a given node"""
792 """find the children of a given node"""
793 c = []
793 c = []
794 p = self.rev(node)
794 p = self.rev(node)
795 for r in self.revs(start=p + 1):
795 for r in self.revs(start=p + 1):
796 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
796 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
797 if prevs:
797 if prevs:
798 for pr in prevs:
798 for pr in prevs:
799 if pr == p:
799 if pr == p:
800 c.append(self.node(r))
800 c.append(self.node(r))
801 elif p == nullrev:
801 elif p == nullrev:
802 c.append(self.node(r))
802 c.append(self.node(r))
803 return c
803 return c
804
804
805 def descendant(self, start, end):
805 def descendant(self, start, end):
806 if start == nullrev:
806 if start == nullrev:
807 return True
807 return True
808 for i in self.descendants([start]):
808 for i in self.descendants([start]):
809 if i == end:
809 if i == end:
810 return True
810 return True
811 elif i > end:
811 elif i > end:
812 break
812 break
813 return False
813 return False
814
814
815 def commonancestorsheads(self, a, b):
815 def commonancestorsheads(self, a, b):
816 """calculate all the heads of the common ancestors of nodes a and b"""
816 """calculate all the heads of the common ancestors of nodes a and b"""
817 a, b = self.rev(a), self.rev(b)
817 a, b = self.rev(a), self.rev(b)
818 try:
818 try:
819 ancs = self.index.commonancestorsheads(a, b)
819 ancs = self.index.commonancestorsheads(a, b)
820 except (AttributeError, OverflowError): # C implementation failed
820 except (AttributeError, OverflowError): # C implementation failed
821 ancs = ancestor.commonancestorsheads(self.parentrevs, a, b)
821 ancs = ancestor.commonancestorsheads(self.parentrevs, a, b)
822 return map(self.node, ancs)
822 return map(self.node, ancs)
823
823
824 def isancestor(self, a, b):
824 def isancestor(self, a, b):
825 """return True if node a is an ancestor of node b
825 """return True if node a is an ancestor of node b
826
826
827 The implementation of this is trivial but the use of
827 The implementation of this is trivial but the use of
828 commonancestorsheads is not."""
828 commonancestorsheads is not."""
829 return a in self.commonancestorsheads(a, b)
829 return a in self.commonancestorsheads(a, b)
830
830
831 def ancestor(self, a, b):
831 def ancestor(self, a, b):
832 """calculate the "best" common ancestor of nodes a and b"""
832 """calculate the "best" common ancestor of nodes a and b"""
833
833
834 a, b = self.rev(a), self.rev(b)
834 a, b = self.rev(a), self.rev(b)
835 try:
835 try:
836 ancs = self.index.ancestors(a, b)
836 ancs = self.index.ancestors(a, b)
837 except (AttributeError, OverflowError):
837 except (AttributeError, OverflowError):
838 ancs = ancestor.ancestors(self.parentrevs, a, b)
838 ancs = ancestor.ancestors(self.parentrevs, a, b)
839 if ancs:
839 if ancs:
840 # choose a consistent winner when there's a tie
840 # choose a consistent winner when there's a tie
841 return min(map(self.node, ancs))
841 return min(map(self.node, ancs))
842 return nullid
842 return nullid
843
843
844 def _match(self, id):
844 def _match(self, id):
845 if isinstance(id, int):
845 if isinstance(id, int):
846 # rev
846 # rev
847 return self.node(id)
847 return self.node(id)
848 if len(id) == 20:
848 if len(id) == 20:
849 # possibly a binary node
849 # possibly a binary node
850 # odds of a binary node being all hex in ASCII are 1 in 10**25
850 # odds of a binary node being all hex in ASCII are 1 in 10**25
851 try:
851 try:
852 node = id
852 node = id
853 self.rev(node) # quick search the index
853 self.rev(node) # quick search the index
854 return node
854 return node
855 except LookupError:
855 except LookupError:
856 pass # may be partial hex id
856 pass # may be partial hex id
857 try:
857 try:
858 # str(rev)
858 # str(rev)
859 rev = int(id)
859 rev = int(id)
860 if str(rev) != id:
860 if str(rev) != id:
861 raise ValueError
861 raise ValueError
862 if rev < 0:
862 if rev < 0:
863 rev = len(self) + rev
863 rev = len(self) + rev
864 if rev < 0 or rev >= len(self):
864 if rev < 0 or rev >= len(self):
865 raise ValueError
865 raise ValueError
866 return self.node(rev)
866 return self.node(rev)
867 except (ValueError, OverflowError):
867 except (ValueError, OverflowError):
868 pass
868 pass
869 if len(id) == 40:
869 if len(id) == 40:
870 try:
870 try:
871 # a full hex nodeid?
871 # a full hex nodeid?
872 node = bin(id)
872 node = bin(id)
873 self.rev(node)
873 self.rev(node)
874 return node
874 return node
875 except (TypeError, LookupError):
875 except (TypeError, LookupError):
876 pass
876 pass
877
877
878 def _partialmatch(self, id):
878 def _partialmatch(self, id):
879 try:
879 try:
880 n = self.index.partialmatch(id)
880 n = self.index.partialmatch(id)
881 if n and self.hasnode(n):
881 if n and self.hasnode(n):
882 return n
882 return n
883 return None
883 return None
884 except RevlogError:
884 except RevlogError:
885 # parsers.c radix tree lookup gave multiple matches
885 # parsers.c radix tree lookup gave multiple matches
886 # fall through to slow path that filters hidden revisions
886 # fall through to slow path that filters hidden revisions
887 pass
887 pass
888 except (AttributeError, ValueError):
888 except (AttributeError, ValueError):
889 # we are pure python, or key was too short to search radix tree
889 # we are pure python, or key was too short to search radix tree
890 pass
890 pass
891
891
892 if id in self._pcache:
892 if id in self._pcache:
893 return self._pcache[id]
893 return self._pcache[id]
894
894
895 if len(id) < 40:
895 if len(id) < 40:
896 try:
896 try:
897 # hex(node)[:...]
897 # hex(node)[:...]
898 l = len(id) // 2 # grab an even number of digits
898 l = len(id) // 2 # grab an even number of digits
899 prefix = bin(id[:l * 2])
899 prefix = bin(id[:l * 2])
900 nl = [e[7] for e in self.index if e[7].startswith(prefix)]
900 nl = [e[7] for e in self.index if e[7].startswith(prefix)]
901 nl = [n for n in nl if hex(n).startswith(id) and
901 nl = [n for n in nl if hex(n).startswith(id) and
902 self.hasnode(n)]
902 self.hasnode(n)]
903 if len(nl) > 0:
903 if len(nl) > 0:
904 if len(nl) == 1:
904 if len(nl) == 1:
905 self._pcache[id] = nl[0]
905 self._pcache[id] = nl[0]
906 return nl[0]
906 return nl[0]
907 raise LookupError(id, self.indexfile,
907 raise LookupError(id, self.indexfile,
908 _('ambiguous identifier'))
908 _('ambiguous identifier'))
909 return None
909 return None
910 except TypeError:
910 except TypeError:
911 pass
911 pass
912
912
913 def lookup(self, id):
913 def lookup(self, id):
914 """locate a node based on:
914 """locate a node based on:
915 - revision number or str(revision number)
915 - revision number or str(revision number)
916 - nodeid or subset of hex nodeid
916 - nodeid or subset of hex nodeid
917 """
917 """
918 n = self._match(id)
918 n = self._match(id)
919 if n is not None:
919 if n is not None:
920 return n
920 return n
921 n = self._partialmatch(id)
921 n = self._partialmatch(id)
922 if n:
922 if n:
923 return n
923 return n
924
924
925 raise LookupError(id, self.indexfile, _('no match found'))
925 raise LookupError(id, self.indexfile, _('no match found'))
926
926
927 def cmp(self, node, text):
927 def cmp(self, node, text):
928 """compare text with a given file revision
928 """compare text with a given file revision
929
929
930 returns True if text is different than what is stored.
930 returns True if text is different than what is stored.
931 """
931 """
932 p1, p2 = self.parents(node)
932 p1, p2 = self.parents(node)
933 return hash(text, p1, p2) != node
933 return hash(text, p1, p2) != node
934
934
935 def _addchunk(self, offset, data):
935 def _addchunk(self, offset, data):
936 """Add a segment to the revlog cache.
936 """Add a segment to the revlog cache.
937
937
938 Accepts an absolute offset and the data that is at that location.
938 Accepts an absolute offset and the data that is at that location.
939 """
939 """
940 o, d = self._chunkcache
940 o, d = self._chunkcache
941 # try to add to existing cache
941 # try to add to existing cache
942 if o + len(d) == offset and len(d) + len(data) < _chunksize:
942 if o + len(d) == offset and len(d) + len(data) < _chunksize:
943 self._chunkcache = o, d + data
943 self._chunkcache = o, d + data
944 else:
944 else:
945 self._chunkcache = offset, data
945 self._chunkcache = offset, data
946
946
947 def _loadchunk(self, offset, length, df=None):
947 def _loadchunk(self, offset, length, df=None):
948 """Load a segment of raw data from the revlog.
948 """Load a segment of raw data from the revlog.
949
949
950 Accepts an absolute offset, length to read, and an optional existing
950 Accepts an absolute offset, length to read, and an optional existing
951 file handle to read from.
951 file handle to read from.
952
952
953 If an existing file handle is passed, it will be seeked and the
953 If an existing file handle is passed, it will be seeked and the
954 original seek position will NOT be restored.
954 original seek position will NOT be restored.
955
955
956 Returns a str or buffer of raw byte data.
956 Returns a str or buffer of raw byte data.
957 """
957 """
958 if df is not None:
958 if df is not None:
959 closehandle = False
959 closehandle = False
960 else:
960 else:
961 if self._inline:
961 if self._inline:
962 df = self.opener(self.indexfile)
962 df = self.opener(self.indexfile)
963 else:
963 else:
964 df = self.opener(self.datafile)
964 df = self.opener(self.datafile)
965 closehandle = True
965 closehandle = True
966
966
967 # Cache data both forward and backward around the requested
967 # Cache data both forward and backward around the requested
968 # data, in a fixed size window. This helps speed up operations
968 # data, in a fixed size window. This helps speed up operations
969 # involving reading the revlog backwards.
969 # involving reading the revlog backwards.
970 cachesize = self._chunkcachesize
970 cachesize = self._chunkcachesize
971 realoffset = offset & ~(cachesize - 1)
971 realoffset = offset & ~(cachesize - 1)
972 reallength = (((offset + length + cachesize) & ~(cachesize - 1))
972 reallength = (((offset + length + cachesize) & ~(cachesize - 1))
973 - realoffset)
973 - realoffset)
974 df.seek(realoffset)
974 df.seek(realoffset)
975 d = df.read(reallength)
975 d = df.read(reallength)
976 if closehandle:
976 if closehandle:
977 df.close()
977 df.close()
978 self._addchunk(realoffset, d)
978 self._addchunk(realoffset, d)
979 if offset != realoffset or reallength != length:
979 if offset != realoffset or reallength != length:
980 return util.buffer(d, offset - realoffset, length)
980 return util.buffer(d, offset - realoffset, length)
981 return d
981 return d
982
982
983 def _getchunk(self, offset, length, df=None):
983 def _getchunk(self, offset, length, df=None):
984 """Obtain a segment of raw data from the revlog.
984 """Obtain a segment of raw data from the revlog.
985
985
986 Accepts an absolute offset, length of bytes to obtain, and an
986 Accepts an absolute offset, length of bytes to obtain, and an
987 optional file handle to the already-opened revlog. If the file
987 optional file handle to the already-opened revlog. If the file
988 handle is used, it's original seek position will not be preserved.
988 handle is used, it's original seek position will not be preserved.
989
989
990 Requests for data may be returned from a cache.
990 Requests for data may be returned from a cache.
991
991
992 Returns a str or a buffer instance of raw byte data.
992 Returns a str or a buffer instance of raw byte data.
993 """
993 """
994 o, d = self._chunkcache
994 o, d = self._chunkcache
995 l = len(d)
995 l = len(d)
996
996
997 # is it in the cache?
997 # is it in the cache?
998 cachestart = offset - o
998 cachestart = offset - o
999 cacheend = cachestart + length
999 cacheend = cachestart + length
1000 if cachestart >= 0 and cacheend <= l:
1000 if cachestart >= 0 and cacheend <= l:
1001 if cachestart == 0 and cacheend == l:
1001 if cachestart == 0 and cacheend == l:
1002 return d # avoid a copy
1002 return d # avoid a copy
1003 return util.buffer(d, cachestart, cacheend - cachestart)
1003 return util.buffer(d, cachestart, cacheend - cachestart)
1004
1004
1005 return self._loadchunk(offset, length, df=df)
1005 return self._loadchunk(offset, length, df=df)
1006
1006
1007 def _chunkraw(self, startrev, endrev, df=None):
1007 def _chunkraw(self, startrev, endrev, df=None):
1008 """Obtain a segment of raw data corresponding to a range of revisions.
1008 """Obtain a segment of raw data corresponding to a range of revisions.
1009
1009
1010 Accepts the start and end revisions and an optional already-open
1010 Accepts the start and end revisions and an optional already-open
1011 file handle to be used for reading. If the file handle is read, its
1011 file handle to be used for reading. If the file handle is read, its
1012 seek position will not be preserved.
1012 seek position will not be preserved.
1013
1013
1014 Requests for data may be satisfied by a cache.
1014 Requests for data may be satisfied by a cache.
1015
1015
1016 Returns a str or a buffer instance of raw byte data. Callers will
1016 Returns a str or a buffer instance of raw byte data. Callers will
1017 need to call ``self.start(rev)`` and ``self.length()`` to determine
1017 need to call ``self.start(rev)`` and ``self.length()`` to determine
1018 where each revision's data begins and ends.
1018 where each revision's data begins and ends.
1019 """
1019 """
1020 start = self.start(startrev)
1020 start = self.start(startrev)
1021 end = self.end(endrev)
1021 end = self.end(endrev)
1022 if self._inline:
1022 if self._inline:
1023 start += (startrev + 1) * self._io.size
1023 start += (startrev + 1) * self._io.size
1024 end += (endrev + 1) * self._io.size
1024 end += (endrev + 1) * self._io.size
1025 length = end - start
1025 length = end - start
1026 return self._getchunk(start, length, df=df)
1026 return self._getchunk(start, length, df=df)
1027
1027
1028 def _chunk(self, rev, df=None):
1028 def _chunk(self, rev, df=None):
1029 """Obtain a single decompressed chunk for a revision.
1029 """Obtain a single decompressed chunk for a revision.
1030
1030
1031 Accepts an integer revision and an optional already-open file handle
1031 Accepts an integer revision and an optional already-open file handle
1032 to be used for reading. If used, the seek position of the file will not
1032 to be used for reading. If used, the seek position of the file will not
1033 be preserved.
1033 be preserved.
1034
1034
1035 Returns a str holding uncompressed data for the requested revision.
1035 Returns a str holding uncompressed data for the requested revision.
1036 """
1036 """
1037 return decompress(self._chunkraw(rev, rev, df=df))
1037 return decompress(self._chunkraw(rev, rev, df=df))
1038
1038
1039 def _chunks(self, revs, df=None):
1039 def _chunks(self, revs, df=None):
1040 """Obtain decompressed chunks for the specified revisions.
1040 """Obtain decompressed chunks for the specified revisions.
1041
1041
1042 Accepts an iterable of numeric revisions that are assumed to be in
1042 Accepts an iterable of numeric revisions that are assumed to be in
1043 ascending order. Also accepts an optional already-open file handle
1043 ascending order. Also accepts an optional already-open file handle
1044 to be used for reading. If used, the seek position of the file will
1044 to be used for reading. If used, the seek position of the file will
1045 not be preserved.
1045 not be preserved.
1046
1046
1047 This function is similar to calling ``self._chunk()`` multiple times,
1047 This function is similar to calling ``self._chunk()`` multiple times,
1048 but is faster.
1048 but is faster.
1049
1049
1050 Returns a list with decompressed data for each requested revision.
1050 Returns a list with decompressed data for each requested revision.
1051 """
1051 """
1052 if not revs:
1052 if not revs:
1053 return []
1053 return []
1054 start = self.start
1054 start = self.start
1055 length = self.length
1055 length = self.length
1056 inline = self._inline
1056 inline = self._inline
1057 iosize = self._io.size
1057 iosize = self._io.size
1058 buffer = util.buffer
1058 buffer = util.buffer
1059
1059
1060 l = []
1060 l = []
1061 ladd = l.append
1061 ladd = l.append
1062
1062
1063 # preload the cache
1063 # preload the cache
1064 try:
1064 try:
1065 while True:
1065 while True:
1066 # ensure that the cache doesn't change out from under us
1066 # ensure that the cache doesn't change out from under us
1067 _cache = self._chunkcache
1067 _cache = self._chunkcache
1068 self._chunkraw(revs[0], revs[-1], df=df)
1068 self._chunkraw(revs[0], revs[-1], df=df)
1069 if _cache == self._chunkcache:
1069 if _cache == self._chunkcache:
1070 break
1070 break
1071 offset, data = _cache
1071 offset, data = _cache
1072 except OverflowError:
1072 except OverflowError:
1073 # issue4215 - we can't cache a run of chunks greater than
1073 # issue4215 - we can't cache a run of chunks greater than
1074 # 2G on Windows
1074 # 2G on Windows
1075 return [self._chunk(rev, df=df) for rev in revs]
1075 return [self._chunk(rev, df=df) for rev in revs]
1076
1076
1077 for rev in revs:
1077 for rev in revs:
1078 chunkstart = start(rev)
1078 chunkstart = start(rev)
1079 if inline:
1079 if inline:
1080 chunkstart += (rev + 1) * iosize
1080 chunkstart += (rev + 1) * iosize
1081 chunklength = length(rev)
1081 chunklength = length(rev)
1082 ladd(decompress(buffer(data, chunkstart - offset, chunklength)))
1082 ladd(decompress(buffer(data, chunkstart - offset, chunklength)))
1083
1083
1084 return l
1084 return l
1085
1085
1086 def _chunkclear(self):
1086 def _chunkclear(self):
1087 """Clear the raw chunk cache."""
1087 """Clear the raw chunk cache."""
1088 self._chunkcache = (0, '')
1088 self._chunkcache = (0, '')
1089
1089
1090 def deltaparent(self, rev):
1090 def deltaparent(self, rev):
1091 """return deltaparent of the given revision"""
1091 """return deltaparent of the given revision"""
1092 base = self.index[rev][3]
1092 base = self.index[rev][3]
1093 if base == rev:
1093 if base == rev:
1094 return nullrev
1094 return nullrev
1095 elif self._generaldelta:
1095 elif self._generaldelta:
1096 return base
1096 return base
1097 else:
1097 else:
1098 return rev - 1
1098 return rev - 1
1099
1099
1100 def revdiff(self, rev1, rev2):
1100 def revdiff(self, rev1, rev2):
1101 """return or calculate a delta between two revisions"""
1101 """return or calculate a delta between two revisions"""
1102 if rev1 != nullrev and self.deltaparent(rev2) == rev1:
1102 if rev1 != nullrev and self.deltaparent(rev2) == rev1:
1103 return str(self._chunk(rev2))
1103 return str(self._chunk(rev2))
1104
1104
1105 return mdiff.textdiff(self.revision(rev1),
1105 return mdiff.textdiff(self.revision(rev1),
1106 self.revision(rev2))
1106 self.revision(rev2))
1107
1107
1108 def revision(self, nodeorrev, _df=None):
1108 def revision(self, nodeorrev, _df=None):
1109 """return an uncompressed revision of a given node or revision
1109 """return an uncompressed revision of a given node or revision
1110 number.
1110 number.
1111
1111
1112 _df is an existing file handle to read from. It is meant to only be
1112 _df is an existing file handle to read from. It is meant to only be
1113 used internally.
1113 used internally.
1114 """
1114 """
1115 if isinstance(nodeorrev, int):
1115 if isinstance(nodeorrev, int):
1116 rev = nodeorrev
1116 rev = nodeorrev
1117 node = self.node(rev)
1117 node = self.node(rev)
1118 else:
1118 else:
1119 node = nodeorrev
1119 node = nodeorrev
1120 rev = None
1120 rev = None
1121
1121
1122 cachedrev = None
1122 cachedrev = None
1123 if node == nullid:
1123 if node == nullid:
1124 return ""
1124 return ""
1125 if self._cache:
1125 if self._cache:
1126 if self._cache[0] == node:
1126 if self._cache[0] == node:
1127 return self._cache[2]
1127 return self._cache[2]
1128 cachedrev = self._cache[1]
1128 cachedrev = self._cache[1]
1129
1129
1130 # look up what we need to read
1130 # look up what we need to read
1131 text = None
1131 text = None
1132 if rev is None:
1132 if rev is None:
1133 rev = self.rev(node)
1133 rev = self.rev(node)
1134
1134
1135 # check rev flags
1135 # check rev flags
1136 if self.flags(rev) & ~REVIDX_KNOWN_FLAGS:
1136 if self.flags(rev) & ~REVIDX_KNOWN_FLAGS:
1137 raise RevlogError(_('incompatible revision flag %x') %
1137 raise RevlogError(_('incompatible revision flag %x') %
1138 (self.flags(rev) & ~REVIDX_KNOWN_FLAGS))
1138 (self.flags(rev) & ~REVIDX_KNOWN_FLAGS))
1139
1139
1140 # build delta chain
1140 # build delta chain
1141 chain = []
1141 chain = []
1142 index = self.index # for performance
1142 index = self.index # for performance
1143 generaldelta = self._generaldelta
1143 generaldelta = self._generaldelta
1144 iterrev = rev
1144 iterrev = rev
1145 e = index[iterrev]
1145 e = index[iterrev]
1146 while iterrev != e[3] and iterrev != cachedrev:
1146 while iterrev != e[3] and iterrev != cachedrev:
1147 chain.append(iterrev)
1147 chain.append(iterrev)
1148 if generaldelta:
1148 if generaldelta:
1149 iterrev = e[3]
1149 iterrev = e[3]
1150 else:
1150 else:
1151 iterrev -= 1
1151 iterrev -= 1
1152 e = index[iterrev]
1152 e = index[iterrev]
1153
1153
1154 if iterrev == cachedrev:
1154 if iterrev == cachedrev:
1155 # cache hit
1155 # cache hit
1156 text = self._cache[2]
1156 text = self._cache[2]
1157 else:
1157 else:
1158 chain.append(iterrev)
1158 chain.append(iterrev)
1159 chain.reverse()
1159 chain.reverse()
1160
1160
1161 # drop cache to save memory
1161 # drop cache to save memory
1162 self._cache = None
1162 self._cache = None
1163
1163
1164 bins = self._chunks(chain, df=_df)
1164 bins = self._chunks(chain, df=_df)
1165 if text is None:
1165 if text is None:
1166 text = str(bins[0])
1166 text = str(bins[0])
1167 bins = bins[1:]
1167 bins = bins[1:]
1168
1168
1169 text = mdiff.patches(text, bins)
1169 text = mdiff.patches(text, bins)
1170
1170
1171 text = self._checkhash(text, node, rev)
1171 text = self._checkhash(text, node, rev)
1172
1172
1173 self._cache = (node, rev, text)
1173 self._cache = (node, rev, text)
1174 return text
1174 return text
1175
1175
1176 def hash(self, text, p1, p2):
1176 def hash(self, text, p1, p2):
1177 """Compute a node hash.
1177 """Compute a node hash.
1178
1178
1179 Available as a function so that subclasses can replace the hash
1179 Available as a function so that subclasses can replace the hash
1180 as needed.
1180 as needed.
1181 """
1181 """
1182 return hash(text, p1, p2)
1182 return hash(text, p1, p2)
1183
1183
1184 def _checkhash(self, text, node, rev):
1184 def _checkhash(self, text, node, rev):
1185 p1, p2 = self.parents(node)
1185 p1, p2 = self.parents(node)
1186 self.checkhash(text, p1, p2, node, rev)
1186 self.checkhash(text, p1, p2, node, rev)
1187 return text
1187 return text
1188
1188
1189 def checkhash(self, text, p1, p2, node, rev=None):
1189 def checkhash(self, text, p1, p2, node, rev=None):
1190 if node != self.hash(text, p1, p2):
1190 if node != self.hash(text, p1, p2):
1191 revornode = rev
1191 revornode = rev
1192 if revornode is None:
1192 if revornode is None:
1193 revornode = templatefilters.short(hex(node))
1193 revornode = templatefilters.short(hex(node))
1194 raise RevlogError(_("integrity check failed on %s:%s")
1194 raise RevlogError(_("integrity check failed on %s:%s")
1195 % (self.indexfile, revornode))
1195 % (self.indexfile, revornode))
1196
1196
1197 def checkinlinesize(self, tr, fp=None):
1197 def checkinlinesize(self, tr, fp=None):
1198 """Check if the revlog is too big for inline and convert if so.
1198 """Check if the revlog is too big for inline and convert if so.
1199
1199
1200 This should be called after revisions are added to the revlog. If the
1200 This should be called after revisions are added to the revlog. If the
1201 revlog has grown too large to be an inline revlog, it will convert it
1201 revlog has grown too large to be an inline revlog, it will convert it
1202 to use multiple index and data files.
1202 to use multiple index and data files.
1203 """
1203 """
1204 if not self._inline or (self.start(-2) + self.length(-2)) < _maxinline:
1204 if not self._inline or (self.start(-2) + self.length(-2)) < _maxinline:
1205 return
1205 return
1206
1206
1207 trinfo = tr.find(self.indexfile)
1207 trinfo = tr.find(self.indexfile)
1208 if trinfo is None:
1208 if trinfo is None:
1209 raise RevlogError(_("%s not found in the transaction")
1209 raise RevlogError(_("%s not found in the transaction")
1210 % self.indexfile)
1210 % self.indexfile)
1211
1211
1212 trindex = trinfo[2]
1212 trindex = trinfo[2]
1213 if trindex is not None:
1213 if trindex is not None:
1214 dataoff = self.start(trindex)
1214 dataoff = self.start(trindex)
1215 else:
1215 else:
1216 # revlog was stripped at start of transaction, use all leftover data
1216 # revlog was stripped at start of transaction, use all leftover data
1217 trindex = len(self) - 1
1217 trindex = len(self) - 1
1218 dataoff = self.end(-2)
1218 dataoff = self.end(-2)
1219
1219
1220 tr.add(self.datafile, dataoff)
1220 tr.add(self.datafile, dataoff)
1221
1221
1222 if fp:
1222 if fp:
1223 fp.flush()
1223 fp.flush()
1224 fp.close()
1224 fp.close()
1225
1225
1226 df = self.opener(self.datafile, 'w')
1226 df = self.opener(self.datafile, 'w')
1227 try:
1227 try:
1228 for r in self:
1228 for r in self:
1229 df.write(self._chunkraw(r, r))
1229 df.write(self._chunkraw(r, r))
1230 finally:
1230 finally:
1231 df.close()
1231 df.close()
1232
1232
1233 fp = self.opener(self.indexfile, 'w', atomictemp=True)
1233 fp = self.opener(self.indexfile, 'w', atomictemp=True)
1234 self.version &= ~(REVLOGNGINLINEDATA)
1234 self.version &= ~(REVLOGNGINLINEDATA)
1235 self._inline = False
1235 self._inline = False
1236 for i in self:
1236 for i in self:
1237 e = self._io.packentry(self.index[i], self.node, self.version, i)
1237 e = self._io.packentry(self.index[i], self.node, self.version, i)
1238 fp.write(e)
1238 fp.write(e)
1239
1239
1240 # if we don't call close, the temp file will never replace the
1240 # if we don't call close, the temp file will never replace the
1241 # real index
1241 # real index
1242 fp.close()
1242 fp.close()
1243
1243
1244 tr.replace(self.indexfile, trindex * self._io.size)
1244 tr.replace(self.indexfile, trindex * self._io.size)
1245 self._chunkclear()
1245 self._chunkclear()
1246
1246
1247 def addrevision(self, text, transaction, link, p1, p2, cachedelta=None,
1247 def addrevision(self, text, transaction, link, p1, p2, cachedelta=None,
1248 node=None):
1248 node=None):
1249 """add a revision to the log
1249 """add a revision to the log
1250
1250
1251 text - the revision data to add
1251 text - the revision data to add
1252 transaction - the transaction object used for rollback
1252 transaction - the transaction object used for rollback
1253 link - the linkrev data to add
1253 link - the linkrev data to add
1254 p1, p2 - the parent nodeids of the revision
1254 p1, p2 - the parent nodeids of the revision
1255 cachedelta - an optional precomputed delta
1255 cachedelta - an optional precomputed delta
1256 node - nodeid of revision; typically node is not specified, and it is
1256 node - nodeid of revision; typically node is not specified, and it is
1257 computed by default as hash(text, p1, p2), however subclasses might
1257 computed by default as hash(text, p1, p2), however subclasses might
1258 use different hashing method (and override checkhash() in such case)
1258 use different hashing method (and override checkhash() in such case)
1259 """
1259 """
1260 if link == nullrev:
1260 if link == nullrev:
1261 raise RevlogError(_("attempted to add linkrev -1 to %s")
1261 raise RevlogError(_("attempted to add linkrev -1 to %s")
1262 % self.indexfile)
1262 % self.indexfile)
1263
1263
1264 if len(text) > _maxentrysize:
1264 if len(text) > _maxentrysize:
1265 raise RevlogError(
1265 raise RevlogError(
1266 _("%s: size of %d bytes exceeds maximum revlog storage of 2GiB")
1266 _("%s: size of %d bytes exceeds maximum revlog storage of 2GiB")
1267 % (self.indexfile, len(text)))
1267 % (self.indexfile, len(text)))
1268
1268
1269 node = node or self.hash(text, p1, p2)
1269 node = node or self.hash(text, p1, p2)
1270 if node in self.nodemap:
1270 if node in self.nodemap:
1271 return node
1271 return node
1272
1272
1273 dfh = None
1273 dfh = None
1274 if not self._inline:
1274 if not self._inline:
1275 dfh = self.opener(self.datafile, "a+")
1275 dfh = self.opener(self.datafile, "a+")
1276 ifh = self.opener(self.indexfile, "a+")
1276 ifh = self.opener(self.indexfile, "a+")
1277 try:
1277 try:
1278 return self._addrevision(node, text, transaction, link, p1, p2,
1278 return self._addrevision(node, text, transaction, link, p1, p2,
1279 REVIDX_DEFAULT_FLAGS, cachedelta, ifh, dfh)
1279 REVIDX_DEFAULT_FLAGS, cachedelta, ifh, dfh)
1280 finally:
1280 finally:
1281 if dfh:
1281 if dfh:
1282 dfh.close()
1282 dfh.close()
1283 ifh.close()
1283 ifh.close()
1284
1284
1285 def compress(self, text):
1285 def compress(self, text):
1286 """ generate a possibly-compressed representation of text """
1286 """ generate a possibly-compressed representation of text """
1287 if not text:
1287 if not text:
1288 return ("", text)
1288 return ("", text)
1289 l = len(text)
1289 l = len(text)
1290 bin = None
1290 bin = None
1291 if l < 44:
1291 if l < 44:
1292 pass
1292 pass
1293 elif l > 1000000:
1293 elif l > 1000000:
1294 # zlib makes an internal copy, thus doubling memory usage for
1294 # zlib makes an internal copy, thus doubling memory usage for
1295 # large files, so lets do this in pieces
1295 # large files, so lets do this in pieces
1296 z = zlib.compressobj()
1296 z = zlib.compressobj()
1297 p = []
1297 p = []
1298 pos = 0
1298 pos = 0
1299 while pos < l:
1299 while pos < l:
1300 pos2 = pos + 2**20
1300 pos2 = pos + 2**20
1301 p.append(z.compress(text[pos:pos2]))
1301 p.append(z.compress(text[pos:pos2]))
1302 pos = pos2
1302 pos = pos2
1303 p.append(z.flush())
1303 p.append(z.flush())
1304 if sum(map(len, p)) < l:
1304 if sum(map(len, p)) < l:
1305 bin = "".join(p)
1305 bin = "".join(p)
1306 else:
1306 else:
1307 bin = _compress(text)
1307 bin = _compress(text)
1308 if bin is None or len(bin) > l:
1308 if bin is None or len(bin) > l:
1309 if text[0] == '\0':
1309 if text[0] == '\0':
1310 return ("", text)
1310 return ("", text)
1311 return ('u', text)
1311 return ('u', text)
1312 return ("", bin)
1312 return ("", bin)
1313
1313
1314 def _isgooddelta(self, d, textlen):
1314 def _isgooddelta(self, d, textlen):
1315 """Returns True if the given delta is good. Good means that it is within
1315 """Returns True if the given delta is good. Good means that it is within
1316 the disk span, disk size, and chain length bounds that we know to be
1316 the disk span, disk size, and chain length bounds that we know to be
1317 performant."""
1317 performant."""
1318 if d is None:
1318 if d is None:
1319 return False
1319 return False
1320
1320
1321 # - 'dist' is the distance from the base revision -- bounding it limits
1321 # - 'dist' is the distance from the base revision -- bounding it limits
1322 # the amount of I/O we need to do.
1322 # the amount of I/O we need to do.
1323 # - 'compresseddeltalen' is the sum of the total size of deltas we need
1323 # - 'compresseddeltalen' is the sum of the total size of deltas we need
1324 # to apply -- bounding it limits the amount of CPU we consume.
1324 # to apply -- bounding it limits the amount of CPU we consume.
1325 dist, l, data, base, chainbase, chainlen, compresseddeltalen = d
1325 dist, l, data, base, chainbase, chainlen, compresseddeltalen = d
1326 if (dist > textlen * 4 or l > textlen or
1326 if (dist > textlen * 4 or l > textlen or
1327 compresseddeltalen > textlen * 2 or
1327 compresseddeltalen > textlen * 2 or
1328 (self._maxchainlen and chainlen > self._maxchainlen)):
1328 (self._maxchainlen and chainlen > self._maxchainlen)):
1329 return False
1329 return False
1330
1330
1331 return True
1331 return True
1332
1332
1333 def _addrevision(self, node, text, transaction, link, p1, p2, flags,
1333 def _addrevision(self, node, text, transaction, link, p1, p2, flags,
1334 cachedelta, ifh, dfh, alwayscache=False):
1334 cachedelta, ifh, dfh, alwayscache=False):
1335 """internal function to add revisions to the log
1335 """internal function to add revisions to the log
1336
1336
1337 see addrevision for argument descriptions.
1337 see addrevision for argument descriptions.
1338 invariants:
1338 invariants:
1339 - text is optional (can be None); if not set, cachedelta must be set.
1339 - text is optional (can be None); if not set, cachedelta must be set.
1340 if both are set, they must correspond to each other.
1340 if both are set, they must correspond to each other.
1341 """
1341 """
1342 btext = [text]
1342 btext = [text]
1343 def buildtext():
1343 def buildtext():
1344 if btext[0] is not None:
1344 if btext[0] is not None:
1345 return btext[0]
1345 return btext[0]
1346 baserev = cachedelta[0]
1346 baserev = cachedelta[0]
1347 delta = cachedelta[1]
1347 delta = cachedelta[1]
1348 # special case deltas which replace entire base; no need to decode
1348 # special case deltas which replace entire base; no need to decode
1349 # base revision. this neatly avoids censored bases, which throw when
1349 # base revision. this neatly avoids censored bases, which throw when
1350 # they're decoded.
1350 # they're decoded.
1351 hlen = struct.calcsize(">lll")
1351 hlen = struct.calcsize(">lll")
1352 if delta[:hlen] == mdiff.replacediffheader(self.rawsize(baserev),
1352 if delta[:hlen] == mdiff.replacediffheader(self.rawsize(baserev),
1353 len(delta) - hlen):
1353 len(delta) - hlen):
1354 btext[0] = delta[hlen:]
1354 btext[0] = delta[hlen:]
1355 else:
1355 else:
1356 if self._inline:
1356 if self._inline:
1357 fh = ifh
1357 fh = ifh
1358 else:
1358 else:
1359 fh = dfh
1359 fh = dfh
1360 basetext = self.revision(self.node(baserev), _df=fh)
1360 basetext = self.revision(self.node(baserev), _df=fh)
1361 btext[0] = mdiff.patch(basetext, delta)
1361 btext[0] = mdiff.patch(basetext, delta)
1362 try:
1362 try:
1363 self.checkhash(btext[0], p1, p2, node)
1363 self.checkhash(btext[0], p1, p2, node)
1364 if flags & REVIDX_ISCENSORED:
1364 if flags & REVIDX_ISCENSORED:
1365 raise RevlogError(_('node %s is not censored') % node)
1365 raise RevlogError(_('node %s is not censored') % node)
1366 except CensoredNodeError:
1366 except CensoredNodeError:
1367 # must pass the censored index flag to add censored revisions
1367 # must pass the censored index flag to add censored revisions
1368 if not flags & REVIDX_ISCENSORED:
1368 if not flags & REVIDX_ISCENSORED:
1369 raise
1369 raise
1370 return btext[0]
1370 return btext[0]
1371
1371
1372 def builddelta(rev):
1372 def builddelta(rev):
1373 # can we use the cached delta?
1373 # can we use the cached delta?
1374 if cachedelta and cachedelta[0] == rev:
1374 if cachedelta and cachedelta[0] == rev:
1375 delta = cachedelta[1]
1375 delta = cachedelta[1]
1376 else:
1376 else:
1377 t = buildtext()
1377 t = buildtext()
1378 if self.iscensored(rev):
1378 if self.iscensored(rev):
1379 # deltas based on a censored revision must replace the
1379 # deltas based on a censored revision must replace the
1380 # full content in one patch, so delta works everywhere
1380 # full content in one patch, so delta works everywhere
1381 header = mdiff.replacediffheader(self.rawsize(rev), len(t))
1381 header = mdiff.replacediffheader(self.rawsize(rev), len(t))
1382 delta = header + t
1382 delta = header + t
1383 else:
1383 else:
1384 if self._inline:
1384 if self._inline:
1385 fh = ifh
1385 fh = ifh
1386 else:
1386 else:
1387 fh = dfh
1387 fh = dfh
1388 ptext = self.revision(self.node(rev), _df=fh)
1388 ptext = self.revision(self.node(rev), _df=fh)
1389 delta = mdiff.textdiff(ptext, t)
1389 delta = mdiff.textdiff(ptext, t)
1390 data = self.compress(delta)
1390 data = self.compress(delta)
1391 l = len(data[1]) + len(data[0])
1391 l = len(data[1]) + len(data[0])
1392 if basecache[0] == rev:
1392 if basecache[0] == rev:
1393 chainbase = basecache[1]
1393 chainbase = basecache[1]
1394 else:
1394 else:
1395 chainbase = self.chainbase(rev)
1395 chainbase = self.chainbase(rev)
1396 dist = l + offset - self.start(chainbase)
1396 dist = l + offset - self.start(chainbase)
1397 if self._generaldelta:
1397 if self._generaldelta:
1398 base = rev
1398 base = rev
1399 else:
1399 else:
1400 base = chainbase
1400 base = chainbase
1401 chainlen, compresseddeltalen = self._chaininfo(rev)
1401 chainlen, compresseddeltalen = self._chaininfo(rev)
1402 chainlen += 1
1402 chainlen += 1
1403 compresseddeltalen += l
1403 compresseddeltalen += l
1404 return dist, l, data, base, chainbase, chainlen, compresseddeltalen
1404 return dist, l, data, base, chainbase, chainlen, compresseddeltalen
1405
1405
1406 curr = len(self)
1406 curr = len(self)
1407 prev = curr - 1
1407 prev = curr - 1
1408 base = chainbase = curr
1408 base = chainbase = curr
1409 chainlen = None
1409 chainlen = None
1410 offset = self.end(prev)
1410 offset = self.end(prev)
1411 delta = None
1411 delta = None
1412 if self._basecache is None:
1412 if self._basecache is None:
1413 self._basecache = (prev, self.chainbase(prev))
1413 self._basecache = (prev, self.chainbase(prev))
1414 basecache = self._basecache
1414 basecache = self._basecache
1415 p1r, p2r = self.rev(p1), self.rev(p2)
1415 p1r, p2r = self.rev(p1), self.rev(p2)
1416
1416
1417 # full versions are inserted when the needed deltas
1417 # full versions are inserted when the needed deltas
1418 # become comparable to the uncompressed text
1418 # become comparable to the uncompressed text
1419 if text is None:
1419 if text is None:
1420 textlen = mdiff.patchedsize(self.rawsize(cachedelta[0]),
1420 textlen = mdiff.patchedsize(self.rawsize(cachedelta[0]),
1421 cachedelta[1])
1421 cachedelta[1])
1422 else:
1422 else:
1423 textlen = len(text)
1423 textlen = len(text)
1424
1424
1425 # should we try to build a delta?
1425 # should we try to build a delta?
1426 if prev != nullrev:
1426 if prev != nullrev:
1427 if cachedelta and self._generaldelta and self._lazydeltabase:
1427 if cachedelta and self._generaldelta and self._lazydeltabase:
1428 # Assume what we received from the server is a good choice
1428 # Assume what we received from the server is a good choice
1429 # build delta will reuse the cache
1429 # build delta will reuse the cache
1430 delta = builddelta(cachedelta[0])
1430 candidatedelta = builddelta(cachedelta[0])
1431 if self._isgooddelta(candidatedelta, textlen):
1432 delta = candidatedelta
1433 elif prev != candidatedelta[3]:
1434 # Try against prev to hopefully save us a fulltext.
1435 delta = builddelta(prev)
1431 elif self._generaldelta:
1436 elif self._generaldelta:
1432 if p2r != nullrev and self._aggressivemergedeltas:
1437 if p2r != nullrev and self._aggressivemergedeltas:
1433 delta = builddelta(p1r)
1438 delta = builddelta(p1r)
1434 delta2 = builddelta(p2r)
1439 delta2 = builddelta(p2r)
1435 p1good = self._isgooddelta(delta, textlen)
1440 p1good = self._isgooddelta(delta, textlen)
1436 p2good = self._isgooddelta(delta2, textlen)
1441 p2good = self._isgooddelta(delta2, textlen)
1437 if p1good and p2good:
1442 if p1good and p2good:
1438 # If both are good deltas, choose the smallest
1443 # If both are good deltas, choose the smallest
1439 if delta2[1] < delta[1]:
1444 if delta2[1] < delta[1]:
1440 delta = delta2
1445 delta = delta2
1441 elif p2good:
1446 elif p2good:
1442 # If only p2 is good, use it
1447 # If only p2 is good, use it
1443 delta = delta2
1448 delta = delta2
1444 elif p1good:
1449 elif p1good:
1445 pass
1450 pass
1446 else:
1451 else:
1447 # Neither is good, try against prev to hopefully save us
1452 # Neither is good, try against prev to hopefully save us
1448 # a fulltext.
1453 # a fulltext.
1449 delta = builddelta(prev)
1454 delta = builddelta(prev)
1450 else:
1455 else:
1451 # Pick whichever parent is closer to us (to minimize the
1456 # Pick whichever parent is closer to us (to minimize the
1452 # chance of having to build a fulltext). Since
1457 # chance of having to build a fulltext). Since
1453 # nullrev == -1, any non-merge commit will always pick p1r.
1458 # nullrev == -1, any non-merge commit will always pick p1r.
1454 drev = p2r if p2r > p1r else p1r
1459 drev = p2r if p2r > p1r else p1r
1455 delta = builddelta(drev)
1460 delta = builddelta(drev)
1456 # If the chosen delta will result in us making a full text,
1461 # If the chosen delta will result in us making a full text,
1457 # give it one last try against prev.
1462 # give it one last try against prev.
1458 if drev != prev and not self._isgooddelta(delta, textlen):
1463 if drev != prev and not self._isgooddelta(delta, textlen):
1459 delta = builddelta(prev)
1464 delta = builddelta(prev)
1460 else:
1465 else:
1461 delta = builddelta(prev)
1466 delta = builddelta(prev)
1462 if delta is not None:
1467 if delta is not None:
1463 dist, l, data, base, chainbase, chainlen, compresseddeltalen = delta
1468 dist, l, data, base, chainbase, chainlen, compresseddeltalen = delta
1464
1469
1465 if not self._isgooddelta(delta, textlen):
1470 if not self._isgooddelta(delta, textlen):
1466 text = buildtext()
1471 text = buildtext()
1467 data = self.compress(text)
1472 data = self.compress(text)
1468 l = len(data[1]) + len(data[0])
1473 l = len(data[1]) + len(data[0])
1469 base = chainbase = curr
1474 base = chainbase = curr
1470
1475
1471 e = (offset_type(offset, flags), l, textlen,
1476 e = (offset_type(offset, flags), l, textlen,
1472 base, link, p1r, p2r, node)
1477 base, link, p1r, p2r, node)
1473 self.index.insert(-1, e)
1478 self.index.insert(-1, e)
1474 self.nodemap[node] = curr
1479 self.nodemap[node] = curr
1475
1480
1476 entry = self._io.packentry(e, self.node, self.version, curr)
1481 entry = self._io.packentry(e, self.node, self.version, curr)
1477 self._writeentry(transaction, ifh, dfh, entry, data, link, offset)
1482 self._writeentry(transaction, ifh, dfh, entry, data, link, offset)
1478
1483
1479 if alwayscache and text is None:
1484 if alwayscache and text is None:
1480 text = buildtext()
1485 text = buildtext()
1481
1486
1482 if type(text) == str: # only accept immutable objects
1487 if type(text) == str: # only accept immutable objects
1483 self._cache = (node, curr, text)
1488 self._cache = (node, curr, text)
1484 self._basecache = (curr, chainbase)
1489 self._basecache = (curr, chainbase)
1485 return node
1490 return node
1486
1491
1487 def _writeentry(self, transaction, ifh, dfh, entry, data, link, offset):
1492 def _writeentry(self, transaction, ifh, dfh, entry, data, link, offset):
1488 curr = len(self) - 1
1493 curr = len(self) - 1
1489 if not self._inline:
1494 if not self._inline:
1490 transaction.add(self.datafile, offset)
1495 transaction.add(self.datafile, offset)
1491 transaction.add(self.indexfile, curr * len(entry))
1496 transaction.add(self.indexfile, curr * len(entry))
1492 if data[0]:
1497 if data[0]:
1493 dfh.write(data[0])
1498 dfh.write(data[0])
1494 dfh.write(data[1])
1499 dfh.write(data[1])
1495 ifh.write(entry)
1500 ifh.write(entry)
1496 else:
1501 else:
1497 offset += curr * self._io.size
1502 offset += curr * self._io.size
1498 transaction.add(self.indexfile, offset, curr)
1503 transaction.add(self.indexfile, offset, curr)
1499 ifh.write(entry)
1504 ifh.write(entry)
1500 ifh.write(data[0])
1505 ifh.write(data[0])
1501 ifh.write(data[1])
1506 ifh.write(data[1])
1502 self.checkinlinesize(transaction, ifh)
1507 self.checkinlinesize(transaction, ifh)
1503
1508
1504 def addgroup(self, cg, linkmapper, transaction, addrevisioncb=None):
1509 def addgroup(self, cg, linkmapper, transaction, addrevisioncb=None):
1505 """
1510 """
1506 add a delta group
1511 add a delta group
1507
1512
1508 given a set of deltas, add them to the revision log. the
1513 given a set of deltas, add them to the revision log. the
1509 first delta is against its parent, which should be in our
1514 first delta is against its parent, which should be in our
1510 log, the rest are against the previous delta.
1515 log, the rest are against the previous delta.
1511
1516
1512 If ``addrevisioncb`` is defined, it will be called with arguments of
1517 If ``addrevisioncb`` is defined, it will be called with arguments of
1513 this revlog and the node that was added.
1518 this revlog and the node that was added.
1514 """
1519 """
1515
1520
1516 # track the base of the current delta log
1521 # track the base of the current delta log
1517 content = []
1522 content = []
1518 node = None
1523 node = None
1519
1524
1520 r = len(self)
1525 r = len(self)
1521 end = 0
1526 end = 0
1522 if r:
1527 if r:
1523 end = self.end(r - 1)
1528 end = self.end(r - 1)
1524 ifh = self.opener(self.indexfile, "a+")
1529 ifh = self.opener(self.indexfile, "a+")
1525 isize = r * self._io.size
1530 isize = r * self._io.size
1526 if self._inline:
1531 if self._inline:
1527 transaction.add(self.indexfile, end + isize, r)
1532 transaction.add(self.indexfile, end + isize, r)
1528 dfh = None
1533 dfh = None
1529 else:
1534 else:
1530 transaction.add(self.indexfile, isize, r)
1535 transaction.add(self.indexfile, isize, r)
1531 transaction.add(self.datafile, end)
1536 transaction.add(self.datafile, end)
1532 dfh = self.opener(self.datafile, "a+")
1537 dfh = self.opener(self.datafile, "a+")
1533 def flush():
1538 def flush():
1534 if dfh:
1539 if dfh:
1535 dfh.flush()
1540 dfh.flush()
1536 ifh.flush()
1541 ifh.flush()
1537 try:
1542 try:
1538 # loop through our set of deltas
1543 # loop through our set of deltas
1539 chain = None
1544 chain = None
1540 while True:
1545 while True:
1541 chunkdata = cg.deltachunk(chain)
1546 chunkdata = cg.deltachunk(chain)
1542 if not chunkdata:
1547 if not chunkdata:
1543 break
1548 break
1544 node = chunkdata['node']
1549 node = chunkdata['node']
1545 p1 = chunkdata['p1']
1550 p1 = chunkdata['p1']
1546 p2 = chunkdata['p2']
1551 p2 = chunkdata['p2']
1547 cs = chunkdata['cs']
1552 cs = chunkdata['cs']
1548 deltabase = chunkdata['deltabase']
1553 deltabase = chunkdata['deltabase']
1549 delta = chunkdata['delta']
1554 delta = chunkdata['delta']
1550
1555
1551 content.append(node)
1556 content.append(node)
1552
1557
1553 link = linkmapper(cs)
1558 link = linkmapper(cs)
1554 if node in self.nodemap:
1559 if node in self.nodemap:
1555 # this can happen if two branches make the same change
1560 # this can happen if two branches make the same change
1556 chain = node
1561 chain = node
1557 continue
1562 continue
1558
1563
1559 for p in (p1, p2):
1564 for p in (p1, p2):
1560 if p not in self.nodemap:
1565 if p not in self.nodemap:
1561 raise LookupError(p, self.indexfile,
1566 raise LookupError(p, self.indexfile,
1562 _('unknown parent'))
1567 _('unknown parent'))
1563
1568
1564 if deltabase not in self.nodemap:
1569 if deltabase not in self.nodemap:
1565 raise LookupError(deltabase, self.indexfile,
1570 raise LookupError(deltabase, self.indexfile,
1566 _('unknown delta base'))
1571 _('unknown delta base'))
1567
1572
1568 baserev = self.rev(deltabase)
1573 baserev = self.rev(deltabase)
1569
1574
1570 if baserev != nullrev and self.iscensored(baserev):
1575 if baserev != nullrev and self.iscensored(baserev):
1571 # if base is censored, delta must be full replacement in a
1576 # if base is censored, delta must be full replacement in a
1572 # single patch operation
1577 # single patch operation
1573 hlen = struct.calcsize(">lll")
1578 hlen = struct.calcsize(">lll")
1574 oldlen = self.rawsize(baserev)
1579 oldlen = self.rawsize(baserev)
1575 newlen = len(delta) - hlen
1580 newlen = len(delta) - hlen
1576 if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
1581 if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
1577 raise error.CensoredBaseError(self.indexfile,
1582 raise error.CensoredBaseError(self.indexfile,
1578 self.node(baserev))
1583 self.node(baserev))
1579
1584
1580 flags = REVIDX_DEFAULT_FLAGS
1585 flags = REVIDX_DEFAULT_FLAGS
1581 if self._peek_iscensored(baserev, delta, flush):
1586 if self._peek_iscensored(baserev, delta, flush):
1582 flags |= REVIDX_ISCENSORED
1587 flags |= REVIDX_ISCENSORED
1583
1588
1584 # We assume consumers of addrevisioncb will want to retrieve
1589 # We assume consumers of addrevisioncb will want to retrieve
1585 # the added revision, which will require a call to
1590 # the added revision, which will require a call to
1586 # revision(). revision() will fast path if there is a cache
1591 # revision(). revision() will fast path if there is a cache
1587 # hit. So, we tell _addrevision() to always cache in this case.
1592 # hit. So, we tell _addrevision() to always cache in this case.
1588 chain = self._addrevision(node, None, transaction, link,
1593 chain = self._addrevision(node, None, transaction, link,
1589 p1, p2, flags, (baserev, delta),
1594 p1, p2, flags, (baserev, delta),
1590 ifh, dfh,
1595 ifh, dfh,
1591 alwayscache=bool(addrevisioncb))
1596 alwayscache=bool(addrevisioncb))
1592
1597
1593 if addrevisioncb:
1598 if addrevisioncb:
1594 addrevisioncb(self, chain)
1599 addrevisioncb(self, chain)
1595
1600
1596 if not dfh and not self._inline:
1601 if not dfh and not self._inline:
1597 # addrevision switched from inline to conventional
1602 # addrevision switched from inline to conventional
1598 # reopen the index
1603 # reopen the index
1599 ifh.close()
1604 ifh.close()
1600 dfh = self.opener(self.datafile, "a+")
1605 dfh = self.opener(self.datafile, "a+")
1601 ifh = self.opener(self.indexfile, "a+")
1606 ifh = self.opener(self.indexfile, "a+")
1602 finally:
1607 finally:
1603 if dfh:
1608 if dfh:
1604 dfh.close()
1609 dfh.close()
1605 ifh.close()
1610 ifh.close()
1606
1611
1607 return content
1612 return content
1608
1613
1609 def iscensored(self, rev):
1614 def iscensored(self, rev):
1610 """Check if a file revision is censored."""
1615 """Check if a file revision is censored."""
1611 return False
1616 return False
1612
1617
1613 def _peek_iscensored(self, baserev, delta, flush):
1618 def _peek_iscensored(self, baserev, delta, flush):
1614 """Quickly check if a delta produces a censored revision."""
1619 """Quickly check if a delta produces a censored revision."""
1615 return False
1620 return False
1616
1621
1617 def getstrippoint(self, minlink):
1622 def getstrippoint(self, minlink):
1618 """find the minimum rev that must be stripped to strip the linkrev
1623 """find the minimum rev that must be stripped to strip the linkrev
1619
1624
1620 Returns a tuple containing the minimum rev and a set of all revs that
1625 Returns a tuple containing the minimum rev and a set of all revs that
1621 have linkrevs that will be broken by this strip.
1626 have linkrevs that will be broken by this strip.
1622 """
1627 """
1623 brokenrevs = set()
1628 brokenrevs = set()
1624 strippoint = len(self)
1629 strippoint = len(self)
1625
1630
1626 heads = {}
1631 heads = {}
1627 futurelargelinkrevs = set()
1632 futurelargelinkrevs = set()
1628 for head in self.headrevs():
1633 for head in self.headrevs():
1629 headlinkrev = self.linkrev(head)
1634 headlinkrev = self.linkrev(head)
1630 heads[head] = headlinkrev
1635 heads[head] = headlinkrev
1631 if headlinkrev >= minlink:
1636 if headlinkrev >= minlink:
1632 futurelargelinkrevs.add(headlinkrev)
1637 futurelargelinkrevs.add(headlinkrev)
1633
1638
1634 # This algorithm involves walking down the rev graph, starting at the
1639 # This algorithm involves walking down the rev graph, starting at the
1635 # heads. Since the revs are topologically sorted according to linkrev,
1640 # heads. Since the revs are topologically sorted according to linkrev,
1636 # once all head linkrevs are below the minlink, we know there are
1641 # once all head linkrevs are below the minlink, we know there are
1637 # no more revs that could have a linkrev greater than minlink.
1642 # no more revs that could have a linkrev greater than minlink.
1638 # So we can stop walking.
1643 # So we can stop walking.
1639 while futurelargelinkrevs:
1644 while futurelargelinkrevs:
1640 strippoint -= 1
1645 strippoint -= 1
1641 linkrev = heads.pop(strippoint)
1646 linkrev = heads.pop(strippoint)
1642
1647
1643 if linkrev < minlink:
1648 if linkrev < minlink:
1644 brokenrevs.add(strippoint)
1649 brokenrevs.add(strippoint)
1645 else:
1650 else:
1646 futurelargelinkrevs.remove(linkrev)
1651 futurelargelinkrevs.remove(linkrev)
1647
1652
1648 for p in self.parentrevs(strippoint):
1653 for p in self.parentrevs(strippoint):
1649 if p != nullrev:
1654 if p != nullrev:
1650 plinkrev = self.linkrev(p)
1655 plinkrev = self.linkrev(p)
1651 heads[p] = plinkrev
1656 heads[p] = plinkrev
1652 if plinkrev >= minlink:
1657 if plinkrev >= minlink:
1653 futurelargelinkrevs.add(plinkrev)
1658 futurelargelinkrevs.add(plinkrev)
1654
1659
1655 return strippoint, brokenrevs
1660 return strippoint, brokenrevs
1656
1661
1657 def strip(self, minlink, transaction):
1662 def strip(self, minlink, transaction):
1658 """truncate the revlog on the first revision with a linkrev >= minlink
1663 """truncate the revlog on the first revision with a linkrev >= minlink
1659
1664
1660 This function is called when we're stripping revision minlink and
1665 This function is called when we're stripping revision minlink and
1661 its descendants from the repository.
1666 its descendants from the repository.
1662
1667
1663 We have to remove all revisions with linkrev >= minlink, because
1668 We have to remove all revisions with linkrev >= minlink, because
1664 the equivalent changelog revisions will be renumbered after the
1669 the equivalent changelog revisions will be renumbered after the
1665 strip.
1670 strip.
1666
1671
1667 So we truncate the revlog on the first of these revisions, and
1672 So we truncate the revlog on the first of these revisions, and
1668 trust that the caller has saved the revisions that shouldn't be
1673 trust that the caller has saved the revisions that shouldn't be
1669 removed and that it'll re-add them after this truncation.
1674 removed and that it'll re-add them after this truncation.
1670 """
1675 """
1671 if len(self) == 0:
1676 if len(self) == 0:
1672 return
1677 return
1673
1678
1674 rev, _ = self.getstrippoint(minlink)
1679 rev, _ = self.getstrippoint(minlink)
1675 if rev == len(self):
1680 if rev == len(self):
1676 return
1681 return
1677
1682
1678 # first truncate the files on disk
1683 # first truncate the files on disk
1679 end = self.start(rev)
1684 end = self.start(rev)
1680 if not self._inline:
1685 if not self._inline:
1681 transaction.add(self.datafile, end)
1686 transaction.add(self.datafile, end)
1682 end = rev * self._io.size
1687 end = rev * self._io.size
1683 else:
1688 else:
1684 end += rev * self._io.size
1689 end += rev * self._io.size
1685
1690
1686 transaction.add(self.indexfile, end)
1691 transaction.add(self.indexfile, end)
1687
1692
1688 # then reset internal state in memory to forget those revisions
1693 # then reset internal state in memory to forget those revisions
1689 self._cache = None
1694 self._cache = None
1690 self._chaininfocache = {}
1695 self._chaininfocache = {}
1691 self._chunkclear()
1696 self._chunkclear()
1692 for x in xrange(rev, len(self)):
1697 for x in xrange(rev, len(self)):
1693 del self.nodemap[self.node(x)]
1698 del self.nodemap[self.node(x)]
1694
1699
1695 del self.index[rev:-1]
1700 del self.index[rev:-1]
1696
1701
1697 def checksize(self):
1702 def checksize(self):
1698 expected = 0
1703 expected = 0
1699 if len(self):
1704 if len(self):
1700 expected = max(0, self.end(len(self) - 1))
1705 expected = max(0, self.end(len(self) - 1))
1701
1706
1702 try:
1707 try:
1703 f = self.opener(self.datafile)
1708 f = self.opener(self.datafile)
1704 f.seek(0, 2)
1709 f.seek(0, 2)
1705 actual = f.tell()
1710 actual = f.tell()
1706 f.close()
1711 f.close()
1707 dd = actual - expected
1712 dd = actual - expected
1708 except IOError as inst:
1713 except IOError as inst:
1709 if inst.errno != errno.ENOENT:
1714 if inst.errno != errno.ENOENT:
1710 raise
1715 raise
1711 dd = 0
1716 dd = 0
1712
1717
1713 try:
1718 try:
1714 f = self.opener(self.indexfile)
1719 f = self.opener(self.indexfile)
1715 f.seek(0, 2)
1720 f.seek(0, 2)
1716 actual = f.tell()
1721 actual = f.tell()
1717 f.close()
1722 f.close()
1718 s = self._io.size
1723 s = self._io.size
1719 i = max(0, actual // s)
1724 i = max(0, actual // s)
1720 di = actual - (i * s)
1725 di = actual - (i * s)
1721 if self._inline:
1726 if self._inline:
1722 databytes = 0
1727 databytes = 0
1723 for r in self:
1728 for r in self:
1724 databytes += max(0, self.length(r))
1729 databytes += max(0, self.length(r))
1725 dd = 0
1730 dd = 0
1726 di = actual - len(self) * s - databytes
1731 di = actual - len(self) * s - databytes
1727 except IOError as inst:
1732 except IOError as inst:
1728 if inst.errno != errno.ENOENT:
1733 if inst.errno != errno.ENOENT:
1729 raise
1734 raise
1730 di = 0
1735 di = 0
1731
1736
1732 return (dd, di)
1737 return (dd, di)
1733
1738
1734 def files(self):
1739 def files(self):
1735 res = [self.indexfile]
1740 res = [self.indexfile]
1736 if not self._inline:
1741 if not self._inline:
1737 res.append(self.datafile)
1742 res.append(self.datafile)
1738 return res
1743 return res
General Comments 0
You need to be logged in to leave comments. Login now