##// END OF EJS Templates
revlog: support using an existing file handle when reading revlogs...
Gregory Szorc -
r26377:dfef0d3b default
parent child Browse files
Show More
@@ -1,1660 +1,1676 b''
1 # revlog.py - storage back-end for mercurial
1 # revlog.py - storage back-end for mercurial
2 #
2 #
3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 #
4 #
5 # This software may be used and distributed according to the terms of the
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
6 # GNU General Public License version 2 or any later version.
7
7
8 """Storage back-end for Mercurial.
8 """Storage back-end for Mercurial.
9
9
10 This provides efficient delta storage with O(1) retrieve and append
10 This provides efficient delta storage with O(1) retrieve and append
11 and O(changes) merge between branches.
11 and O(changes) merge between branches.
12 """
12 """
13
13
14 # import stuff from node for others to import from revlog
14 # import stuff from node for others to import from revlog
15 import collections
15 import collections
16 from node import bin, hex, nullid, nullrev
16 from node import bin, hex, nullid, nullrev
17 from i18n import _
17 from i18n import _
18 import ancestor, mdiff, parsers, error, util, templatefilters
18 import ancestor, mdiff, parsers, error, util, templatefilters
19 import struct, zlib, errno
19 import struct, zlib, errno
20
20
21 _pack = struct.pack
21 _pack = struct.pack
22 _unpack = struct.unpack
22 _unpack = struct.unpack
23 _compress = zlib.compress
23 _compress = zlib.compress
24 _decompress = zlib.decompress
24 _decompress = zlib.decompress
25 _sha = util.sha1
25 _sha = util.sha1
26
26
27 # revlog header flags
27 # revlog header flags
28 REVLOGV0 = 0
28 REVLOGV0 = 0
29 REVLOGNG = 1
29 REVLOGNG = 1
30 REVLOGNGINLINEDATA = (1 << 16)
30 REVLOGNGINLINEDATA = (1 << 16)
31 REVLOGGENERALDELTA = (1 << 17)
31 REVLOGGENERALDELTA = (1 << 17)
32 REVLOG_DEFAULT_FLAGS = REVLOGNGINLINEDATA
32 REVLOG_DEFAULT_FLAGS = REVLOGNGINLINEDATA
33 REVLOG_DEFAULT_FORMAT = REVLOGNG
33 REVLOG_DEFAULT_FORMAT = REVLOGNG
34 REVLOG_DEFAULT_VERSION = REVLOG_DEFAULT_FORMAT | REVLOG_DEFAULT_FLAGS
34 REVLOG_DEFAULT_VERSION = REVLOG_DEFAULT_FORMAT | REVLOG_DEFAULT_FLAGS
35 REVLOGNG_FLAGS = REVLOGNGINLINEDATA | REVLOGGENERALDELTA
35 REVLOGNG_FLAGS = REVLOGNGINLINEDATA | REVLOGGENERALDELTA
36
36
37 # revlog index flags
37 # revlog index flags
38 REVIDX_ISCENSORED = (1 << 15) # revision has censor metadata, must be verified
38 REVIDX_ISCENSORED = (1 << 15) # revision has censor metadata, must be verified
39 REVIDX_DEFAULT_FLAGS = 0
39 REVIDX_DEFAULT_FLAGS = 0
40 REVIDX_KNOWN_FLAGS = REVIDX_ISCENSORED
40 REVIDX_KNOWN_FLAGS = REVIDX_ISCENSORED
41
41
42 # max size of revlog with inline data
42 # max size of revlog with inline data
43 _maxinline = 131072
43 _maxinline = 131072
44 _chunksize = 1048576
44 _chunksize = 1048576
45
45
46 RevlogError = error.RevlogError
46 RevlogError = error.RevlogError
47 LookupError = error.LookupError
47 LookupError = error.LookupError
48 CensoredNodeError = error.CensoredNodeError
48 CensoredNodeError = error.CensoredNodeError
49
49
50 def getoffset(q):
50 def getoffset(q):
51 return int(q >> 16)
51 return int(q >> 16)
52
52
53 def gettype(q):
53 def gettype(q):
54 return int(q & 0xFFFF)
54 return int(q & 0xFFFF)
55
55
56 def offset_type(offset, type):
56 def offset_type(offset, type):
57 return long(long(offset) << 16 | type)
57 return long(long(offset) << 16 | type)
58
58
59 _nullhash = _sha(nullid)
59 _nullhash = _sha(nullid)
60
60
61 def hash(text, p1, p2):
61 def hash(text, p1, p2):
62 """generate a hash from the given text and its parent hashes
62 """generate a hash from the given text and its parent hashes
63
63
64 This hash combines both the current file contents and its history
64 This hash combines both the current file contents and its history
65 in a manner that makes it easy to distinguish nodes with the same
65 in a manner that makes it easy to distinguish nodes with the same
66 content in the revision graph.
66 content in the revision graph.
67 """
67 """
68 # As of now, if one of the parent node is null, p2 is null
68 # As of now, if one of the parent node is null, p2 is null
69 if p2 == nullid:
69 if p2 == nullid:
70 # deep copy of a hash is faster than creating one
70 # deep copy of a hash is faster than creating one
71 s = _nullhash.copy()
71 s = _nullhash.copy()
72 s.update(p1)
72 s.update(p1)
73 else:
73 else:
74 # none of the parent nodes are nullid
74 # none of the parent nodes are nullid
75 l = [p1, p2]
75 l = [p1, p2]
76 l.sort()
76 l.sort()
77 s = _sha(l[0])
77 s = _sha(l[0])
78 s.update(l[1])
78 s.update(l[1])
79 s.update(text)
79 s.update(text)
80 return s.digest()
80 return s.digest()
81
81
82 def decompress(bin):
82 def decompress(bin):
83 """ decompress the given input """
83 """ decompress the given input """
84 if not bin:
84 if not bin:
85 return bin
85 return bin
86 t = bin[0]
86 t = bin[0]
87 if t == '\0':
87 if t == '\0':
88 return bin
88 return bin
89 if t == 'x':
89 if t == 'x':
90 try:
90 try:
91 return _decompress(bin)
91 return _decompress(bin)
92 except zlib.error as e:
92 except zlib.error as e:
93 raise RevlogError(_("revlog decompress error: %s") % str(e))
93 raise RevlogError(_("revlog decompress error: %s") % str(e))
94 if t == 'u':
94 if t == 'u':
95 return bin[1:]
95 return bin[1:]
96 raise RevlogError(_("unknown compression type %r") % t)
96 raise RevlogError(_("unknown compression type %r") % t)
97
97
98 # index v0:
98 # index v0:
99 # 4 bytes: offset
99 # 4 bytes: offset
100 # 4 bytes: compressed length
100 # 4 bytes: compressed length
101 # 4 bytes: base rev
101 # 4 bytes: base rev
102 # 4 bytes: link rev
102 # 4 bytes: link rev
103 # 20 bytes: parent 1 nodeid
103 # 20 bytes: parent 1 nodeid
104 # 20 bytes: parent 2 nodeid
104 # 20 bytes: parent 2 nodeid
105 # 20 bytes: nodeid
105 # 20 bytes: nodeid
106 indexformatv0 = ">4l20s20s20s"
106 indexformatv0 = ">4l20s20s20s"
107
107
108 class revlogoldio(object):
108 class revlogoldio(object):
109 def __init__(self):
109 def __init__(self):
110 self.size = struct.calcsize(indexformatv0)
110 self.size = struct.calcsize(indexformatv0)
111
111
112 def parseindex(self, data, inline):
112 def parseindex(self, data, inline):
113 s = self.size
113 s = self.size
114 index = []
114 index = []
115 nodemap = {nullid: nullrev}
115 nodemap = {nullid: nullrev}
116 n = off = 0
116 n = off = 0
117 l = len(data)
117 l = len(data)
118 while off + s <= l:
118 while off + s <= l:
119 cur = data[off:off + s]
119 cur = data[off:off + s]
120 off += s
120 off += s
121 e = _unpack(indexformatv0, cur)
121 e = _unpack(indexformatv0, cur)
122 # transform to revlogv1 format
122 # transform to revlogv1 format
123 e2 = (offset_type(e[0], 0), e[1], -1, e[2], e[3],
123 e2 = (offset_type(e[0], 0), e[1], -1, e[2], e[3],
124 nodemap.get(e[4], nullrev), nodemap.get(e[5], nullrev), e[6])
124 nodemap.get(e[4], nullrev), nodemap.get(e[5], nullrev), e[6])
125 index.append(e2)
125 index.append(e2)
126 nodemap[e[6]] = n
126 nodemap[e[6]] = n
127 n += 1
127 n += 1
128
128
129 # add the magic null revision at -1
129 # add the magic null revision at -1
130 index.append((0, 0, 0, -1, -1, -1, -1, nullid))
130 index.append((0, 0, 0, -1, -1, -1, -1, nullid))
131
131
132 return index, nodemap, None
132 return index, nodemap, None
133
133
134 def packentry(self, entry, node, version, rev):
134 def packentry(self, entry, node, version, rev):
135 if gettype(entry[0]):
135 if gettype(entry[0]):
136 raise RevlogError(_("index entry flags need RevlogNG"))
136 raise RevlogError(_("index entry flags need RevlogNG"))
137 e2 = (getoffset(entry[0]), entry[1], entry[3], entry[4],
137 e2 = (getoffset(entry[0]), entry[1], entry[3], entry[4],
138 node(entry[5]), node(entry[6]), entry[7])
138 node(entry[5]), node(entry[6]), entry[7])
139 return _pack(indexformatv0, *e2)
139 return _pack(indexformatv0, *e2)
140
140
141 # index ng:
141 # index ng:
142 # 6 bytes: offset
142 # 6 bytes: offset
143 # 2 bytes: flags
143 # 2 bytes: flags
144 # 4 bytes: compressed length
144 # 4 bytes: compressed length
145 # 4 bytes: uncompressed length
145 # 4 bytes: uncompressed length
146 # 4 bytes: base rev
146 # 4 bytes: base rev
147 # 4 bytes: link rev
147 # 4 bytes: link rev
148 # 4 bytes: parent 1 rev
148 # 4 bytes: parent 1 rev
149 # 4 bytes: parent 2 rev
149 # 4 bytes: parent 2 rev
150 # 32 bytes: nodeid
150 # 32 bytes: nodeid
151 indexformatng = ">Qiiiiii20s12x"
151 indexformatng = ">Qiiiiii20s12x"
152 versionformat = ">I"
152 versionformat = ">I"
153
153
154 # corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
154 # corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
155 # signed integer)
155 # signed integer)
156 _maxentrysize = 0x7fffffff
156 _maxentrysize = 0x7fffffff
157
157
158 class revlogio(object):
158 class revlogio(object):
159 def __init__(self):
159 def __init__(self):
160 self.size = struct.calcsize(indexformatng)
160 self.size = struct.calcsize(indexformatng)
161
161
162 def parseindex(self, data, inline):
162 def parseindex(self, data, inline):
163 # call the C implementation to parse the index data
163 # call the C implementation to parse the index data
164 index, cache = parsers.parse_index2(data, inline)
164 index, cache = parsers.parse_index2(data, inline)
165 return index, getattr(index, 'nodemap', None), cache
165 return index, getattr(index, 'nodemap', None), cache
166
166
167 def packentry(self, entry, node, version, rev):
167 def packentry(self, entry, node, version, rev):
168 p = _pack(indexformatng, *entry)
168 p = _pack(indexformatng, *entry)
169 if rev == 0:
169 if rev == 0:
170 p = _pack(versionformat, version) + p[4:]
170 p = _pack(versionformat, version) + p[4:]
171 return p
171 return p
172
172
173 class revlog(object):
173 class revlog(object):
174 """
174 """
175 the underlying revision storage object
175 the underlying revision storage object
176
176
177 A revlog consists of two parts, an index and the revision data.
177 A revlog consists of two parts, an index and the revision data.
178
178
179 The index is a file with a fixed record size containing
179 The index is a file with a fixed record size containing
180 information on each revision, including its nodeid (hash), the
180 information on each revision, including its nodeid (hash), the
181 nodeids of its parents, the position and offset of its data within
181 nodeids of its parents, the position and offset of its data within
182 the data file, and the revision it's based on. Finally, each entry
182 the data file, and the revision it's based on. Finally, each entry
183 contains a linkrev entry that can serve as a pointer to external
183 contains a linkrev entry that can serve as a pointer to external
184 data.
184 data.
185
185
186 The revision data itself is a linear collection of data chunks.
186 The revision data itself is a linear collection of data chunks.
187 Each chunk represents a revision and is usually represented as a
187 Each chunk represents a revision and is usually represented as a
188 delta against the previous chunk. To bound lookup time, runs of
188 delta against the previous chunk. To bound lookup time, runs of
189 deltas are limited to about 2 times the length of the original
189 deltas are limited to about 2 times the length of the original
190 version data. This makes retrieval of a version proportional to
190 version data. This makes retrieval of a version proportional to
191 its size, or O(1) relative to the number of revisions.
191 its size, or O(1) relative to the number of revisions.
192
192
193 Both pieces of the revlog are written to in an append-only
193 Both pieces of the revlog are written to in an append-only
194 fashion, which means we never need to rewrite a file to insert or
194 fashion, which means we never need to rewrite a file to insert or
195 remove data, and can use some simple techniques to avoid the need
195 remove data, and can use some simple techniques to avoid the need
196 for locking while reading.
196 for locking while reading.
197 """
197 """
198 def __init__(self, opener, indexfile):
198 def __init__(self, opener, indexfile):
199 """
199 """
200 create a revlog object
200 create a revlog object
201
201
202 opener is a function that abstracts the file opening operation
202 opener is a function that abstracts the file opening operation
203 and can be used to implement COW semantics or the like.
203 and can be used to implement COW semantics or the like.
204 """
204 """
205 self.indexfile = indexfile
205 self.indexfile = indexfile
206 self.datafile = indexfile[:-2] + ".d"
206 self.datafile = indexfile[:-2] + ".d"
207 self.opener = opener
207 self.opener = opener
208 self._cache = None
208 self._cache = None
209 self._basecache = None
209 self._basecache = None
210 self._chunkcache = (0, '')
210 self._chunkcache = (0, '')
211 self._chunkcachesize = 65536
211 self._chunkcachesize = 65536
212 self._maxchainlen = None
212 self._maxchainlen = None
213 self._aggressivemergedeltas = False
213 self._aggressivemergedeltas = False
214 self.index = []
214 self.index = []
215 self._pcache = {}
215 self._pcache = {}
216 self._nodecache = {nullid: nullrev}
216 self._nodecache = {nullid: nullrev}
217 self._nodepos = None
217 self._nodepos = None
218
218
219 v = REVLOG_DEFAULT_VERSION
219 v = REVLOG_DEFAULT_VERSION
220 opts = getattr(opener, 'options', None)
220 opts = getattr(opener, 'options', None)
221 if opts is not None:
221 if opts is not None:
222 if 'revlogv1' in opts:
222 if 'revlogv1' in opts:
223 if 'generaldelta' in opts:
223 if 'generaldelta' in opts:
224 v |= REVLOGGENERALDELTA
224 v |= REVLOGGENERALDELTA
225 else:
225 else:
226 v = 0
226 v = 0
227 if 'chunkcachesize' in opts:
227 if 'chunkcachesize' in opts:
228 self._chunkcachesize = opts['chunkcachesize']
228 self._chunkcachesize = opts['chunkcachesize']
229 if 'maxchainlen' in opts:
229 if 'maxchainlen' in opts:
230 self._maxchainlen = opts['maxchainlen']
230 self._maxchainlen = opts['maxchainlen']
231 if 'aggressivemergedeltas' in opts:
231 if 'aggressivemergedeltas' in opts:
232 self._aggressivemergedeltas = opts['aggressivemergedeltas']
232 self._aggressivemergedeltas = opts['aggressivemergedeltas']
233
233
234 if self._chunkcachesize <= 0:
234 if self._chunkcachesize <= 0:
235 raise RevlogError(_('revlog chunk cache size %r is not greater '
235 raise RevlogError(_('revlog chunk cache size %r is not greater '
236 'than 0') % self._chunkcachesize)
236 'than 0') % self._chunkcachesize)
237 elif self._chunkcachesize & (self._chunkcachesize - 1):
237 elif self._chunkcachesize & (self._chunkcachesize - 1):
238 raise RevlogError(_('revlog chunk cache size %r is not a power '
238 raise RevlogError(_('revlog chunk cache size %r is not a power '
239 'of 2') % self._chunkcachesize)
239 'of 2') % self._chunkcachesize)
240
240
241 indexdata = ''
241 indexdata = ''
242 self._initempty = True
242 self._initempty = True
243 try:
243 try:
244 f = self.opener(self.indexfile)
244 f = self.opener(self.indexfile)
245 indexdata = f.read()
245 indexdata = f.read()
246 f.close()
246 f.close()
247 if len(indexdata) > 0:
247 if len(indexdata) > 0:
248 v = struct.unpack(versionformat, indexdata[:4])[0]
248 v = struct.unpack(versionformat, indexdata[:4])[0]
249 self._initempty = False
249 self._initempty = False
250 except IOError as inst:
250 except IOError as inst:
251 if inst.errno != errno.ENOENT:
251 if inst.errno != errno.ENOENT:
252 raise
252 raise
253
253
254 self.version = v
254 self.version = v
255 self._inline = v & REVLOGNGINLINEDATA
255 self._inline = v & REVLOGNGINLINEDATA
256 self._generaldelta = v & REVLOGGENERALDELTA
256 self._generaldelta = v & REVLOGGENERALDELTA
257 flags = v & ~0xFFFF
257 flags = v & ~0xFFFF
258 fmt = v & 0xFFFF
258 fmt = v & 0xFFFF
259 if fmt == REVLOGV0 and flags:
259 if fmt == REVLOGV0 and flags:
260 raise RevlogError(_("index %s unknown flags %#04x for format v0")
260 raise RevlogError(_("index %s unknown flags %#04x for format v0")
261 % (self.indexfile, flags >> 16))
261 % (self.indexfile, flags >> 16))
262 elif fmt == REVLOGNG and flags & ~REVLOGNG_FLAGS:
262 elif fmt == REVLOGNG and flags & ~REVLOGNG_FLAGS:
263 raise RevlogError(_("index %s unknown flags %#04x for revlogng")
263 raise RevlogError(_("index %s unknown flags %#04x for revlogng")
264 % (self.indexfile, flags >> 16))
264 % (self.indexfile, flags >> 16))
265 elif fmt > REVLOGNG:
265 elif fmt > REVLOGNG:
266 raise RevlogError(_("index %s unknown format %d")
266 raise RevlogError(_("index %s unknown format %d")
267 % (self.indexfile, fmt))
267 % (self.indexfile, fmt))
268
268
269 self._io = revlogio()
269 self._io = revlogio()
270 if self.version == REVLOGV0:
270 if self.version == REVLOGV0:
271 self._io = revlogoldio()
271 self._io = revlogoldio()
272 try:
272 try:
273 d = self._io.parseindex(indexdata, self._inline)
273 d = self._io.parseindex(indexdata, self._inline)
274 except (ValueError, IndexError):
274 except (ValueError, IndexError):
275 raise RevlogError(_("index %s is corrupted") % (self.indexfile))
275 raise RevlogError(_("index %s is corrupted") % (self.indexfile))
276 self.index, nodemap, self._chunkcache = d
276 self.index, nodemap, self._chunkcache = d
277 if nodemap is not None:
277 if nodemap is not None:
278 self.nodemap = self._nodecache = nodemap
278 self.nodemap = self._nodecache = nodemap
279 if not self._chunkcache:
279 if not self._chunkcache:
280 self._chunkclear()
280 self._chunkclear()
281 # revnum -> (chain-length, sum-delta-length)
281 # revnum -> (chain-length, sum-delta-length)
282 self._chaininfocache = {}
282 self._chaininfocache = {}
283
283
284 def tip(self):
284 def tip(self):
285 return self.node(len(self.index) - 2)
285 return self.node(len(self.index) - 2)
286 def __contains__(self, rev):
286 def __contains__(self, rev):
287 return 0 <= rev < len(self)
287 return 0 <= rev < len(self)
288 def __len__(self):
288 def __len__(self):
289 return len(self.index) - 1
289 return len(self.index) - 1
290 def __iter__(self):
290 def __iter__(self):
291 return iter(xrange(len(self)))
291 return iter(xrange(len(self)))
292 def revs(self, start=0, stop=None):
292 def revs(self, start=0, stop=None):
293 """iterate over all rev in this revlog (from start to stop)"""
293 """iterate over all rev in this revlog (from start to stop)"""
294 step = 1
294 step = 1
295 if stop is not None:
295 if stop is not None:
296 if start > stop:
296 if start > stop:
297 step = -1
297 step = -1
298 stop += step
298 stop += step
299 else:
299 else:
300 stop = len(self)
300 stop = len(self)
301 return xrange(start, stop, step)
301 return xrange(start, stop, step)
302
302
303 @util.propertycache
303 @util.propertycache
304 def nodemap(self):
304 def nodemap(self):
305 self.rev(self.node(0))
305 self.rev(self.node(0))
306 return self._nodecache
306 return self._nodecache
307
307
308 def hasnode(self, node):
308 def hasnode(self, node):
309 try:
309 try:
310 self.rev(node)
310 self.rev(node)
311 return True
311 return True
312 except KeyError:
312 except KeyError:
313 return False
313 return False
314
314
315 def clearcaches(self):
315 def clearcaches(self):
316 try:
316 try:
317 self._nodecache.clearcaches()
317 self._nodecache.clearcaches()
318 except AttributeError:
318 except AttributeError:
319 self._nodecache = {nullid: nullrev}
319 self._nodecache = {nullid: nullrev}
320 self._nodepos = None
320 self._nodepos = None
321
321
322 def rev(self, node):
322 def rev(self, node):
323 try:
323 try:
324 return self._nodecache[node]
324 return self._nodecache[node]
325 except TypeError:
325 except TypeError:
326 raise
326 raise
327 except RevlogError:
327 except RevlogError:
328 # parsers.c radix tree lookup failed
328 # parsers.c radix tree lookup failed
329 raise LookupError(node, self.indexfile, _('no node'))
329 raise LookupError(node, self.indexfile, _('no node'))
330 except KeyError:
330 except KeyError:
331 # pure python cache lookup failed
331 # pure python cache lookup failed
332 n = self._nodecache
332 n = self._nodecache
333 i = self.index
333 i = self.index
334 p = self._nodepos
334 p = self._nodepos
335 if p is None:
335 if p is None:
336 p = len(i) - 2
336 p = len(i) - 2
337 for r in xrange(p, -1, -1):
337 for r in xrange(p, -1, -1):
338 v = i[r][7]
338 v = i[r][7]
339 n[v] = r
339 n[v] = r
340 if v == node:
340 if v == node:
341 self._nodepos = r - 1
341 self._nodepos = r - 1
342 return r
342 return r
343 raise LookupError(node, self.indexfile, _('no node'))
343 raise LookupError(node, self.indexfile, _('no node'))
344
344
345 def node(self, rev):
345 def node(self, rev):
346 return self.index[rev][7]
346 return self.index[rev][7]
347 def linkrev(self, rev):
347 def linkrev(self, rev):
348 return self.index[rev][4]
348 return self.index[rev][4]
349 def parents(self, node):
349 def parents(self, node):
350 i = self.index
350 i = self.index
351 d = i[self.rev(node)]
351 d = i[self.rev(node)]
352 return i[d[5]][7], i[d[6]][7] # map revisions to nodes inline
352 return i[d[5]][7], i[d[6]][7] # map revisions to nodes inline
353 def parentrevs(self, rev):
353 def parentrevs(self, rev):
354 return self.index[rev][5:7]
354 return self.index[rev][5:7]
355 def start(self, rev):
355 def start(self, rev):
356 return int(self.index[rev][0] >> 16)
356 return int(self.index[rev][0] >> 16)
357 def end(self, rev):
357 def end(self, rev):
358 return self.start(rev) + self.length(rev)
358 return self.start(rev) + self.length(rev)
359 def length(self, rev):
359 def length(self, rev):
360 return self.index[rev][1]
360 return self.index[rev][1]
361 def chainbase(self, rev):
361 def chainbase(self, rev):
362 index = self.index
362 index = self.index
363 base = index[rev][3]
363 base = index[rev][3]
364 while base != rev:
364 while base != rev:
365 rev = base
365 rev = base
366 base = index[rev][3]
366 base = index[rev][3]
367 return base
367 return base
368 def chainlen(self, rev):
368 def chainlen(self, rev):
369 return self._chaininfo(rev)[0]
369 return self._chaininfo(rev)[0]
370
370
371 def _chaininfo(self, rev):
371 def _chaininfo(self, rev):
372 chaininfocache = self._chaininfocache
372 chaininfocache = self._chaininfocache
373 if rev in chaininfocache:
373 if rev in chaininfocache:
374 return chaininfocache[rev]
374 return chaininfocache[rev]
375 index = self.index
375 index = self.index
376 generaldelta = self._generaldelta
376 generaldelta = self._generaldelta
377 iterrev = rev
377 iterrev = rev
378 e = index[iterrev]
378 e = index[iterrev]
379 clen = 0
379 clen = 0
380 compresseddeltalen = 0
380 compresseddeltalen = 0
381 while iterrev != e[3]:
381 while iterrev != e[3]:
382 clen += 1
382 clen += 1
383 compresseddeltalen += e[1]
383 compresseddeltalen += e[1]
384 if generaldelta:
384 if generaldelta:
385 iterrev = e[3]
385 iterrev = e[3]
386 else:
386 else:
387 iterrev -= 1
387 iterrev -= 1
388 if iterrev in chaininfocache:
388 if iterrev in chaininfocache:
389 t = chaininfocache[iterrev]
389 t = chaininfocache[iterrev]
390 clen += t[0]
390 clen += t[0]
391 compresseddeltalen += t[1]
391 compresseddeltalen += t[1]
392 break
392 break
393 e = index[iterrev]
393 e = index[iterrev]
394 else:
394 else:
395 # Add text length of base since decompressing that also takes
395 # Add text length of base since decompressing that also takes
396 # work. For cache hits the length is already included.
396 # work. For cache hits the length is already included.
397 compresseddeltalen += e[1]
397 compresseddeltalen += e[1]
398 r = (clen, compresseddeltalen)
398 r = (clen, compresseddeltalen)
399 chaininfocache[rev] = r
399 chaininfocache[rev] = r
400 return r
400 return r
401
401
402 def flags(self, rev):
402 def flags(self, rev):
403 return self.index[rev][0] & 0xFFFF
403 return self.index[rev][0] & 0xFFFF
404 def rawsize(self, rev):
404 def rawsize(self, rev):
405 """return the length of the uncompressed text for a given revision"""
405 """return the length of the uncompressed text for a given revision"""
406 l = self.index[rev][2]
406 l = self.index[rev][2]
407 if l >= 0:
407 if l >= 0:
408 return l
408 return l
409
409
410 t = self.revision(self.node(rev))
410 t = self.revision(self.node(rev))
411 return len(t)
411 return len(t)
412 size = rawsize
412 size = rawsize
413
413
414 def ancestors(self, revs, stoprev=0, inclusive=False):
414 def ancestors(self, revs, stoprev=0, inclusive=False):
415 """Generate the ancestors of 'revs' in reverse topological order.
415 """Generate the ancestors of 'revs' in reverse topological order.
416 Does not generate revs lower than stoprev.
416 Does not generate revs lower than stoprev.
417
417
418 See the documentation for ancestor.lazyancestors for more details."""
418 See the documentation for ancestor.lazyancestors for more details."""
419
419
420 return ancestor.lazyancestors(self.parentrevs, revs, stoprev=stoprev,
420 return ancestor.lazyancestors(self.parentrevs, revs, stoprev=stoprev,
421 inclusive=inclusive)
421 inclusive=inclusive)
422
422
423 def descendants(self, revs):
423 def descendants(self, revs):
424 """Generate the descendants of 'revs' in revision order.
424 """Generate the descendants of 'revs' in revision order.
425
425
426 Yield a sequence of revision numbers starting with a child of
426 Yield a sequence of revision numbers starting with a child of
427 some rev in revs, i.e., each revision is *not* considered a
427 some rev in revs, i.e., each revision is *not* considered a
428 descendant of itself. Results are ordered by revision number (a
428 descendant of itself. Results are ordered by revision number (a
429 topological sort)."""
429 topological sort)."""
430 first = min(revs)
430 first = min(revs)
431 if first == nullrev:
431 if first == nullrev:
432 for i in self:
432 for i in self:
433 yield i
433 yield i
434 return
434 return
435
435
436 seen = set(revs)
436 seen = set(revs)
437 for i in self.revs(start=first + 1):
437 for i in self.revs(start=first + 1):
438 for x in self.parentrevs(i):
438 for x in self.parentrevs(i):
439 if x != nullrev and x in seen:
439 if x != nullrev and x in seen:
440 seen.add(i)
440 seen.add(i)
441 yield i
441 yield i
442 break
442 break
443
443
444 def findcommonmissing(self, common=None, heads=None):
444 def findcommonmissing(self, common=None, heads=None):
445 """Return a tuple of the ancestors of common and the ancestors of heads
445 """Return a tuple of the ancestors of common and the ancestors of heads
446 that are not ancestors of common. In revset terminology, we return the
446 that are not ancestors of common. In revset terminology, we return the
447 tuple:
447 tuple:
448
448
449 ::common, (::heads) - (::common)
449 ::common, (::heads) - (::common)
450
450
451 The list is sorted by revision number, meaning it is
451 The list is sorted by revision number, meaning it is
452 topologically sorted.
452 topologically sorted.
453
453
454 'heads' and 'common' are both lists of node IDs. If heads is
454 'heads' and 'common' are both lists of node IDs. If heads is
455 not supplied, uses all of the revlog's heads. If common is not
455 not supplied, uses all of the revlog's heads. If common is not
456 supplied, uses nullid."""
456 supplied, uses nullid."""
457 if common is None:
457 if common is None:
458 common = [nullid]
458 common = [nullid]
459 if heads is None:
459 if heads is None:
460 heads = self.heads()
460 heads = self.heads()
461
461
462 common = [self.rev(n) for n in common]
462 common = [self.rev(n) for n in common]
463 heads = [self.rev(n) for n in heads]
463 heads = [self.rev(n) for n in heads]
464
464
465 # we want the ancestors, but inclusive
465 # we want the ancestors, but inclusive
466 class lazyset(object):
466 class lazyset(object):
467 def __init__(self, lazyvalues):
467 def __init__(self, lazyvalues):
468 self.addedvalues = set()
468 self.addedvalues = set()
469 self.lazyvalues = lazyvalues
469 self.lazyvalues = lazyvalues
470
470
471 def __contains__(self, value):
471 def __contains__(self, value):
472 return value in self.addedvalues or value in self.lazyvalues
472 return value in self.addedvalues or value in self.lazyvalues
473
473
474 def __iter__(self):
474 def __iter__(self):
475 added = self.addedvalues
475 added = self.addedvalues
476 for r in added:
476 for r in added:
477 yield r
477 yield r
478 for r in self.lazyvalues:
478 for r in self.lazyvalues:
479 if not r in added:
479 if not r in added:
480 yield r
480 yield r
481
481
482 def add(self, value):
482 def add(self, value):
483 self.addedvalues.add(value)
483 self.addedvalues.add(value)
484
484
485 def update(self, values):
485 def update(self, values):
486 self.addedvalues.update(values)
486 self.addedvalues.update(values)
487
487
488 has = lazyset(self.ancestors(common))
488 has = lazyset(self.ancestors(common))
489 has.add(nullrev)
489 has.add(nullrev)
490 has.update(common)
490 has.update(common)
491
491
492 # take all ancestors from heads that aren't in has
492 # take all ancestors from heads that aren't in has
493 missing = set()
493 missing = set()
494 visit = collections.deque(r for r in heads if r not in has)
494 visit = collections.deque(r for r in heads if r not in has)
495 while visit:
495 while visit:
496 r = visit.popleft()
496 r = visit.popleft()
497 if r in missing:
497 if r in missing:
498 continue
498 continue
499 else:
499 else:
500 missing.add(r)
500 missing.add(r)
501 for p in self.parentrevs(r):
501 for p in self.parentrevs(r):
502 if p not in has:
502 if p not in has:
503 visit.append(p)
503 visit.append(p)
504 missing = list(missing)
504 missing = list(missing)
505 missing.sort()
505 missing.sort()
506 return has, [self.node(r) for r in missing]
506 return has, [self.node(r) for r in missing]
507
507
508 def incrementalmissingrevs(self, common=None):
508 def incrementalmissingrevs(self, common=None):
509 """Return an object that can be used to incrementally compute the
509 """Return an object that can be used to incrementally compute the
510 revision numbers of the ancestors of arbitrary sets that are not
510 revision numbers of the ancestors of arbitrary sets that are not
511 ancestors of common. This is an ancestor.incrementalmissingancestors
511 ancestors of common. This is an ancestor.incrementalmissingancestors
512 object.
512 object.
513
513
514 'common' is a list of revision numbers. If common is not supplied, uses
514 'common' is a list of revision numbers. If common is not supplied, uses
515 nullrev.
515 nullrev.
516 """
516 """
517 if common is None:
517 if common is None:
518 common = [nullrev]
518 common = [nullrev]
519
519
520 return ancestor.incrementalmissingancestors(self.parentrevs, common)
520 return ancestor.incrementalmissingancestors(self.parentrevs, common)
521
521
522 def findmissingrevs(self, common=None, heads=None):
522 def findmissingrevs(self, common=None, heads=None):
523 """Return the revision numbers of the ancestors of heads that
523 """Return the revision numbers of the ancestors of heads that
524 are not ancestors of common.
524 are not ancestors of common.
525
525
526 More specifically, return a list of revision numbers corresponding to
526 More specifically, return a list of revision numbers corresponding to
527 nodes N such that every N satisfies the following constraints:
527 nodes N such that every N satisfies the following constraints:
528
528
529 1. N is an ancestor of some node in 'heads'
529 1. N is an ancestor of some node in 'heads'
530 2. N is not an ancestor of any node in 'common'
530 2. N is not an ancestor of any node in 'common'
531
531
532 The list is sorted by revision number, meaning it is
532 The list is sorted by revision number, meaning it is
533 topologically sorted.
533 topologically sorted.
534
534
535 'heads' and 'common' are both lists of revision numbers. If heads is
535 'heads' and 'common' are both lists of revision numbers. If heads is
536 not supplied, uses all of the revlog's heads. If common is not
536 not supplied, uses all of the revlog's heads. If common is not
537 supplied, uses nullid."""
537 supplied, uses nullid."""
538 if common is None:
538 if common is None:
539 common = [nullrev]
539 common = [nullrev]
540 if heads is None:
540 if heads is None:
541 heads = self.headrevs()
541 heads = self.headrevs()
542
542
543 inc = self.incrementalmissingrevs(common=common)
543 inc = self.incrementalmissingrevs(common=common)
544 return inc.missingancestors(heads)
544 return inc.missingancestors(heads)
545
545
546 def findmissing(self, common=None, heads=None):
546 def findmissing(self, common=None, heads=None):
547 """Return the ancestors of heads that are not ancestors of common.
547 """Return the ancestors of heads that are not ancestors of common.
548
548
549 More specifically, return a list of nodes N such that every N
549 More specifically, return a list of nodes N such that every N
550 satisfies the following constraints:
550 satisfies the following constraints:
551
551
552 1. N is an ancestor of some node in 'heads'
552 1. N is an ancestor of some node in 'heads'
553 2. N is not an ancestor of any node in 'common'
553 2. N is not an ancestor of any node in 'common'
554
554
555 The list is sorted by revision number, meaning it is
555 The list is sorted by revision number, meaning it is
556 topologically sorted.
556 topologically sorted.
557
557
558 'heads' and 'common' are both lists of node IDs. If heads is
558 'heads' and 'common' are both lists of node IDs. If heads is
559 not supplied, uses all of the revlog's heads. If common is not
559 not supplied, uses all of the revlog's heads. If common is not
560 supplied, uses nullid."""
560 supplied, uses nullid."""
561 if common is None:
561 if common is None:
562 common = [nullid]
562 common = [nullid]
563 if heads is None:
563 if heads is None:
564 heads = self.heads()
564 heads = self.heads()
565
565
566 common = [self.rev(n) for n in common]
566 common = [self.rev(n) for n in common]
567 heads = [self.rev(n) for n in heads]
567 heads = [self.rev(n) for n in heads]
568
568
569 inc = self.incrementalmissingrevs(common=common)
569 inc = self.incrementalmissingrevs(common=common)
570 return [self.node(r) for r in inc.missingancestors(heads)]
570 return [self.node(r) for r in inc.missingancestors(heads)]
571
571
572 def nodesbetween(self, roots=None, heads=None):
572 def nodesbetween(self, roots=None, heads=None):
573 """Return a topological path from 'roots' to 'heads'.
573 """Return a topological path from 'roots' to 'heads'.
574
574
575 Return a tuple (nodes, outroots, outheads) where 'nodes' is a
575 Return a tuple (nodes, outroots, outheads) where 'nodes' is a
576 topologically sorted list of all nodes N that satisfy both of
576 topologically sorted list of all nodes N that satisfy both of
577 these constraints:
577 these constraints:
578
578
579 1. N is a descendant of some node in 'roots'
579 1. N is a descendant of some node in 'roots'
580 2. N is an ancestor of some node in 'heads'
580 2. N is an ancestor of some node in 'heads'
581
581
582 Every node is considered to be both a descendant and an ancestor
582 Every node is considered to be both a descendant and an ancestor
583 of itself, so every reachable node in 'roots' and 'heads' will be
583 of itself, so every reachable node in 'roots' and 'heads' will be
584 included in 'nodes'.
584 included in 'nodes'.
585
585
586 'outroots' is the list of reachable nodes in 'roots', i.e., the
586 'outroots' is the list of reachable nodes in 'roots', i.e., the
587 subset of 'roots' that is returned in 'nodes'. Likewise,
587 subset of 'roots' that is returned in 'nodes'. Likewise,
588 'outheads' is the subset of 'heads' that is also in 'nodes'.
588 'outheads' is the subset of 'heads' that is also in 'nodes'.
589
589
590 'roots' and 'heads' are both lists of node IDs. If 'roots' is
590 'roots' and 'heads' are both lists of node IDs. If 'roots' is
591 unspecified, uses nullid as the only root. If 'heads' is
591 unspecified, uses nullid as the only root. If 'heads' is
592 unspecified, uses list of all of the revlog's heads."""
592 unspecified, uses list of all of the revlog's heads."""
593 nonodes = ([], [], [])
593 nonodes = ([], [], [])
594 if roots is not None:
594 if roots is not None:
595 roots = list(roots)
595 roots = list(roots)
596 if not roots:
596 if not roots:
597 return nonodes
597 return nonodes
598 lowestrev = min([self.rev(n) for n in roots])
598 lowestrev = min([self.rev(n) for n in roots])
599 else:
599 else:
600 roots = [nullid] # Everybody's a descendant of nullid
600 roots = [nullid] # Everybody's a descendant of nullid
601 lowestrev = nullrev
601 lowestrev = nullrev
602 if (lowestrev == nullrev) and (heads is None):
602 if (lowestrev == nullrev) and (heads is None):
603 # We want _all_ the nodes!
603 # We want _all_ the nodes!
604 return ([self.node(r) for r in self], [nullid], list(self.heads()))
604 return ([self.node(r) for r in self], [nullid], list(self.heads()))
605 if heads is None:
605 if heads is None:
606 # All nodes are ancestors, so the latest ancestor is the last
606 # All nodes are ancestors, so the latest ancestor is the last
607 # node.
607 # node.
608 highestrev = len(self) - 1
608 highestrev = len(self) - 1
609 # Set ancestors to None to signal that every node is an ancestor.
609 # Set ancestors to None to signal that every node is an ancestor.
610 ancestors = None
610 ancestors = None
611 # Set heads to an empty dictionary for later discovery of heads
611 # Set heads to an empty dictionary for later discovery of heads
612 heads = {}
612 heads = {}
613 else:
613 else:
614 heads = list(heads)
614 heads = list(heads)
615 if not heads:
615 if not heads:
616 return nonodes
616 return nonodes
617 ancestors = set()
617 ancestors = set()
618 # Turn heads into a dictionary so we can remove 'fake' heads.
618 # Turn heads into a dictionary so we can remove 'fake' heads.
619 # Also, later we will be using it to filter out the heads we can't
619 # Also, later we will be using it to filter out the heads we can't
620 # find from roots.
620 # find from roots.
621 heads = dict.fromkeys(heads, False)
621 heads = dict.fromkeys(heads, False)
622 # Start at the top and keep marking parents until we're done.
622 # Start at the top and keep marking parents until we're done.
623 nodestotag = set(heads)
623 nodestotag = set(heads)
624 # Remember where the top was so we can use it as a limit later.
624 # Remember where the top was so we can use it as a limit later.
625 highestrev = max([self.rev(n) for n in nodestotag])
625 highestrev = max([self.rev(n) for n in nodestotag])
626 while nodestotag:
626 while nodestotag:
627 # grab a node to tag
627 # grab a node to tag
628 n = nodestotag.pop()
628 n = nodestotag.pop()
629 # Never tag nullid
629 # Never tag nullid
630 if n == nullid:
630 if n == nullid:
631 continue
631 continue
632 # A node's revision number represents its place in a
632 # A node's revision number represents its place in a
633 # topologically sorted list of nodes.
633 # topologically sorted list of nodes.
634 r = self.rev(n)
634 r = self.rev(n)
635 if r >= lowestrev:
635 if r >= lowestrev:
636 if n not in ancestors:
636 if n not in ancestors:
637 # If we are possibly a descendant of one of the roots
637 # If we are possibly a descendant of one of the roots
638 # and we haven't already been marked as an ancestor
638 # and we haven't already been marked as an ancestor
639 ancestors.add(n) # Mark as ancestor
639 ancestors.add(n) # Mark as ancestor
640 # Add non-nullid parents to list of nodes to tag.
640 # Add non-nullid parents to list of nodes to tag.
641 nodestotag.update([p for p in self.parents(n) if
641 nodestotag.update([p for p in self.parents(n) if
642 p != nullid])
642 p != nullid])
643 elif n in heads: # We've seen it before, is it a fake head?
643 elif n in heads: # We've seen it before, is it a fake head?
644 # So it is, real heads should not be the ancestors of
644 # So it is, real heads should not be the ancestors of
645 # any other heads.
645 # any other heads.
646 heads.pop(n)
646 heads.pop(n)
647 if not ancestors:
647 if not ancestors:
648 return nonodes
648 return nonodes
649 # Now that we have our set of ancestors, we want to remove any
649 # Now that we have our set of ancestors, we want to remove any
650 # roots that are not ancestors.
650 # roots that are not ancestors.
651
651
652 # If one of the roots was nullid, everything is included anyway.
652 # If one of the roots was nullid, everything is included anyway.
653 if lowestrev > nullrev:
653 if lowestrev > nullrev:
654 # But, since we weren't, let's recompute the lowest rev to not
654 # But, since we weren't, let's recompute the lowest rev to not
655 # include roots that aren't ancestors.
655 # include roots that aren't ancestors.
656
656
657 # Filter out roots that aren't ancestors of heads
657 # Filter out roots that aren't ancestors of heads
658 roots = [n for n in roots if n in ancestors]
658 roots = [n for n in roots if n in ancestors]
659 # Recompute the lowest revision
659 # Recompute the lowest revision
660 if roots:
660 if roots:
661 lowestrev = min([self.rev(n) for n in roots])
661 lowestrev = min([self.rev(n) for n in roots])
662 else:
662 else:
663 # No more roots? Return empty list
663 # No more roots? Return empty list
664 return nonodes
664 return nonodes
665 else:
665 else:
666 # We are descending from nullid, and don't need to care about
666 # We are descending from nullid, and don't need to care about
667 # any other roots.
667 # any other roots.
668 lowestrev = nullrev
668 lowestrev = nullrev
669 roots = [nullid]
669 roots = [nullid]
670 # Transform our roots list into a set.
670 # Transform our roots list into a set.
671 descendants = set(roots)
671 descendants = set(roots)
672 # Also, keep the original roots so we can filter out roots that aren't
672 # Also, keep the original roots so we can filter out roots that aren't
673 # 'real' roots (i.e. are descended from other roots).
673 # 'real' roots (i.e. are descended from other roots).
674 roots = descendants.copy()
674 roots = descendants.copy()
675 # Our topologically sorted list of output nodes.
675 # Our topologically sorted list of output nodes.
676 orderedout = []
676 orderedout = []
677 # Don't start at nullid since we don't want nullid in our output list,
677 # Don't start at nullid since we don't want nullid in our output list,
678 # and if nullid shows up in descendants, empty parents will look like
678 # and if nullid shows up in descendants, empty parents will look like
679 # they're descendants.
679 # they're descendants.
680 for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
680 for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
681 n = self.node(r)
681 n = self.node(r)
682 isdescendant = False
682 isdescendant = False
683 if lowestrev == nullrev: # Everybody is a descendant of nullid
683 if lowestrev == nullrev: # Everybody is a descendant of nullid
684 isdescendant = True
684 isdescendant = True
685 elif n in descendants:
685 elif n in descendants:
686 # n is already a descendant
686 # n is already a descendant
687 isdescendant = True
687 isdescendant = True
688 # This check only needs to be done here because all the roots
688 # This check only needs to be done here because all the roots
689 # will start being marked is descendants before the loop.
689 # will start being marked is descendants before the loop.
690 if n in roots:
690 if n in roots:
691 # If n was a root, check if it's a 'real' root.
691 # If n was a root, check if it's a 'real' root.
692 p = tuple(self.parents(n))
692 p = tuple(self.parents(n))
693 # If any of its parents are descendants, it's not a root.
693 # If any of its parents are descendants, it's not a root.
694 if (p[0] in descendants) or (p[1] in descendants):
694 if (p[0] in descendants) or (p[1] in descendants):
695 roots.remove(n)
695 roots.remove(n)
696 else:
696 else:
697 p = tuple(self.parents(n))
697 p = tuple(self.parents(n))
698 # A node is a descendant if either of its parents are
698 # A node is a descendant if either of its parents are
699 # descendants. (We seeded the dependents list with the roots
699 # descendants. (We seeded the dependents list with the roots
700 # up there, remember?)
700 # up there, remember?)
701 if (p[0] in descendants) or (p[1] in descendants):
701 if (p[0] in descendants) or (p[1] in descendants):
702 descendants.add(n)
702 descendants.add(n)
703 isdescendant = True
703 isdescendant = True
704 if isdescendant and ((ancestors is None) or (n in ancestors)):
704 if isdescendant and ((ancestors is None) or (n in ancestors)):
705 # Only include nodes that are both descendants and ancestors.
705 # Only include nodes that are both descendants and ancestors.
706 orderedout.append(n)
706 orderedout.append(n)
707 if (ancestors is not None) and (n in heads):
707 if (ancestors is not None) and (n in heads):
708 # We're trying to figure out which heads are reachable
708 # We're trying to figure out which heads are reachable
709 # from roots.
709 # from roots.
710 # Mark this head as having been reached
710 # Mark this head as having been reached
711 heads[n] = True
711 heads[n] = True
712 elif ancestors is None:
712 elif ancestors is None:
713 # Otherwise, we're trying to discover the heads.
713 # Otherwise, we're trying to discover the heads.
714 # Assume this is a head because if it isn't, the next step
714 # Assume this is a head because if it isn't, the next step
715 # will eventually remove it.
715 # will eventually remove it.
716 heads[n] = True
716 heads[n] = True
717 # But, obviously its parents aren't.
717 # But, obviously its parents aren't.
718 for p in self.parents(n):
718 for p in self.parents(n):
719 heads.pop(p, None)
719 heads.pop(p, None)
720 heads = [n for n, flag in heads.iteritems() if flag]
720 heads = [n for n, flag in heads.iteritems() if flag]
721 roots = list(roots)
721 roots = list(roots)
722 assert orderedout
722 assert orderedout
723 assert roots
723 assert roots
724 assert heads
724 assert heads
725 return (orderedout, roots, heads)
725 return (orderedout, roots, heads)
726
726
727 def headrevs(self):
727 def headrevs(self):
728 try:
728 try:
729 return self.index.headrevs()
729 return self.index.headrevs()
730 except AttributeError:
730 except AttributeError:
731 return self._headrevs()
731 return self._headrevs()
732
732
733 def computephases(self, roots):
733 def computephases(self, roots):
734 return self.index.computephasesmapsets(roots)
734 return self.index.computephasesmapsets(roots)
735
735
736 def _headrevs(self):
736 def _headrevs(self):
737 count = len(self)
737 count = len(self)
738 if not count:
738 if not count:
739 return [nullrev]
739 return [nullrev]
740 # we won't iter over filtered rev so nobody is a head at start
740 # we won't iter over filtered rev so nobody is a head at start
741 ishead = [0] * (count + 1)
741 ishead = [0] * (count + 1)
742 index = self.index
742 index = self.index
743 for r in self:
743 for r in self:
744 ishead[r] = 1 # I may be an head
744 ishead[r] = 1 # I may be an head
745 e = index[r]
745 e = index[r]
746 ishead[e[5]] = ishead[e[6]] = 0 # my parent are not
746 ishead[e[5]] = ishead[e[6]] = 0 # my parent are not
747 return [r for r, val in enumerate(ishead) if val]
747 return [r for r, val in enumerate(ishead) if val]
748
748
749 def heads(self, start=None, stop=None):
749 def heads(self, start=None, stop=None):
750 """return the list of all nodes that have no children
750 """return the list of all nodes that have no children
751
751
752 if start is specified, only heads that are descendants of
752 if start is specified, only heads that are descendants of
753 start will be returned
753 start will be returned
754 if stop is specified, it will consider all the revs from stop
754 if stop is specified, it will consider all the revs from stop
755 as if they had no children
755 as if they had no children
756 """
756 """
757 if start is None and stop is None:
757 if start is None and stop is None:
758 if not len(self):
758 if not len(self):
759 return [nullid]
759 return [nullid]
760 return [self.node(r) for r in self.headrevs()]
760 return [self.node(r) for r in self.headrevs()]
761
761
762 if start is None:
762 if start is None:
763 start = nullid
763 start = nullid
764 if stop is None:
764 if stop is None:
765 stop = []
765 stop = []
766 stoprevs = set([self.rev(n) for n in stop])
766 stoprevs = set([self.rev(n) for n in stop])
767 startrev = self.rev(start)
767 startrev = self.rev(start)
768 reachable = set((startrev,))
768 reachable = set((startrev,))
769 heads = set((startrev,))
769 heads = set((startrev,))
770
770
771 parentrevs = self.parentrevs
771 parentrevs = self.parentrevs
772 for r in self.revs(start=startrev + 1):
772 for r in self.revs(start=startrev + 1):
773 for p in parentrevs(r):
773 for p in parentrevs(r):
774 if p in reachable:
774 if p in reachable:
775 if r not in stoprevs:
775 if r not in stoprevs:
776 reachable.add(r)
776 reachable.add(r)
777 heads.add(r)
777 heads.add(r)
778 if p in heads and p not in stoprevs:
778 if p in heads and p not in stoprevs:
779 heads.remove(p)
779 heads.remove(p)
780
780
781 return [self.node(r) for r in heads]
781 return [self.node(r) for r in heads]
782
782
783 def children(self, node):
783 def children(self, node):
784 """find the children of a given node"""
784 """find the children of a given node"""
785 c = []
785 c = []
786 p = self.rev(node)
786 p = self.rev(node)
787 for r in self.revs(start=p + 1):
787 for r in self.revs(start=p + 1):
788 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
788 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
789 if prevs:
789 if prevs:
790 for pr in prevs:
790 for pr in prevs:
791 if pr == p:
791 if pr == p:
792 c.append(self.node(r))
792 c.append(self.node(r))
793 elif p == nullrev:
793 elif p == nullrev:
794 c.append(self.node(r))
794 c.append(self.node(r))
795 return c
795 return c
796
796
797 def descendant(self, start, end):
797 def descendant(self, start, end):
798 if start == nullrev:
798 if start == nullrev:
799 return True
799 return True
800 for i in self.descendants([start]):
800 for i in self.descendants([start]):
801 if i == end:
801 if i == end:
802 return True
802 return True
803 elif i > end:
803 elif i > end:
804 break
804 break
805 return False
805 return False
806
806
807 def commonancestorsheads(self, a, b):
807 def commonancestorsheads(self, a, b):
808 """calculate all the heads of the common ancestors of nodes a and b"""
808 """calculate all the heads of the common ancestors of nodes a and b"""
809 a, b = self.rev(a), self.rev(b)
809 a, b = self.rev(a), self.rev(b)
810 try:
810 try:
811 ancs = self.index.commonancestorsheads(a, b)
811 ancs = self.index.commonancestorsheads(a, b)
812 except (AttributeError, OverflowError): # C implementation failed
812 except (AttributeError, OverflowError): # C implementation failed
813 ancs = ancestor.commonancestorsheads(self.parentrevs, a, b)
813 ancs = ancestor.commonancestorsheads(self.parentrevs, a, b)
814 return map(self.node, ancs)
814 return map(self.node, ancs)
815
815
816 def isancestor(self, a, b):
816 def isancestor(self, a, b):
817 """return True if node a is an ancestor of node b
817 """return True if node a is an ancestor of node b
818
818
819 The implementation of this is trivial but the use of
819 The implementation of this is trivial but the use of
820 commonancestorsheads is not."""
820 commonancestorsheads is not."""
821 return a in self.commonancestorsheads(a, b)
821 return a in self.commonancestorsheads(a, b)
822
822
823 def ancestor(self, a, b):
823 def ancestor(self, a, b):
824 """calculate the "best" common ancestor of nodes a and b"""
824 """calculate the "best" common ancestor of nodes a and b"""
825
825
826 a, b = self.rev(a), self.rev(b)
826 a, b = self.rev(a), self.rev(b)
827 try:
827 try:
828 ancs = self.index.ancestors(a, b)
828 ancs = self.index.ancestors(a, b)
829 except (AttributeError, OverflowError):
829 except (AttributeError, OverflowError):
830 ancs = ancestor.ancestors(self.parentrevs, a, b)
830 ancs = ancestor.ancestors(self.parentrevs, a, b)
831 if ancs:
831 if ancs:
832 # choose a consistent winner when there's a tie
832 # choose a consistent winner when there's a tie
833 return min(map(self.node, ancs))
833 return min(map(self.node, ancs))
834 return nullid
834 return nullid
835
835
836 def _match(self, id):
836 def _match(self, id):
837 if isinstance(id, int):
837 if isinstance(id, int):
838 # rev
838 # rev
839 return self.node(id)
839 return self.node(id)
840 if len(id) == 20:
840 if len(id) == 20:
841 # possibly a binary node
841 # possibly a binary node
842 # odds of a binary node being all hex in ASCII are 1 in 10**25
842 # odds of a binary node being all hex in ASCII are 1 in 10**25
843 try:
843 try:
844 node = id
844 node = id
845 self.rev(node) # quick search the index
845 self.rev(node) # quick search the index
846 return node
846 return node
847 except LookupError:
847 except LookupError:
848 pass # may be partial hex id
848 pass # may be partial hex id
849 try:
849 try:
850 # str(rev)
850 # str(rev)
851 rev = int(id)
851 rev = int(id)
852 if str(rev) != id:
852 if str(rev) != id:
853 raise ValueError
853 raise ValueError
854 if rev < 0:
854 if rev < 0:
855 rev = len(self) + rev
855 rev = len(self) + rev
856 if rev < 0 or rev >= len(self):
856 if rev < 0 or rev >= len(self):
857 raise ValueError
857 raise ValueError
858 return self.node(rev)
858 return self.node(rev)
859 except (ValueError, OverflowError):
859 except (ValueError, OverflowError):
860 pass
860 pass
861 if len(id) == 40:
861 if len(id) == 40:
862 try:
862 try:
863 # a full hex nodeid?
863 # a full hex nodeid?
864 node = bin(id)
864 node = bin(id)
865 self.rev(node)
865 self.rev(node)
866 return node
866 return node
867 except (TypeError, LookupError):
867 except (TypeError, LookupError):
868 pass
868 pass
869
869
870 def _partialmatch(self, id):
870 def _partialmatch(self, id):
871 try:
871 try:
872 n = self.index.partialmatch(id)
872 n = self.index.partialmatch(id)
873 if n and self.hasnode(n):
873 if n and self.hasnode(n):
874 return n
874 return n
875 return None
875 return None
876 except RevlogError:
876 except RevlogError:
877 # parsers.c radix tree lookup gave multiple matches
877 # parsers.c radix tree lookup gave multiple matches
878 # fall through to slow path that filters hidden revisions
878 # fall through to slow path that filters hidden revisions
879 pass
879 pass
880 except (AttributeError, ValueError):
880 except (AttributeError, ValueError):
881 # we are pure python, or key was too short to search radix tree
881 # we are pure python, or key was too short to search radix tree
882 pass
882 pass
883
883
884 if id in self._pcache:
884 if id in self._pcache:
885 return self._pcache[id]
885 return self._pcache[id]
886
886
887 if len(id) < 40:
887 if len(id) < 40:
888 try:
888 try:
889 # hex(node)[:...]
889 # hex(node)[:...]
890 l = len(id) // 2 # grab an even number of digits
890 l = len(id) // 2 # grab an even number of digits
891 prefix = bin(id[:l * 2])
891 prefix = bin(id[:l * 2])
892 nl = [e[7] for e in self.index if e[7].startswith(prefix)]
892 nl = [e[7] for e in self.index if e[7].startswith(prefix)]
893 nl = [n for n in nl if hex(n).startswith(id) and
893 nl = [n for n in nl if hex(n).startswith(id) and
894 self.hasnode(n)]
894 self.hasnode(n)]
895 if len(nl) > 0:
895 if len(nl) > 0:
896 if len(nl) == 1:
896 if len(nl) == 1:
897 self._pcache[id] = nl[0]
897 self._pcache[id] = nl[0]
898 return nl[0]
898 return nl[0]
899 raise LookupError(id, self.indexfile,
899 raise LookupError(id, self.indexfile,
900 _('ambiguous identifier'))
900 _('ambiguous identifier'))
901 return None
901 return None
902 except TypeError:
902 except TypeError:
903 pass
903 pass
904
904
905 def lookup(self, id):
905 def lookup(self, id):
906 """locate a node based on:
906 """locate a node based on:
907 - revision number or str(revision number)
907 - revision number or str(revision number)
908 - nodeid or subset of hex nodeid
908 - nodeid or subset of hex nodeid
909 """
909 """
910 n = self._match(id)
910 n = self._match(id)
911 if n is not None:
911 if n is not None:
912 return n
912 return n
913 n = self._partialmatch(id)
913 n = self._partialmatch(id)
914 if n:
914 if n:
915 return n
915 return n
916
916
917 raise LookupError(id, self.indexfile, _('no match found'))
917 raise LookupError(id, self.indexfile, _('no match found'))
918
918
919 def cmp(self, node, text):
919 def cmp(self, node, text):
920 """compare text with a given file revision
920 """compare text with a given file revision
921
921
922 returns True if text is different than what is stored.
922 returns True if text is different than what is stored.
923 """
923 """
924 p1, p2 = self.parents(node)
924 p1, p2 = self.parents(node)
925 return hash(text, p1, p2) != node
925 return hash(text, p1, p2) != node
926
926
927 def _addchunk(self, offset, data):
927 def _addchunk(self, offset, data):
928 o, d = self._chunkcache
928 o, d = self._chunkcache
929 # try to add to existing cache
929 # try to add to existing cache
930 if o + len(d) == offset and len(d) + len(data) < _chunksize:
930 if o + len(d) == offset and len(d) + len(data) < _chunksize:
931 self._chunkcache = o, d + data
931 self._chunkcache = o, d + data
932 else:
932 else:
933 self._chunkcache = offset, data
933 self._chunkcache = offset, data
934
934
935 def _loadchunk(self, offset, length):
935 def _loadchunk(self, offset, length, df=None):
936 if self._inline:
936 """Load a chunk/segment from the revlog.
937 df = self.opener(self.indexfile)
937
938 Accepts absolute offset, length to read, and an optional existing
939 file handle to read from.
940
941 If an existing file handle is passed, it will be seeked and the
942 original seek position will NOT be restored.
943 """
944 if df is not None:
945 closehandle = False
938 else:
946 else:
939 df = self.opener(self.datafile)
947 if self._inline:
948 df = self.opener(self.indexfile)
949 else:
950 df = self.opener(self.datafile)
951 closehandle = True
940
952
941 # Cache data both forward and backward around the requested
953 # Cache data both forward and backward around the requested
942 # data, in a fixed size window. This helps speed up operations
954 # data, in a fixed size window. This helps speed up operations
943 # involving reading the revlog backwards.
955 # involving reading the revlog backwards.
944 cachesize = self._chunkcachesize
956 cachesize = self._chunkcachesize
945 realoffset = offset & ~(cachesize - 1)
957 realoffset = offset & ~(cachesize - 1)
946 reallength = (((offset + length + cachesize) & ~(cachesize - 1))
958 reallength = (((offset + length + cachesize) & ~(cachesize - 1))
947 - realoffset)
959 - realoffset)
948 df.seek(realoffset)
960 df.seek(realoffset)
949 d = df.read(reallength)
961 d = df.read(reallength)
950 df.close()
962 if closehandle:
963 df.close()
951 self._addchunk(realoffset, d)
964 self._addchunk(realoffset, d)
952 if offset != realoffset or reallength != length:
965 if offset != realoffset or reallength != length:
953 return util.buffer(d, offset - realoffset, length)
966 return util.buffer(d, offset - realoffset, length)
954 return d
967 return d
955
968
956 def _getchunk(self, offset, length):
969 def _getchunk(self, offset, length, df=None):
957 o, d = self._chunkcache
970 o, d = self._chunkcache
958 l = len(d)
971 l = len(d)
959
972
960 # is it in the cache?
973 # is it in the cache?
961 cachestart = offset - o
974 cachestart = offset - o
962 cacheend = cachestart + length
975 cacheend = cachestart + length
963 if cachestart >= 0 and cacheend <= l:
976 if cachestart >= 0 and cacheend <= l:
964 if cachestart == 0 and cacheend == l:
977 if cachestart == 0 and cacheend == l:
965 return d # avoid a copy
978 return d # avoid a copy
966 return util.buffer(d, cachestart, cacheend - cachestart)
979 return util.buffer(d, cachestart, cacheend - cachestart)
967
980
968 return self._loadchunk(offset, length)
981 return self._loadchunk(offset, length, df=df)
969
982
970 def _chunkraw(self, startrev, endrev):
983 def _chunkraw(self, startrev, endrev, df=None):
971 start = self.start(startrev)
984 start = self.start(startrev)
972 end = self.end(endrev)
985 end = self.end(endrev)
973 if self._inline:
986 if self._inline:
974 start += (startrev + 1) * self._io.size
987 start += (startrev + 1) * self._io.size
975 end += (endrev + 1) * self._io.size
988 end += (endrev + 1) * self._io.size
976 length = end - start
989 length = end - start
977 return self._getchunk(start, length)
990 return self._getchunk(start, length, df=df)
978
991
979 def _chunk(self, rev):
992 def _chunk(self, rev, df=None):
980 return decompress(self._chunkraw(rev, rev))
993 return decompress(self._chunkraw(rev, rev, df=df))
981
994
982 def _chunks(self, revs):
995 def _chunks(self, revs, df=None):
983 '''faster version of [self._chunk(rev) for rev in revs]
996 '''faster version of [self._chunk(rev) for rev in revs]
984
997
985 Assumes that revs is in ascending order.'''
998 Assumes that revs is in ascending order.'''
986 if not revs:
999 if not revs:
987 return []
1000 return []
988 start = self.start
1001 start = self.start
989 length = self.length
1002 length = self.length
990 inline = self._inline
1003 inline = self._inline
991 iosize = self._io.size
1004 iosize = self._io.size
992 buffer = util.buffer
1005 buffer = util.buffer
993
1006
994 l = []
1007 l = []
995 ladd = l.append
1008 ladd = l.append
996
1009
997 # preload the cache
1010 # preload the cache
998 try:
1011 try:
999 while True:
1012 while True:
1000 # ensure that the cache doesn't change out from under us
1013 # ensure that the cache doesn't change out from under us
1001 _cache = self._chunkcache
1014 _cache = self._chunkcache
1002 self._chunkraw(revs[0], revs[-1])
1015 self._chunkraw(revs[0], revs[-1], df=df)
1003 if _cache == self._chunkcache:
1016 if _cache == self._chunkcache:
1004 break
1017 break
1005 offset, data = _cache
1018 offset, data = _cache
1006 except OverflowError:
1019 except OverflowError:
1007 # issue4215 - we can't cache a run of chunks greater than
1020 # issue4215 - we can't cache a run of chunks greater than
1008 # 2G on Windows
1021 # 2G on Windows
1009 return [self._chunk(rev) for rev in revs]
1022 return [self._chunk(rev, df=df) for rev in revs]
1010
1023
1011 for rev in revs:
1024 for rev in revs:
1012 chunkstart = start(rev)
1025 chunkstart = start(rev)
1013 if inline:
1026 if inline:
1014 chunkstart += (rev + 1) * iosize
1027 chunkstart += (rev + 1) * iosize
1015 chunklength = length(rev)
1028 chunklength = length(rev)
1016 ladd(decompress(buffer(data, chunkstart - offset, chunklength)))
1029 ladd(decompress(buffer(data, chunkstart - offset, chunklength)))
1017
1030
1018 return l
1031 return l
1019
1032
1020 def _chunkclear(self):
1033 def _chunkclear(self):
1021 self._chunkcache = (0, '')
1034 self._chunkcache = (0, '')
1022
1035
1023 def deltaparent(self, rev):
1036 def deltaparent(self, rev):
1024 """return deltaparent of the given revision"""
1037 """return deltaparent of the given revision"""
1025 base = self.index[rev][3]
1038 base = self.index[rev][3]
1026 if base == rev:
1039 if base == rev:
1027 return nullrev
1040 return nullrev
1028 elif self._generaldelta:
1041 elif self._generaldelta:
1029 return base
1042 return base
1030 else:
1043 else:
1031 return rev - 1
1044 return rev - 1
1032
1045
1033 def revdiff(self, rev1, rev2):
1046 def revdiff(self, rev1, rev2):
1034 """return or calculate a delta between two revisions"""
1047 """return or calculate a delta between two revisions"""
1035 if rev1 != nullrev and self.deltaparent(rev2) == rev1:
1048 if rev1 != nullrev and self.deltaparent(rev2) == rev1:
1036 return str(self._chunk(rev2))
1049 return str(self._chunk(rev2))
1037
1050
1038 return mdiff.textdiff(self.revision(rev1),
1051 return mdiff.textdiff(self.revision(rev1),
1039 self.revision(rev2))
1052 self.revision(rev2))
1040
1053
1041 def revision(self, nodeorrev):
1054 def revision(self, nodeorrev, _df=None):
1042 """return an uncompressed revision of a given node or revision
1055 """return an uncompressed revision of a given node or revision
1043 number.
1056 number.
1057
1058 _df is an existing file handle to read from. It is meant to only be
1059 used internally.
1044 """
1060 """
1045 if isinstance(nodeorrev, int):
1061 if isinstance(nodeorrev, int):
1046 rev = nodeorrev
1062 rev = nodeorrev
1047 node = self.node(rev)
1063 node = self.node(rev)
1048 else:
1064 else:
1049 node = nodeorrev
1065 node = nodeorrev
1050 rev = None
1066 rev = None
1051
1067
1052 cachedrev = None
1068 cachedrev = None
1053 if node == nullid:
1069 if node == nullid:
1054 return ""
1070 return ""
1055 if self._cache:
1071 if self._cache:
1056 if self._cache[0] == node:
1072 if self._cache[0] == node:
1057 return self._cache[2]
1073 return self._cache[2]
1058 cachedrev = self._cache[1]
1074 cachedrev = self._cache[1]
1059
1075
1060 # look up what we need to read
1076 # look up what we need to read
1061 text = None
1077 text = None
1062 if rev is None:
1078 if rev is None:
1063 rev = self.rev(node)
1079 rev = self.rev(node)
1064
1080
1065 # check rev flags
1081 # check rev flags
1066 if self.flags(rev) & ~REVIDX_KNOWN_FLAGS:
1082 if self.flags(rev) & ~REVIDX_KNOWN_FLAGS:
1067 raise RevlogError(_('incompatible revision flag %x') %
1083 raise RevlogError(_('incompatible revision flag %x') %
1068 (self.flags(rev) & ~REVIDX_KNOWN_FLAGS))
1084 (self.flags(rev) & ~REVIDX_KNOWN_FLAGS))
1069
1085
1070 # build delta chain
1086 # build delta chain
1071 chain = []
1087 chain = []
1072 index = self.index # for performance
1088 index = self.index # for performance
1073 generaldelta = self._generaldelta
1089 generaldelta = self._generaldelta
1074 iterrev = rev
1090 iterrev = rev
1075 e = index[iterrev]
1091 e = index[iterrev]
1076 while iterrev != e[3] and iterrev != cachedrev:
1092 while iterrev != e[3] and iterrev != cachedrev:
1077 chain.append(iterrev)
1093 chain.append(iterrev)
1078 if generaldelta:
1094 if generaldelta:
1079 iterrev = e[3]
1095 iterrev = e[3]
1080 else:
1096 else:
1081 iterrev -= 1
1097 iterrev -= 1
1082 e = index[iterrev]
1098 e = index[iterrev]
1083
1099
1084 if iterrev == cachedrev:
1100 if iterrev == cachedrev:
1085 # cache hit
1101 # cache hit
1086 text = self._cache[2]
1102 text = self._cache[2]
1087 else:
1103 else:
1088 chain.append(iterrev)
1104 chain.append(iterrev)
1089 chain.reverse()
1105 chain.reverse()
1090
1106
1091 # drop cache to save memory
1107 # drop cache to save memory
1092 self._cache = None
1108 self._cache = None
1093
1109
1094 bins = self._chunks(chain)
1110 bins = self._chunks(chain, df=_df)
1095 if text is None:
1111 if text is None:
1096 text = str(bins[0])
1112 text = str(bins[0])
1097 bins = bins[1:]
1113 bins = bins[1:]
1098
1114
1099 text = mdiff.patches(text, bins)
1115 text = mdiff.patches(text, bins)
1100
1116
1101 text = self._checkhash(text, node, rev)
1117 text = self._checkhash(text, node, rev)
1102
1118
1103 self._cache = (node, rev, text)
1119 self._cache = (node, rev, text)
1104 return text
1120 return text
1105
1121
1106 def hash(self, text, p1, p2):
1122 def hash(self, text, p1, p2):
1107 """Compute a node hash.
1123 """Compute a node hash.
1108
1124
1109 Available as a function so that subclasses can replace the hash
1125 Available as a function so that subclasses can replace the hash
1110 as needed.
1126 as needed.
1111 """
1127 """
1112 return hash(text, p1, p2)
1128 return hash(text, p1, p2)
1113
1129
1114 def _checkhash(self, text, node, rev):
1130 def _checkhash(self, text, node, rev):
1115 p1, p2 = self.parents(node)
1131 p1, p2 = self.parents(node)
1116 self.checkhash(text, p1, p2, node, rev)
1132 self.checkhash(text, p1, p2, node, rev)
1117 return text
1133 return text
1118
1134
1119 def checkhash(self, text, p1, p2, node, rev=None):
1135 def checkhash(self, text, p1, p2, node, rev=None):
1120 if node != self.hash(text, p1, p2):
1136 if node != self.hash(text, p1, p2):
1121 revornode = rev
1137 revornode = rev
1122 if revornode is None:
1138 if revornode is None:
1123 revornode = templatefilters.short(hex(node))
1139 revornode = templatefilters.short(hex(node))
1124 raise RevlogError(_("integrity check failed on %s:%s")
1140 raise RevlogError(_("integrity check failed on %s:%s")
1125 % (self.indexfile, revornode))
1141 % (self.indexfile, revornode))
1126
1142
1127 def checkinlinesize(self, tr, fp=None):
1143 def checkinlinesize(self, tr, fp=None):
1128 """Check if the revlog is too big for inline and convert if so.
1144 """Check if the revlog is too big for inline and convert if so.
1129
1145
1130 This should be called after revisions are added to the revlog. If the
1146 This should be called after revisions are added to the revlog. If the
1131 revlog has grown too large to be an inline revlog, it will convert it
1147 revlog has grown too large to be an inline revlog, it will convert it
1132 to use multiple index and data files.
1148 to use multiple index and data files.
1133 """
1149 """
1134 if not self._inline or (self.start(-2) + self.length(-2)) < _maxinline:
1150 if not self._inline or (self.start(-2) + self.length(-2)) < _maxinline:
1135 return
1151 return
1136
1152
1137 trinfo = tr.find(self.indexfile)
1153 trinfo = tr.find(self.indexfile)
1138 if trinfo is None:
1154 if trinfo is None:
1139 raise RevlogError(_("%s not found in the transaction")
1155 raise RevlogError(_("%s not found in the transaction")
1140 % self.indexfile)
1156 % self.indexfile)
1141
1157
1142 trindex = trinfo[2]
1158 trindex = trinfo[2]
1143 if trindex is not None:
1159 if trindex is not None:
1144 dataoff = self.start(trindex)
1160 dataoff = self.start(trindex)
1145 else:
1161 else:
1146 # revlog was stripped at start of transaction, use all leftover data
1162 # revlog was stripped at start of transaction, use all leftover data
1147 trindex = len(self) - 1
1163 trindex = len(self) - 1
1148 dataoff = self.end(-2)
1164 dataoff = self.end(-2)
1149
1165
1150 tr.add(self.datafile, dataoff)
1166 tr.add(self.datafile, dataoff)
1151
1167
1152 if fp:
1168 if fp:
1153 fp.flush()
1169 fp.flush()
1154 fp.close()
1170 fp.close()
1155
1171
1156 df = self.opener(self.datafile, 'w')
1172 df = self.opener(self.datafile, 'w')
1157 try:
1173 try:
1158 for r in self:
1174 for r in self:
1159 df.write(self._chunkraw(r, r))
1175 df.write(self._chunkraw(r, r))
1160 finally:
1176 finally:
1161 df.close()
1177 df.close()
1162
1178
1163 fp = self.opener(self.indexfile, 'w', atomictemp=True)
1179 fp = self.opener(self.indexfile, 'w', atomictemp=True)
1164 self.version &= ~(REVLOGNGINLINEDATA)
1180 self.version &= ~(REVLOGNGINLINEDATA)
1165 self._inline = False
1181 self._inline = False
1166 for i in self:
1182 for i in self:
1167 e = self._io.packentry(self.index[i], self.node, self.version, i)
1183 e = self._io.packentry(self.index[i], self.node, self.version, i)
1168 fp.write(e)
1184 fp.write(e)
1169
1185
1170 # if we don't call close, the temp file will never replace the
1186 # if we don't call close, the temp file will never replace the
1171 # real index
1187 # real index
1172 fp.close()
1188 fp.close()
1173
1189
1174 tr.replace(self.indexfile, trindex * self._io.size)
1190 tr.replace(self.indexfile, trindex * self._io.size)
1175 self._chunkclear()
1191 self._chunkclear()
1176
1192
1177 def addrevision(self, text, transaction, link, p1, p2, cachedelta=None,
1193 def addrevision(self, text, transaction, link, p1, p2, cachedelta=None,
1178 node=None):
1194 node=None):
1179 """add a revision to the log
1195 """add a revision to the log
1180
1196
1181 text - the revision data to add
1197 text - the revision data to add
1182 transaction - the transaction object used for rollback
1198 transaction - the transaction object used for rollback
1183 link - the linkrev data to add
1199 link - the linkrev data to add
1184 p1, p2 - the parent nodeids of the revision
1200 p1, p2 - the parent nodeids of the revision
1185 cachedelta - an optional precomputed delta
1201 cachedelta - an optional precomputed delta
1186 node - nodeid of revision; typically node is not specified, and it is
1202 node - nodeid of revision; typically node is not specified, and it is
1187 computed by default as hash(text, p1, p2), however subclasses might
1203 computed by default as hash(text, p1, p2), however subclasses might
1188 use different hashing method (and override checkhash() in such case)
1204 use different hashing method (and override checkhash() in such case)
1189 """
1205 """
1190 if link == nullrev:
1206 if link == nullrev:
1191 raise RevlogError(_("attempted to add linkrev -1 to %s")
1207 raise RevlogError(_("attempted to add linkrev -1 to %s")
1192 % self.indexfile)
1208 % self.indexfile)
1193
1209
1194 if len(text) > _maxentrysize:
1210 if len(text) > _maxentrysize:
1195 raise RevlogError(
1211 raise RevlogError(
1196 _("%s: size of %d bytes exceeds maximum revlog storage of 2GiB")
1212 _("%s: size of %d bytes exceeds maximum revlog storage of 2GiB")
1197 % (self.indexfile, len(text)))
1213 % (self.indexfile, len(text)))
1198
1214
1199 node = node or self.hash(text, p1, p2)
1215 node = node or self.hash(text, p1, p2)
1200 if node in self.nodemap:
1216 if node in self.nodemap:
1201 return node
1217 return node
1202
1218
1203 dfh = None
1219 dfh = None
1204 if not self._inline:
1220 if not self._inline:
1205 dfh = self.opener(self.datafile, "a")
1221 dfh = self.opener(self.datafile, "a")
1206 ifh = self.opener(self.indexfile, "a+")
1222 ifh = self.opener(self.indexfile, "a+")
1207 try:
1223 try:
1208 return self._addrevision(node, text, transaction, link, p1, p2,
1224 return self._addrevision(node, text, transaction, link, p1, p2,
1209 REVIDX_DEFAULT_FLAGS, cachedelta, ifh, dfh)
1225 REVIDX_DEFAULT_FLAGS, cachedelta, ifh, dfh)
1210 finally:
1226 finally:
1211 if dfh:
1227 if dfh:
1212 dfh.close()
1228 dfh.close()
1213 ifh.close()
1229 ifh.close()
1214
1230
1215 def compress(self, text):
1231 def compress(self, text):
1216 """ generate a possibly-compressed representation of text """
1232 """ generate a possibly-compressed representation of text """
1217 if not text:
1233 if not text:
1218 return ("", text)
1234 return ("", text)
1219 l = len(text)
1235 l = len(text)
1220 bin = None
1236 bin = None
1221 if l < 44:
1237 if l < 44:
1222 pass
1238 pass
1223 elif l > 1000000:
1239 elif l > 1000000:
1224 # zlib makes an internal copy, thus doubling memory usage for
1240 # zlib makes an internal copy, thus doubling memory usage for
1225 # large files, so lets do this in pieces
1241 # large files, so lets do this in pieces
1226 z = zlib.compressobj()
1242 z = zlib.compressobj()
1227 p = []
1243 p = []
1228 pos = 0
1244 pos = 0
1229 while pos < l:
1245 while pos < l:
1230 pos2 = pos + 2**20
1246 pos2 = pos + 2**20
1231 p.append(z.compress(text[pos:pos2]))
1247 p.append(z.compress(text[pos:pos2]))
1232 pos = pos2
1248 pos = pos2
1233 p.append(z.flush())
1249 p.append(z.flush())
1234 if sum(map(len, p)) < l:
1250 if sum(map(len, p)) < l:
1235 bin = "".join(p)
1251 bin = "".join(p)
1236 else:
1252 else:
1237 bin = _compress(text)
1253 bin = _compress(text)
1238 if bin is None or len(bin) > l:
1254 if bin is None or len(bin) > l:
1239 if text[0] == '\0':
1255 if text[0] == '\0':
1240 return ("", text)
1256 return ("", text)
1241 return ('u', text)
1257 return ('u', text)
1242 return ("", bin)
1258 return ("", bin)
1243
1259
1244 def _isgooddelta(self, d, textlen):
1260 def _isgooddelta(self, d, textlen):
1245 """Returns True if the given delta is good. Good means that it is within
1261 """Returns True if the given delta is good. Good means that it is within
1246 the disk span, disk size, and chain length bounds that we know to be
1262 the disk span, disk size, and chain length bounds that we know to be
1247 performant."""
1263 performant."""
1248 if d is None:
1264 if d is None:
1249 return False
1265 return False
1250
1266
1251 # - 'dist' is the distance from the base revision -- bounding it limits
1267 # - 'dist' is the distance from the base revision -- bounding it limits
1252 # the amount of I/O we need to do.
1268 # the amount of I/O we need to do.
1253 # - 'compresseddeltalen' is the sum of the total size of deltas we need
1269 # - 'compresseddeltalen' is the sum of the total size of deltas we need
1254 # to apply -- bounding it limits the amount of CPU we consume.
1270 # to apply -- bounding it limits the amount of CPU we consume.
1255 dist, l, data, base, chainbase, chainlen, compresseddeltalen = d
1271 dist, l, data, base, chainbase, chainlen, compresseddeltalen = d
1256 if (dist > textlen * 4 or l > textlen or
1272 if (dist > textlen * 4 or l > textlen or
1257 compresseddeltalen > textlen * 2 or
1273 compresseddeltalen > textlen * 2 or
1258 (self._maxchainlen and chainlen > self._maxchainlen)):
1274 (self._maxchainlen and chainlen > self._maxchainlen)):
1259 return False
1275 return False
1260
1276
1261 return True
1277 return True
1262
1278
1263 def _addrevision(self, node, text, transaction, link, p1, p2, flags,
1279 def _addrevision(self, node, text, transaction, link, p1, p2, flags,
1264 cachedelta, ifh, dfh, alwayscache=False):
1280 cachedelta, ifh, dfh, alwayscache=False):
1265 """internal function to add revisions to the log
1281 """internal function to add revisions to the log
1266
1282
1267 see addrevision for argument descriptions.
1283 see addrevision for argument descriptions.
1268 invariants:
1284 invariants:
1269 - text is optional (can be None); if not set, cachedelta must be set.
1285 - text is optional (can be None); if not set, cachedelta must be set.
1270 if both are set, they must correspond to each other.
1286 if both are set, they must correspond to each other.
1271 """
1287 """
1272 btext = [text]
1288 btext = [text]
1273 def buildtext():
1289 def buildtext():
1274 if btext[0] is not None:
1290 if btext[0] is not None:
1275 return btext[0]
1291 return btext[0]
1276 # flush any pending writes here so we can read it in revision
1292 # flush any pending writes here so we can read it in revision
1277 if dfh:
1293 if dfh:
1278 dfh.flush()
1294 dfh.flush()
1279 ifh.flush()
1295 ifh.flush()
1280 baserev = cachedelta[0]
1296 baserev = cachedelta[0]
1281 delta = cachedelta[1]
1297 delta = cachedelta[1]
1282 # special case deltas which replace entire base; no need to decode
1298 # special case deltas which replace entire base; no need to decode
1283 # base revision. this neatly avoids censored bases, which throw when
1299 # base revision. this neatly avoids censored bases, which throw when
1284 # they're decoded.
1300 # they're decoded.
1285 hlen = struct.calcsize(">lll")
1301 hlen = struct.calcsize(">lll")
1286 if delta[:hlen] == mdiff.replacediffheader(self.rawsize(baserev),
1302 if delta[:hlen] == mdiff.replacediffheader(self.rawsize(baserev),
1287 len(delta) - hlen):
1303 len(delta) - hlen):
1288 btext[0] = delta[hlen:]
1304 btext[0] = delta[hlen:]
1289 else:
1305 else:
1290 basetext = self.revision(self.node(baserev))
1306 basetext = self.revision(self.node(baserev))
1291 btext[0] = mdiff.patch(basetext, delta)
1307 btext[0] = mdiff.patch(basetext, delta)
1292 try:
1308 try:
1293 self.checkhash(btext[0], p1, p2, node)
1309 self.checkhash(btext[0], p1, p2, node)
1294 if flags & REVIDX_ISCENSORED:
1310 if flags & REVIDX_ISCENSORED:
1295 raise RevlogError(_('node %s is not censored') % node)
1311 raise RevlogError(_('node %s is not censored') % node)
1296 except CensoredNodeError:
1312 except CensoredNodeError:
1297 # must pass the censored index flag to add censored revisions
1313 # must pass the censored index flag to add censored revisions
1298 if not flags & REVIDX_ISCENSORED:
1314 if not flags & REVIDX_ISCENSORED:
1299 raise
1315 raise
1300 return btext[0]
1316 return btext[0]
1301
1317
1302 def builddelta(rev):
1318 def builddelta(rev):
1303 # can we use the cached delta?
1319 # can we use the cached delta?
1304 if cachedelta and cachedelta[0] == rev:
1320 if cachedelta and cachedelta[0] == rev:
1305 delta = cachedelta[1]
1321 delta = cachedelta[1]
1306 else:
1322 else:
1307 t = buildtext()
1323 t = buildtext()
1308 if self.iscensored(rev):
1324 if self.iscensored(rev):
1309 # deltas based on a censored revision must replace the
1325 # deltas based on a censored revision must replace the
1310 # full content in one patch, so delta works everywhere
1326 # full content in one patch, so delta works everywhere
1311 header = mdiff.replacediffheader(self.rawsize(rev), len(t))
1327 header = mdiff.replacediffheader(self.rawsize(rev), len(t))
1312 delta = header + t
1328 delta = header + t
1313 else:
1329 else:
1314 ptext = self.revision(self.node(rev))
1330 ptext = self.revision(self.node(rev))
1315 delta = mdiff.textdiff(ptext, t)
1331 delta = mdiff.textdiff(ptext, t)
1316 data = self.compress(delta)
1332 data = self.compress(delta)
1317 l = len(data[1]) + len(data[0])
1333 l = len(data[1]) + len(data[0])
1318 if basecache[0] == rev:
1334 if basecache[0] == rev:
1319 chainbase = basecache[1]
1335 chainbase = basecache[1]
1320 else:
1336 else:
1321 chainbase = self.chainbase(rev)
1337 chainbase = self.chainbase(rev)
1322 dist = l + offset - self.start(chainbase)
1338 dist = l + offset - self.start(chainbase)
1323 if self._generaldelta:
1339 if self._generaldelta:
1324 base = rev
1340 base = rev
1325 else:
1341 else:
1326 base = chainbase
1342 base = chainbase
1327 chainlen, compresseddeltalen = self._chaininfo(rev)
1343 chainlen, compresseddeltalen = self._chaininfo(rev)
1328 chainlen += 1
1344 chainlen += 1
1329 compresseddeltalen += l
1345 compresseddeltalen += l
1330 return dist, l, data, base, chainbase, chainlen, compresseddeltalen
1346 return dist, l, data, base, chainbase, chainlen, compresseddeltalen
1331
1347
1332 curr = len(self)
1348 curr = len(self)
1333 prev = curr - 1
1349 prev = curr - 1
1334 base = chainbase = curr
1350 base = chainbase = curr
1335 chainlen = None
1351 chainlen = None
1336 offset = self.end(prev)
1352 offset = self.end(prev)
1337 d = None
1353 d = None
1338 if self._basecache is None:
1354 if self._basecache is None:
1339 self._basecache = (prev, self.chainbase(prev))
1355 self._basecache = (prev, self.chainbase(prev))
1340 basecache = self._basecache
1356 basecache = self._basecache
1341 p1r, p2r = self.rev(p1), self.rev(p2)
1357 p1r, p2r = self.rev(p1), self.rev(p2)
1342
1358
1343 # full versions are inserted when the needed deltas
1359 # full versions are inserted when the needed deltas
1344 # become comparable to the uncompressed text
1360 # become comparable to the uncompressed text
1345 if text is None:
1361 if text is None:
1346 textlen = mdiff.patchedsize(self.rawsize(cachedelta[0]),
1362 textlen = mdiff.patchedsize(self.rawsize(cachedelta[0]),
1347 cachedelta[1])
1363 cachedelta[1])
1348 else:
1364 else:
1349 textlen = len(text)
1365 textlen = len(text)
1350
1366
1351 # should we try to build a delta?
1367 # should we try to build a delta?
1352 if prev != nullrev:
1368 if prev != nullrev:
1353 if self._generaldelta:
1369 if self._generaldelta:
1354 if p2r != nullrev and self._aggressivemergedeltas:
1370 if p2r != nullrev and self._aggressivemergedeltas:
1355 d = builddelta(p1r)
1371 d = builddelta(p1r)
1356 d2 = builddelta(p2r)
1372 d2 = builddelta(p2r)
1357 p1good = self._isgooddelta(d, textlen)
1373 p1good = self._isgooddelta(d, textlen)
1358 p2good = self._isgooddelta(d2, textlen)
1374 p2good = self._isgooddelta(d2, textlen)
1359 if p1good and p2good:
1375 if p1good and p2good:
1360 # If both are good deltas, choose the smallest
1376 # If both are good deltas, choose the smallest
1361 if d2[1] < d[1]:
1377 if d2[1] < d[1]:
1362 d = d2
1378 d = d2
1363 elif p2good:
1379 elif p2good:
1364 # If only p2 is good, use it
1380 # If only p2 is good, use it
1365 d = d2
1381 d = d2
1366 elif p1good:
1382 elif p1good:
1367 pass
1383 pass
1368 else:
1384 else:
1369 # Neither is good, try against prev to hopefully save us
1385 # Neither is good, try against prev to hopefully save us
1370 # a fulltext.
1386 # a fulltext.
1371 d = builddelta(prev)
1387 d = builddelta(prev)
1372 else:
1388 else:
1373 # Pick whichever parent is closer to us (to minimize the
1389 # Pick whichever parent is closer to us (to minimize the
1374 # chance of having to build a fulltext). Since
1390 # chance of having to build a fulltext). Since
1375 # nullrev == -1, any non-merge commit will always pick p1r.
1391 # nullrev == -1, any non-merge commit will always pick p1r.
1376 drev = p2r if p2r > p1r else p1r
1392 drev = p2r if p2r > p1r else p1r
1377 d = builddelta(drev)
1393 d = builddelta(drev)
1378 # If the chosen delta will result in us making a full text,
1394 # If the chosen delta will result in us making a full text,
1379 # give it one last try against prev.
1395 # give it one last try against prev.
1380 if drev != prev and not self._isgooddelta(d, textlen):
1396 if drev != prev and not self._isgooddelta(d, textlen):
1381 d = builddelta(prev)
1397 d = builddelta(prev)
1382 else:
1398 else:
1383 d = builddelta(prev)
1399 d = builddelta(prev)
1384 dist, l, data, base, chainbase, chainlen, compresseddeltalen = d
1400 dist, l, data, base, chainbase, chainlen, compresseddeltalen = d
1385
1401
1386 if not self._isgooddelta(d, textlen):
1402 if not self._isgooddelta(d, textlen):
1387 text = buildtext()
1403 text = buildtext()
1388 data = self.compress(text)
1404 data = self.compress(text)
1389 l = len(data[1]) + len(data[0])
1405 l = len(data[1]) + len(data[0])
1390 base = chainbase = curr
1406 base = chainbase = curr
1391
1407
1392 e = (offset_type(offset, flags), l, textlen,
1408 e = (offset_type(offset, flags), l, textlen,
1393 base, link, p1r, p2r, node)
1409 base, link, p1r, p2r, node)
1394 self.index.insert(-1, e)
1410 self.index.insert(-1, e)
1395 self.nodemap[node] = curr
1411 self.nodemap[node] = curr
1396
1412
1397 entry = self._io.packentry(e, self.node, self.version, curr)
1413 entry = self._io.packentry(e, self.node, self.version, curr)
1398 self._writeentry(transaction, ifh, dfh, entry, data, link, offset)
1414 self._writeentry(transaction, ifh, dfh, entry, data, link, offset)
1399
1415
1400 if alwayscache and text is None:
1416 if alwayscache and text is None:
1401 text = buildtext()
1417 text = buildtext()
1402
1418
1403 if type(text) == str: # only accept immutable objects
1419 if type(text) == str: # only accept immutable objects
1404 self._cache = (node, curr, text)
1420 self._cache = (node, curr, text)
1405 self._basecache = (curr, chainbase)
1421 self._basecache = (curr, chainbase)
1406 return node
1422 return node
1407
1423
1408 def _writeentry(self, transaction, ifh, dfh, entry, data, link, offset):
1424 def _writeentry(self, transaction, ifh, dfh, entry, data, link, offset):
1409 curr = len(self) - 1
1425 curr = len(self) - 1
1410 if not self._inline:
1426 if not self._inline:
1411 transaction.add(self.datafile, offset)
1427 transaction.add(self.datafile, offset)
1412 transaction.add(self.indexfile, curr * len(entry))
1428 transaction.add(self.indexfile, curr * len(entry))
1413 if data[0]:
1429 if data[0]:
1414 dfh.write(data[0])
1430 dfh.write(data[0])
1415 dfh.write(data[1])
1431 dfh.write(data[1])
1416 dfh.flush()
1432 dfh.flush()
1417 ifh.write(entry)
1433 ifh.write(entry)
1418 else:
1434 else:
1419 offset += curr * self._io.size
1435 offset += curr * self._io.size
1420 transaction.add(self.indexfile, offset, curr)
1436 transaction.add(self.indexfile, offset, curr)
1421 ifh.write(entry)
1437 ifh.write(entry)
1422 ifh.write(data[0])
1438 ifh.write(data[0])
1423 ifh.write(data[1])
1439 ifh.write(data[1])
1424 self.checkinlinesize(transaction, ifh)
1440 self.checkinlinesize(transaction, ifh)
1425
1441
1426 def addgroup(self, bundle, linkmapper, transaction, addrevisioncb=None):
1442 def addgroup(self, bundle, linkmapper, transaction, addrevisioncb=None):
1427 """
1443 """
1428 add a delta group
1444 add a delta group
1429
1445
1430 given a set of deltas, add them to the revision log. the
1446 given a set of deltas, add them to the revision log. the
1431 first delta is against its parent, which should be in our
1447 first delta is against its parent, which should be in our
1432 log, the rest are against the previous delta.
1448 log, the rest are against the previous delta.
1433
1449
1434 If ``addrevisioncb`` is defined, it will be called with arguments of
1450 If ``addrevisioncb`` is defined, it will be called with arguments of
1435 this revlog and the node that was added.
1451 this revlog and the node that was added.
1436 """
1452 """
1437
1453
1438 # track the base of the current delta log
1454 # track the base of the current delta log
1439 content = []
1455 content = []
1440 node = None
1456 node = None
1441
1457
1442 r = len(self)
1458 r = len(self)
1443 end = 0
1459 end = 0
1444 if r:
1460 if r:
1445 end = self.end(r - 1)
1461 end = self.end(r - 1)
1446 ifh = self.opener(self.indexfile, "a+")
1462 ifh = self.opener(self.indexfile, "a+")
1447 isize = r * self._io.size
1463 isize = r * self._io.size
1448 if self._inline:
1464 if self._inline:
1449 transaction.add(self.indexfile, end + isize, r)
1465 transaction.add(self.indexfile, end + isize, r)
1450 dfh = None
1466 dfh = None
1451 else:
1467 else:
1452 transaction.add(self.indexfile, isize, r)
1468 transaction.add(self.indexfile, isize, r)
1453 transaction.add(self.datafile, end)
1469 transaction.add(self.datafile, end)
1454 dfh = self.opener(self.datafile, "a")
1470 dfh = self.opener(self.datafile, "a")
1455 def flush():
1471 def flush():
1456 if dfh:
1472 if dfh:
1457 dfh.flush()
1473 dfh.flush()
1458 ifh.flush()
1474 ifh.flush()
1459 try:
1475 try:
1460 # loop through our set of deltas
1476 # loop through our set of deltas
1461 chain = None
1477 chain = None
1462 while True:
1478 while True:
1463 chunkdata = bundle.deltachunk(chain)
1479 chunkdata = bundle.deltachunk(chain)
1464 if not chunkdata:
1480 if not chunkdata:
1465 break
1481 break
1466 node = chunkdata['node']
1482 node = chunkdata['node']
1467 p1 = chunkdata['p1']
1483 p1 = chunkdata['p1']
1468 p2 = chunkdata['p2']
1484 p2 = chunkdata['p2']
1469 cs = chunkdata['cs']
1485 cs = chunkdata['cs']
1470 deltabase = chunkdata['deltabase']
1486 deltabase = chunkdata['deltabase']
1471 delta = chunkdata['delta']
1487 delta = chunkdata['delta']
1472
1488
1473 content.append(node)
1489 content.append(node)
1474
1490
1475 link = linkmapper(cs)
1491 link = linkmapper(cs)
1476 if node in self.nodemap:
1492 if node in self.nodemap:
1477 # this can happen if two branches make the same change
1493 # this can happen if two branches make the same change
1478 chain = node
1494 chain = node
1479 continue
1495 continue
1480
1496
1481 for p in (p1, p2):
1497 for p in (p1, p2):
1482 if p not in self.nodemap:
1498 if p not in self.nodemap:
1483 raise LookupError(p, self.indexfile,
1499 raise LookupError(p, self.indexfile,
1484 _('unknown parent'))
1500 _('unknown parent'))
1485
1501
1486 if deltabase not in self.nodemap:
1502 if deltabase not in self.nodemap:
1487 raise LookupError(deltabase, self.indexfile,
1503 raise LookupError(deltabase, self.indexfile,
1488 _('unknown delta base'))
1504 _('unknown delta base'))
1489
1505
1490 baserev = self.rev(deltabase)
1506 baserev = self.rev(deltabase)
1491
1507
1492 if baserev != nullrev and self.iscensored(baserev):
1508 if baserev != nullrev and self.iscensored(baserev):
1493 # if base is censored, delta must be full replacement in a
1509 # if base is censored, delta must be full replacement in a
1494 # single patch operation
1510 # single patch operation
1495 hlen = struct.calcsize(">lll")
1511 hlen = struct.calcsize(">lll")
1496 oldlen = self.rawsize(baserev)
1512 oldlen = self.rawsize(baserev)
1497 newlen = len(delta) - hlen
1513 newlen = len(delta) - hlen
1498 if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
1514 if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
1499 raise error.CensoredBaseError(self.indexfile,
1515 raise error.CensoredBaseError(self.indexfile,
1500 self.node(baserev))
1516 self.node(baserev))
1501
1517
1502 flags = REVIDX_DEFAULT_FLAGS
1518 flags = REVIDX_DEFAULT_FLAGS
1503 if self._peek_iscensored(baserev, delta, flush):
1519 if self._peek_iscensored(baserev, delta, flush):
1504 flags |= REVIDX_ISCENSORED
1520 flags |= REVIDX_ISCENSORED
1505
1521
1506 # We assume consumers of addrevisioncb will want to retrieve
1522 # We assume consumers of addrevisioncb will want to retrieve
1507 # the added revision, which will require a call to
1523 # the added revision, which will require a call to
1508 # revision(). revision() will fast path if there is a cache
1524 # revision(). revision() will fast path if there is a cache
1509 # hit. So, we tell _addrevision() to always cache in this case.
1525 # hit. So, we tell _addrevision() to always cache in this case.
1510 chain = self._addrevision(node, None, transaction, link,
1526 chain = self._addrevision(node, None, transaction, link,
1511 p1, p2, flags, (baserev, delta),
1527 p1, p2, flags, (baserev, delta),
1512 ifh, dfh,
1528 ifh, dfh,
1513 alwayscache=bool(addrevisioncb))
1529 alwayscache=bool(addrevisioncb))
1514
1530
1515 if addrevisioncb:
1531 if addrevisioncb:
1516 addrevisioncb(self, chain)
1532 addrevisioncb(self, chain)
1517
1533
1518 if not dfh and not self._inline:
1534 if not dfh and not self._inline:
1519 # addrevision switched from inline to conventional
1535 # addrevision switched from inline to conventional
1520 # reopen the index
1536 # reopen the index
1521 ifh.close()
1537 ifh.close()
1522 dfh = self.opener(self.datafile, "a")
1538 dfh = self.opener(self.datafile, "a")
1523 ifh = self.opener(self.indexfile, "a")
1539 ifh = self.opener(self.indexfile, "a")
1524 finally:
1540 finally:
1525 if dfh:
1541 if dfh:
1526 dfh.close()
1542 dfh.close()
1527 ifh.close()
1543 ifh.close()
1528
1544
1529 return content
1545 return content
1530
1546
1531 def iscensored(self, rev):
1547 def iscensored(self, rev):
1532 """Check if a file revision is censored."""
1548 """Check if a file revision is censored."""
1533 return False
1549 return False
1534
1550
1535 def _peek_iscensored(self, baserev, delta, flush):
1551 def _peek_iscensored(self, baserev, delta, flush):
1536 """Quickly check if a delta produces a censored revision."""
1552 """Quickly check if a delta produces a censored revision."""
1537 return False
1553 return False
1538
1554
1539 def getstrippoint(self, minlink):
1555 def getstrippoint(self, minlink):
1540 """find the minimum rev that must be stripped to strip the linkrev
1556 """find the minimum rev that must be stripped to strip the linkrev
1541
1557
1542 Returns a tuple containing the minimum rev and a set of all revs that
1558 Returns a tuple containing the minimum rev and a set of all revs that
1543 have linkrevs that will be broken by this strip.
1559 have linkrevs that will be broken by this strip.
1544 """
1560 """
1545 brokenrevs = set()
1561 brokenrevs = set()
1546 strippoint = len(self)
1562 strippoint = len(self)
1547
1563
1548 heads = {}
1564 heads = {}
1549 futurelargelinkrevs = set()
1565 futurelargelinkrevs = set()
1550 for head in self.headrevs():
1566 for head in self.headrevs():
1551 headlinkrev = self.linkrev(head)
1567 headlinkrev = self.linkrev(head)
1552 heads[head] = headlinkrev
1568 heads[head] = headlinkrev
1553 if headlinkrev >= minlink:
1569 if headlinkrev >= minlink:
1554 futurelargelinkrevs.add(headlinkrev)
1570 futurelargelinkrevs.add(headlinkrev)
1555
1571
1556 # This algorithm involves walking down the rev graph, starting at the
1572 # This algorithm involves walking down the rev graph, starting at the
1557 # heads. Since the revs are topologically sorted according to linkrev,
1573 # heads. Since the revs are topologically sorted according to linkrev,
1558 # once all head linkrevs are below the minlink, we know there are
1574 # once all head linkrevs are below the minlink, we know there are
1559 # no more revs that could have a linkrev greater than minlink.
1575 # no more revs that could have a linkrev greater than minlink.
1560 # So we can stop walking.
1576 # So we can stop walking.
1561 while futurelargelinkrevs:
1577 while futurelargelinkrevs:
1562 strippoint -= 1
1578 strippoint -= 1
1563 linkrev = heads.pop(strippoint)
1579 linkrev = heads.pop(strippoint)
1564
1580
1565 if linkrev < minlink:
1581 if linkrev < minlink:
1566 brokenrevs.add(strippoint)
1582 brokenrevs.add(strippoint)
1567 else:
1583 else:
1568 futurelargelinkrevs.remove(linkrev)
1584 futurelargelinkrevs.remove(linkrev)
1569
1585
1570 for p in self.parentrevs(strippoint):
1586 for p in self.parentrevs(strippoint):
1571 if p != nullrev:
1587 if p != nullrev:
1572 plinkrev = self.linkrev(p)
1588 plinkrev = self.linkrev(p)
1573 heads[p] = plinkrev
1589 heads[p] = plinkrev
1574 if plinkrev >= minlink:
1590 if plinkrev >= minlink:
1575 futurelargelinkrevs.add(plinkrev)
1591 futurelargelinkrevs.add(plinkrev)
1576
1592
1577 return strippoint, brokenrevs
1593 return strippoint, brokenrevs
1578
1594
1579 def strip(self, minlink, transaction):
1595 def strip(self, minlink, transaction):
1580 """truncate the revlog on the first revision with a linkrev >= minlink
1596 """truncate the revlog on the first revision with a linkrev >= minlink
1581
1597
1582 This function is called when we're stripping revision minlink and
1598 This function is called when we're stripping revision minlink and
1583 its descendants from the repository.
1599 its descendants from the repository.
1584
1600
1585 We have to remove all revisions with linkrev >= minlink, because
1601 We have to remove all revisions with linkrev >= minlink, because
1586 the equivalent changelog revisions will be renumbered after the
1602 the equivalent changelog revisions will be renumbered after the
1587 strip.
1603 strip.
1588
1604
1589 So we truncate the revlog on the first of these revisions, and
1605 So we truncate the revlog on the first of these revisions, and
1590 trust that the caller has saved the revisions that shouldn't be
1606 trust that the caller has saved the revisions that shouldn't be
1591 removed and that it'll re-add them after this truncation.
1607 removed and that it'll re-add them after this truncation.
1592 """
1608 """
1593 if len(self) == 0:
1609 if len(self) == 0:
1594 return
1610 return
1595
1611
1596 rev, _ = self.getstrippoint(minlink)
1612 rev, _ = self.getstrippoint(minlink)
1597 if rev == len(self):
1613 if rev == len(self):
1598 return
1614 return
1599
1615
1600 # first truncate the files on disk
1616 # first truncate the files on disk
1601 end = self.start(rev)
1617 end = self.start(rev)
1602 if not self._inline:
1618 if not self._inline:
1603 transaction.add(self.datafile, end)
1619 transaction.add(self.datafile, end)
1604 end = rev * self._io.size
1620 end = rev * self._io.size
1605 else:
1621 else:
1606 end += rev * self._io.size
1622 end += rev * self._io.size
1607
1623
1608 transaction.add(self.indexfile, end)
1624 transaction.add(self.indexfile, end)
1609
1625
1610 # then reset internal state in memory to forget those revisions
1626 # then reset internal state in memory to forget those revisions
1611 self._cache = None
1627 self._cache = None
1612 self._chaininfocache = {}
1628 self._chaininfocache = {}
1613 self._chunkclear()
1629 self._chunkclear()
1614 for x in xrange(rev, len(self)):
1630 for x in xrange(rev, len(self)):
1615 del self.nodemap[self.node(x)]
1631 del self.nodemap[self.node(x)]
1616
1632
1617 del self.index[rev:-1]
1633 del self.index[rev:-1]
1618
1634
1619 def checksize(self):
1635 def checksize(self):
1620 expected = 0
1636 expected = 0
1621 if len(self):
1637 if len(self):
1622 expected = max(0, self.end(len(self) - 1))
1638 expected = max(0, self.end(len(self) - 1))
1623
1639
1624 try:
1640 try:
1625 f = self.opener(self.datafile)
1641 f = self.opener(self.datafile)
1626 f.seek(0, 2)
1642 f.seek(0, 2)
1627 actual = f.tell()
1643 actual = f.tell()
1628 f.close()
1644 f.close()
1629 dd = actual - expected
1645 dd = actual - expected
1630 except IOError as inst:
1646 except IOError as inst:
1631 if inst.errno != errno.ENOENT:
1647 if inst.errno != errno.ENOENT:
1632 raise
1648 raise
1633 dd = 0
1649 dd = 0
1634
1650
1635 try:
1651 try:
1636 f = self.opener(self.indexfile)
1652 f = self.opener(self.indexfile)
1637 f.seek(0, 2)
1653 f.seek(0, 2)
1638 actual = f.tell()
1654 actual = f.tell()
1639 f.close()
1655 f.close()
1640 s = self._io.size
1656 s = self._io.size
1641 i = max(0, actual // s)
1657 i = max(0, actual // s)
1642 di = actual - (i * s)
1658 di = actual - (i * s)
1643 if self._inline:
1659 if self._inline:
1644 databytes = 0
1660 databytes = 0
1645 for r in self:
1661 for r in self:
1646 databytes += max(0, self.length(r))
1662 databytes += max(0, self.length(r))
1647 dd = 0
1663 dd = 0
1648 di = actual - len(self) * s - databytes
1664 di = actual - len(self) * s - databytes
1649 except IOError as inst:
1665 except IOError as inst:
1650 if inst.errno != errno.ENOENT:
1666 if inst.errno != errno.ENOENT:
1651 raise
1667 raise
1652 di = 0
1668 di = 0
1653
1669
1654 return (dd, di)
1670 return (dd, di)
1655
1671
1656 def files(self):
1672 def files(self):
1657 res = [self.indexfile]
1673 res = [self.indexfile]
1658 if not self._inline:
1674 if not self._inline:
1659 res.append(self.datafile)
1675 res.append(self.datafile)
1660 return res
1676 return res
General Comments 0
You need to be logged in to leave comments. Login now