revlog: use existing file handle when reading during _addrevision...
Gregory Szorc
r26379:39d64325 default
@@ -1,1676 +1,1680 @@
# revlog.py - storage back-end for mercurial
#
# Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.

"""Storage back-end for Mercurial.

This provides efficient delta storage with O(1) retrieve and append
and O(changes) merge between branches.
"""

# import stuff from node for others to import from revlog
import collections
from node import bin, hex, nullid, nullrev
from i18n import _
import ancestor, mdiff, parsers, error, util, templatefilters
import struct, zlib, errno

_pack = struct.pack
_unpack = struct.unpack
_compress = zlib.compress
_decompress = zlib.decompress
_sha = util.sha1

# revlog header flags
REVLOGV0 = 0
REVLOGNG = 1
REVLOGNGINLINEDATA = (1 << 16)
REVLOGGENERALDELTA = (1 << 17)
REVLOG_DEFAULT_FLAGS = REVLOGNGINLINEDATA
REVLOG_DEFAULT_FORMAT = REVLOGNG
REVLOG_DEFAULT_VERSION = REVLOG_DEFAULT_FORMAT | REVLOG_DEFAULT_FLAGS
REVLOGNG_FLAGS = REVLOGNGINLINEDATA | REVLOGGENERALDELTA

# revlog index flags
REVIDX_ISCENSORED = (1 << 15) # revision has censor metadata, must be verified
REVIDX_DEFAULT_FLAGS = 0
REVIDX_KNOWN_FLAGS = REVIDX_ISCENSORED

# max size of revlog with inline data
_maxinline = 131072
_chunksize = 1048576

RevlogError = error.RevlogError
LookupError = error.LookupError
CensoredNodeError = error.CensoredNodeError

def getoffset(q):
    return int(q >> 16)

def gettype(q):
    return int(q & 0xFFFF)

def offset_type(offset, type):
    return long(long(offset) << 16 | type)

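# For illustration: the first field of an index entry packs the data-file
# offset into the high 48 bits and the per-revision flags into the low 16
# bits, so the helpers above round-trip:
#
#   >>> q = offset_type(4096, 0)
#   >>> getoffset(q), gettype(q)
#   (4096, 0)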
_nullhash = _sha(nullid)

def hash(text, p1, p2):
    """generate a hash from the given text and its parent hashes

    This hash combines both the current file contents and its history
    in a manner that makes it easy to distinguish nodes with the same
    content in the revision graph.
    """
    # As of now, if one of the parent nodes is null, p2 is null
    if p2 == nullid:
        # deep copy of a hash is faster than creating one
        s = _nullhash.copy()
        s.update(p1)
    else:
        # none of the parent nodes are nullid
        l = [p1, p2]
        l.sort()
        s = _sha(l[0])
        s.update(l[1])
    s.update(text)
    return s.digest()

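# For illustration: because nullid is twenty zero bytes and always sorts
# first, the single-parent fast path above computes the same digest as the
# general sorted-parents case, i.e. sha1(nullid + p1 + text):
#
#   >>> import hashlib
#   >>> p1 = '\x11' * 20
#   >>> hash('text', p1, nullid) == hashlib.sha1(nullid + p1 + 'text').digest()
#   True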
def decompress(bin):
    """ decompress the given input """
    if not bin:
        return bin
    t = bin[0]
    if t == '\0':
        return bin
    if t == 'x':
        try:
            return _decompress(bin)
        except zlib.error as e:
            raise RevlogError(_("revlog decompress error: %s") % str(e))
    if t == 'u':
        return bin[1:]
    raise RevlogError(_("unknown compression type %r") % t)

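# For illustration: the first byte of a stored chunk doubles as its type
# marker. zlib streams produced with the default settings begin with 'x'
# (0x78), '\0' returns the chunk untouched, and 'u' prefixes data stored
# uncompressed:
#
#   >>> decompress(_compress('some text'))
#   'some text'
#   >>> decompress('u' + 'raw text')
#   'raw text'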
# index v0:
# 4 bytes: offset
# 4 bytes: compressed length
# 4 bytes: base rev
# 4 bytes: link rev
# 20 bytes: parent 1 nodeid
# 20 bytes: parent 2 nodeid
# 20 bytes: nodeid
indexformatv0 = ">4l20s20s20s"

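# For illustration: a v0 index entry is a fixed 76 bytes -- four 4-byte
# big-endian longs followed by three 20-byte nodeids:
#
#   >>> struct.calcsize(indexformatv0)
#   76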
class revlogoldio(object):
    def __init__(self):
        self.size = struct.calcsize(indexformatv0)

    def parseindex(self, data, inline):
        s = self.size
        index = []
        nodemap = {nullid: nullrev}
        n = off = 0
        l = len(data)
        while off + s <= l:
            cur = data[off:off + s]
            off += s
            e = _unpack(indexformatv0, cur)
            # transform to revlogv1 format
            e2 = (offset_type(e[0], 0), e[1], -1, e[2], e[3],
                  nodemap.get(e[4], nullrev), nodemap.get(e[5], nullrev), e[6])
            index.append(e2)
            nodemap[e[6]] = n
            n += 1

        # add the magic null revision at -1
        index.append((0, 0, 0, -1, -1, -1, -1, nullid))

        return index, nodemap, None

    def packentry(self, entry, node, version, rev):
        if gettype(entry[0]):
            raise RevlogError(_("index entry flags need RevlogNG"))
        e2 = (getoffset(entry[0]), entry[1], entry[3], entry[4],
              node(entry[5]), node(entry[6]), entry[7])
        return _pack(indexformatv0, *e2)

# index ng:
# 6 bytes: offset
# 2 bytes: flags
# 4 bytes: compressed length
# 4 bytes: uncompressed length
# 4 bytes: base rev
# 4 bytes: link rev
# 4 bytes: parent 1 rev
# 4 bytes: parent 2 rev
# 32 bytes: nodeid
indexformatng = ">Qiiiiii20s12x"
versionformat = ">I"

# corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
# signed integer)
_maxentrysize = 0x7fffffff

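# For illustration: a RevlogNG entry is a fixed 64 bytes (an 8-byte
# offset/flags word, six 4-byte ints, and a 20-byte nodeid padded to 32):
#
#   >>> struct.calcsize(indexformatng)
#   64
#
# Note that revlogio.packentry() below overwrites the first 4 bytes of the
# rev 0 entry with the version header; rev 0 always starts at offset 0, so
# no information is lost.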
class revlogio(object):
    def __init__(self):
        self.size = struct.calcsize(indexformatng)

    def parseindex(self, data, inline):
        # call the C implementation to parse the index data
        index, cache = parsers.parse_index2(data, inline)
        return index, getattr(index, 'nodemap', None), cache

    def packentry(self, entry, node, version, rev):
        p = _pack(indexformatng, *entry)
        if rev == 0:
            p = _pack(versionformat, version) + p[4:]
        return p

class revlog(object):
    """
    the underlying revision storage object

    A revlog consists of two parts, an index and the revision data.

    The index is a file with a fixed record size containing
    information on each revision, including its nodeid (hash), the
    nodeids of its parents, the position and offset of its data within
    the data file, and the revision it's based on. Finally, each entry
    contains a linkrev entry that can serve as a pointer to external
    data.

    The revision data itself is a linear collection of data chunks.
    Each chunk represents a revision and is usually represented as a
    delta against the previous chunk. To bound lookup time, runs of
    deltas are limited to about 2 times the length of the original
    version data. This makes retrieval of a version proportional to
    its size, or O(1) relative to the number of revisions.

    Both pieces of the revlog are written to in an append-only
    fashion, which means we never need to rewrite a file to insert or
    remove data, and can use some simple techniques to avoid the need
    for locking while reading.
    """
    def __init__(self, opener, indexfile):
        """
        create a revlog object

        opener is a function that abstracts the file opening operation
        and can be used to implement COW semantics or the like.
        """
        self.indexfile = indexfile
        self.datafile = indexfile[:-2] + ".d"
        self.opener = opener
        self._cache = None
        self._basecache = None
        self._chunkcache = (0, '')
        self._chunkcachesize = 65536
        self._maxchainlen = None
        self._aggressivemergedeltas = False
        self.index = []
        self._pcache = {}
        self._nodecache = {nullid: nullrev}
        self._nodepos = None

        v = REVLOG_DEFAULT_VERSION
        opts = getattr(opener, 'options', None)
        if opts is not None:
            if 'revlogv1' in opts:
                if 'generaldelta' in opts:
                    v |= REVLOGGENERALDELTA
            else:
                v = 0
            if 'chunkcachesize' in opts:
                self._chunkcachesize = opts['chunkcachesize']
            if 'maxchainlen' in opts:
                self._maxchainlen = opts['maxchainlen']
            if 'aggressivemergedeltas' in opts:
                self._aggressivemergedeltas = opts['aggressivemergedeltas']

        if self._chunkcachesize <= 0:
            raise RevlogError(_('revlog chunk cache size %r is not greater '
                                'than 0') % self._chunkcachesize)
        elif self._chunkcachesize & (self._chunkcachesize - 1):
            raise RevlogError(_('revlog chunk cache size %r is not a power '
                                'of 2') % self._chunkcachesize)

        indexdata = ''
        self._initempty = True
        try:
            f = self.opener(self.indexfile)
            indexdata = f.read()
            f.close()
            if len(indexdata) > 0:
                v = struct.unpack(versionformat, indexdata[:4])[0]
                self._initempty = False
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise

        self.version = v
        self._inline = v & REVLOGNGINLINEDATA
        self._generaldelta = v & REVLOGGENERALDELTA
        flags = v & ~0xFFFF
        fmt = v & 0xFFFF
        if fmt == REVLOGV0 and flags:
            raise RevlogError(_("index %s unknown flags %#04x for format v0")
                              % (self.indexfile, flags >> 16))
        elif fmt == REVLOGNG and flags & ~REVLOGNG_FLAGS:
            raise RevlogError(_("index %s unknown flags %#04x for revlogng")
                              % (self.indexfile, flags >> 16))
        elif fmt > REVLOGNG:
            raise RevlogError(_("index %s unknown format %d")
                              % (self.indexfile, fmt))

        self._io = revlogio()
        if self.version == REVLOGV0:
            self._io = revlogoldio()
        try:
            d = self._io.parseindex(indexdata, self._inline)
        except (ValueError, IndexError):
            raise RevlogError(_("index %s is corrupted") % (self.indexfile))
        self.index, nodemap, self._chunkcache = d
        if nodemap is not None:
            self.nodemap = self._nodecache = nodemap
        if not self._chunkcache:
            self._chunkclear()
        # revnum -> (chain-length, sum-delta-length)
        self._chaininfocache = {}

    def tip(self):
        return self.node(len(self.index) - 2)
    def __contains__(self, rev):
        return 0 <= rev < len(self)
    def __len__(self):
        return len(self.index) - 1
    def __iter__(self):
        return iter(xrange(len(self)))
    def revs(self, start=0, stop=None):
        """iterate over all revs in this revlog (from start to stop)"""
        step = 1
        if stop is not None:
            if start > stop:
                step = -1
            stop += step
        else:
            stop = len(self)
        return xrange(start, stop, step)

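    # For illustration: revs() iterates forward by default and reverses
    # direction when start > stop, with both endpoints included. For a
    # hypothetical revlog 'rl' with at least three revisions:
    #
    #   list(rl.revs(0, 2))  ->  [0, 1, 2]
    #   list(rl.revs(2, 0))  ->  [2, 1, 0]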
    @util.propertycache
    def nodemap(self):
        self.rev(self.node(0))
        return self._nodecache

    def hasnode(self, node):
        try:
            self.rev(node)
            return True
        except KeyError:
            return False

    def clearcaches(self):
        try:
            self._nodecache.clearcaches()
        except AttributeError:
            self._nodecache = {nullid: nullrev}
            self._nodepos = None

    def rev(self, node):
        try:
            return self._nodecache[node]
        except TypeError:
            raise
        except RevlogError:
            # parsers.c radix tree lookup failed
            raise LookupError(node, self.indexfile, _('no node'))
        except KeyError:
            # pure python cache lookup failed
            n = self._nodecache
            i = self.index
            p = self._nodepos
            if p is None:
                p = len(i) - 2
            for r in xrange(p, -1, -1):
                v = i[r][7]
                n[v] = r
                if v == node:
                    self._nodepos = r - 1
                    return r
            raise LookupError(node, self.indexfile, _('no node'))

    def node(self, rev):
        return self.index[rev][7]
    def linkrev(self, rev):
        return self.index[rev][4]
    def parents(self, node):
        i = self.index
        d = i[self.rev(node)]
        return i[d[5]][7], i[d[6]][7] # map revisions to nodes inline
    def parentrevs(self, rev):
        return self.index[rev][5:7]
    def start(self, rev):
        return int(self.index[rev][0] >> 16)
    def end(self, rev):
        return self.start(rev) + self.length(rev)
    def length(self, rev):
        return self.index[rev][1]
    def chainbase(self, rev):
        index = self.index
        base = index[rev][3]
        while base != rev:
            rev = base
            base = index[rev][3]
        return base
    def chainlen(self, rev):
        return self._chaininfo(rev)[0]

    def _chaininfo(self, rev):
        chaininfocache = self._chaininfocache
        if rev in chaininfocache:
            return chaininfocache[rev]
        index = self.index
        generaldelta = self._generaldelta
        iterrev = rev
        e = index[iterrev]
        clen = 0
        compresseddeltalen = 0
        while iterrev != e[3]:
            clen += 1
            compresseddeltalen += e[1]
            if generaldelta:
                iterrev = e[3]
            else:
                iterrev -= 1
            if iterrev in chaininfocache:
                t = chaininfocache[iterrev]
                clen += t[0]
                compresseddeltalen += t[1]
                break
            e = index[iterrev]
        else:
            # Add text length of base since decompressing that also takes
            # work. For cache hits the length is already included.
            compresseddeltalen += e[1]
        r = (clen, compresseddeltalen)
        chaininfocache[rev] = r
        return r

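    # For illustration: _chaininfo() walks a delta chain back to its base.
    # With generaldelta each entry names its delta parent explicitly
    # (iterrev = e[3]); in the classic layout each revision is a delta
    # against its immediate predecessor (iterrev -= 1). Either walk ends
    # when a revision is its own base, i.e. stores a full snapshot.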
    def flags(self, rev):
        return self.index[rev][0] & 0xFFFF
    def rawsize(self, rev):
        """return the length of the uncompressed text for a given revision"""
        l = self.index[rev][2]
        if l >= 0:
            return l

        t = self.revision(self.node(rev))
        return len(t)
    size = rawsize

    def ancestors(self, revs, stoprev=0, inclusive=False):
        """Generate the ancestors of 'revs' in reverse topological order.
        Does not generate revs lower than stoprev.

        See the documentation for ancestor.lazyancestors for more details."""

        return ancestor.lazyancestors(self.parentrevs, revs, stoprev=stoprev,
                                      inclusive=inclusive)

    def descendants(self, revs):
        """Generate the descendants of 'revs' in revision order.

        Yield a sequence of revision numbers starting with a child of
        some rev in revs, i.e., each revision is *not* considered a
        descendant of itself. Results are ordered by revision number (a
        topological sort)."""
        first = min(revs)
        if first == nullrev:
            for i in self:
                yield i
            return

        seen = set(revs)
        for i in self.revs(start=first + 1):
            for x in self.parentrevs(i):
                if x != nullrev and x in seen:
                    seen.add(i)
                    yield i
                    break

    def findcommonmissing(self, common=None, heads=None):
        """Return a tuple of the ancestors of common and the ancestors of heads
        that are not ancestors of common. In revset terminology, we return the
        tuple:

          ::common, (::heads) - (::common)

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of node IDs. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullid."""
        if common is None:
            common = [nullid]
        if heads is None:
            heads = self.heads()

        common = [self.rev(n) for n in common]
        heads = [self.rev(n) for n in heads]

        # we want the ancestors, but inclusive
        class lazyset(object):
            def __init__(self, lazyvalues):
                self.addedvalues = set()
                self.lazyvalues = lazyvalues

            def __contains__(self, value):
                return value in self.addedvalues or value in self.lazyvalues

            def __iter__(self):
                added = self.addedvalues
                for r in added:
                    yield r
                for r in self.lazyvalues:
                    if not r in added:
                        yield r

            def add(self, value):
                self.addedvalues.add(value)

            def update(self, values):
                self.addedvalues.update(values)

        has = lazyset(self.ancestors(common))
        has.add(nullrev)
        has.update(common)

        # take all ancestors from heads that aren't in has
        missing = set()
        visit = collections.deque(r for r in heads if r not in has)
        while visit:
            r = visit.popleft()
            if r in missing:
                continue
            else:
                missing.add(r)
                for p in self.parentrevs(r):
                    if p not in has:
                        visit.append(p)
        missing = list(missing)
        missing.sort()
        return has, [self.node(r) for r in missing]

    def incrementalmissingrevs(self, common=None):
        """Return an object that can be used to incrementally compute the
        revision numbers of the ancestors of arbitrary sets that are not
        ancestors of common. This is an ancestor.incrementalmissingancestors
        object.

        'common' is a list of revision numbers. If common is not supplied, uses
        nullrev.
        """
        if common is None:
            common = [nullrev]

        return ancestor.incrementalmissingancestors(self.parentrevs, common)

    def findmissingrevs(self, common=None, heads=None):
        """Return the revision numbers of the ancestors of heads that
        are not ancestors of common.

        More specifically, return a list of revision numbers corresponding to
        nodes N such that every N satisfies the following constraints:

        1. N is an ancestor of some node in 'heads'
        2. N is not an ancestor of any node in 'common'

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of revision numbers. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullrev."""
        if common is None:
            common = [nullrev]
        if heads is None:
            heads = self.headrevs()

        inc = self.incrementalmissingrevs(common=common)
        return inc.missingancestors(heads)

    def findmissing(self, common=None, heads=None):
        """Return the ancestors of heads that are not ancestors of common.

        More specifically, return a list of nodes N such that every N
        satisfies the following constraints:

        1. N is an ancestor of some node in 'heads'
        2. N is not an ancestor of any node in 'common'

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of node IDs. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullid."""
        if common is None:
            common = [nullid]
        if heads is None:
            heads = self.heads()

        common = [self.rev(n) for n in common]
        heads = [self.rev(n) for n in heads]

        inc = self.incrementalmissingrevs(common=common)
        return [self.node(r) for r in inc.missingancestors(heads)]

    def nodesbetween(self, roots=None, heads=None):
        """Return a topological path from 'roots' to 'heads'.

        Return a tuple (nodes, outroots, outheads) where 'nodes' is a
        topologically sorted list of all nodes N that satisfy both of
        these constraints:

        1. N is a descendant of some node in 'roots'
        2. N is an ancestor of some node in 'heads'

        Every node is considered to be both a descendant and an ancestor
        of itself, so every reachable node in 'roots' and 'heads' will be
        included in 'nodes'.

        'outroots' is the list of reachable nodes in 'roots', i.e., the
        subset of 'roots' that is returned in 'nodes'. Likewise,
        'outheads' is the subset of 'heads' that is also in 'nodes'.

        'roots' and 'heads' are both lists of node IDs. If 'roots' is
        unspecified, uses nullid as the only root. If 'heads' is
        unspecified, uses list of all of the revlog's heads."""
        nonodes = ([], [], [])
        if roots is not None:
            roots = list(roots)
            if not roots:
                return nonodes
            lowestrev = min([self.rev(n) for n in roots])
        else:
            roots = [nullid] # Everybody's a descendant of nullid
            lowestrev = nullrev
        if (lowestrev == nullrev) and (heads is None):
            # We want _all_ the nodes!
            return ([self.node(r) for r in self], [nullid], list(self.heads()))
        if heads is None:
            # All nodes are ancestors, so the latest ancestor is the last
            # node.
            highestrev = len(self) - 1
            # Set ancestors to None to signal that every node is an ancestor.
            ancestors = None
            # Set heads to an empty dictionary for later discovery of heads
            heads = {}
        else:
            heads = list(heads)
            if not heads:
                return nonodes
            ancestors = set()
            # Turn heads into a dictionary so we can remove 'fake' heads.
            # Also, later we will be using it to filter out the heads we can't
            # find from roots.
            heads = dict.fromkeys(heads, False)
            # Start at the top and keep marking parents until we're done.
            nodestotag = set(heads)
            # Remember where the top was so we can use it as a limit later.
            highestrev = max([self.rev(n) for n in nodestotag])
            while nodestotag:
                # grab a node to tag
                n = nodestotag.pop()
                # Never tag nullid
                if n == nullid:
                    continue
                # A node's revision number represents its place in a
                # topologically sorted list of nodes.
                r = self.rev(n)
                if r >= lowestrev:
                    if n not in ancestors:
                        # If we are possibly a descendant of one of the roots
                        # and we haven't already been marked as an ancestor
                        ancestors.add(n) # Mark as ancestor
                        # Add non-nullid parents to list of nodes to tag.
                        nodestotag.update([p for p in self.parents(n) if
                                           p != nullid])
                    elif n in heads: # We've seen it before, is it a fake head?
                        # So it is, real heads should not be the ancestors of
                        # any other heads.
                        heads.pop(n)
            if not ancestors:
                return nonodes
            # Now that we have our set of ancestors, we want to remove any
            # roots that are not ancestors.

            # If one of the roots was nullid, everything is included anyway.
            if lowestrev > nullrev:
                # But, since we weren't, let's recompute the lowest rev to not
                # include roots that aren't ancestors.

                # Filter out roots that aren't ancestors of heads
                roots = [n for n in roots if n in ancestors]
                # Recompute the lowest revision
                if roots:
                    lowestrev = min([self.rev(n) for n in roots])
                else:
                    # No more roots? Return empty list
                    return nonodes
            else:
                # We are descending from nullid, and don't need to care about
                # any other roots.
                lowestrev = nullrev
                roots = [nullid]
        # Transform our roots list into a set.
        descendants = set(roots)
        # Also, keep the original roots so we can filter out roots that aren't
        # 'real' roots (i.e. are descended from other roots).
        roots = descendants.copy()
        # Our topologically sorted list of output nodes.
        orderedout = []
        # Don't start at nullid since we don't want nullid in our output list,
        # and if nullid shows up in descendants, empty parents will look like
        # they're descendants.
        for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
            n = self.node(r)
            isdescendant = False
            if lowestrev == nullrev: # Everybody is a descendant of nullid
                isdescendant = True
            elif n in descendants:
                # n is already a descendant
                isdescendant = True
                # This check only needs to be done here because all the roots
                # will start being marked as descendants before the loop.
                if n in roots:
                    # If n was a root, check if it's a 'real' root.
                    p = tuple(self.parents(n))
                    # If any of its parents are descendants, it's not a root.
                    if (p[0] in descendants) or (p[1] in descendants):
                        roots.remove(n)
            else:
                p = tuple(self.parents(n))
                # A node is a descendant if either of its parents are
                # descendants. (We seeded the dependents list with the roots
                # up there, remember?)
                if (p[0] in descendants) or (p[1] in descendants):
                    descendants.add(n)
                    isdescendant = True
            if isdescendant and ((ancestors is None) or (n in ancestors)):
                # Only include nodes that are both descendants and ancestors.
                orderedout.append(n)
                if (ancestors is not None) and (n in heads):
                    # We're trying to figure out which heads are reachable
                    # from roots.
                    # Mark this head as having been reached
                    heads[n] = True
                elif ancestors is None:
                    # Otherwise, we're trying to discover the heads.
                    # Assume this is a head because if it isn't, the next step
                    # will eventually remove it.
                    heads[n] = True
                    # But, obviously its parents aren't.
                    for p in self.parents(n):
                        heads.pop(p, None)
        heads = [n for n, flag in heads.iteritems() if flag]
        roots = list(roots)
        assert orderedout
        assert roots
        assert heads
        return (orderedout, roots, heads)

    def headrevs(self):
        try:
            return self.index.headrevs()
        except AttributeError:
            return self._headrevs()

    def computephases(self, roots):
        return self.index.computephasesmapsets(roots)

    def _headrevs(self):
        count = len(self)
        if not count:
            return [nullrev]
        # we won't iter over filtered revs so nobody is a head at start
        ishead = [0] * (count + 1)
        index = self.index
        for r in self:
            ishead[r] = 1 # I may be a head
            e = index[r]
            ishead[e[5]] = ishead[e[6]] = 0 # my parents are not
        return [r for r, val in enumerate(ishead) if val]

    def heads(self, start=None, stop=None):
        """return the list of all nodes that have no children

        if start is specified, only heads that are descendants of
        start will be returned
        if stop is specified, it will consider all the revs from stop
        as if they had no children
        """
        if start is None and stop is None:
            if not len(self):
                return [nullid]
            return [self.node(r) for r in self.headrevs()]

        if start is None:
            start = nullid
        if stop is None:
            stop = []
        stoprevs = set([self.rev(n) for n in stop])
        startrev = self.rev(start)
        reachable = set((startrev,))
        heads = set((startrev,))

        parentrevs = self.parentrevs
        for r in self.revs(start=startrev + 1):
            for p in parentrevs(r):
                if p in reachable:
                    if r not in stoprevs:
                        reachable.add(r)
                    heads.add(r)
                if p in heads and p not in stoprevs:
                    heads.remove(p)

        return [self.node(r) for r in heads]

    def children(self, node):
        """find the children of a given node"""
        c = []
        p = self.rev(node)
        for r in self.revs(start=p + 1):
            prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
            if prevs:
                for pr in prevs:
                    if pr == p:
                        c.append(self.node(r))
            elif p == nullrev:
                c.append(self.node(r))
        return c

    def descendant(self, start, end):
        if start == nullrev:
            return True
        for i in self.descendants([start]):
            if i == end:
                return True
            elif i > end:
                break
        return False

    def commonancestorsheads(self, a, b):
        """calculate all the heads of the common ancestors of nodes a and b"""
        a, b = self.rev(a), self.rev(b)
        try:
            ancs = self.index.commonancestorsheads(a, b)
        except (AttributeError, OverflowError): # C implementation failed
            ancs = ancestor.commonancestorsheads(self.parentrevs, a, b)
        return map(self.node, ancs)

    def isancestor(self, a, b):
        """return True if node a is an ancestor of node b

        The implementation of this is trivial but the use of
        commonancestorsheads is not."""
        return a in self.commonancestorsheads(a, b)

    def ancestor(self, a, b):
        """calculate the "best" common ancestor of nodes a and b"""

        a, b = self.rev(a), self.rev(b)
        try:
            ancs = self.index.ancestors(a, b)
        except (AttributeError, OverflowError):
            ancs = ancestor.ancestors(self.parentrevs, a, b)
        if ancs:
            # choose a consistent winner when there's a tie
            return min(map(self.node, ancs))
        return nullid

    def _match(self, id):
        if isinstance(id, int):
            # rev
            return self.node(id)
        if len(id) == 20:
            # possibly a binary node
            # odds of a binary node being all hex in ASCII are 1 in 10**25
            try:
                node = id
                self.rev(node) # quick search the index
                return node
            except LookupError:
                pass # may be partial hex id
        try:
            # str(rev)
            rev = int(id)
            if str(rev) != id:
                raise ValueError
            if rev < 0:
                rev = len(self) + rev
            if rev < 0 or rev >= len(self):
                raise ValueError
            return self.node(rev)
        except (ValueError, OverflowError):
            pass
        if len(id) == 40:
            try:
                # a full hex nodeid?
                node = bin(id)
                self.rev(node)
                return node
            except (TypeError, LookupError):
                pass

    def _partialmatch(self, id):
        try:
            n = self.index.partialmatch(id)
            if n and self.hasnode(n):
                return n
            return None
        except RevlogError:
            # parsers.c radix tree lookup gave multiple matches
            # fall through to slow path that filters hidden revisions
            pass
        except (AttributeError, ValueError):
            # we are pure python, or key was too short to search radix tree
            pass

        if id in self._pcache:
            return self._pcache[id]

        if len(id) < 40:
            try:
                # hex(node)[:...]
                l = len(id) // 2 # grab an even number of digits
                prefix = bin(id[:l * 2])
                nl = [e[7] for e in self.index if e[7].startswith(prefix)]
                nl = [n for n in nl if hex(n).startswith(id) and
                      self.hasnode(n)]
                if len(nl) > 0:
                    if len(nl) == 1:
                        self._pcache[id] = nl[0]
                        return nl[0]
                    raise LookupError(id, self.indexfile,
                                      _('ambiguous identifier'))
                return None
            except TypeError:
                pass

    def lookup(self, id):
        """locate a node based on:
            - revision number or str(revision number)
            - nodeid or subset of hex nodeid
        """
        n = self._match(id)
        if n is not None:
            return n
        n = self._partialmatch(id)
        if n:
            return n

        raise LookupError(id, self.indexfile, _('no match found'))

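    # For illustration: lookup() accepts several spellings of the same
    # revision. For a hypothetical revlog 'rl', all of the following resolve
    # to the node of rev 0:
    #
    #   rl.lookup(0)                    # revision number
    #   rl.lookup('0')                  # str(revision number)
    #   rl.lookup(rl.node(0))           # 20-byte binary nodeid
    #   rl.lookup(hex(rl.node(0)))      # 40-char hex nodeid
    #   rl.lookup(hex(rl.node(0))[:8])  # unambiguous hex prefix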
919 def cmp(self, node, text):
919 def cmp(self, node, text):
920 """compare text with a given file revision
920 """compare text with a given file revision
921
921
922 returns True if text is different than what is stored.
922 returns True if text is different than what is stored.
923 """
923 """
924 p1, p2 = self.parents(node)
924 p1, p2 = self.parents(node)
925 return hash(text, p1, p2) != node
925 return hash(text, p1, p2) != node
926
926
927 def _addchunk(self, offset, data):
927 def _addchunk(self, offset, data):
928 o, d = self._chunkcache
928 o, d = self._chunkcache
929 # try to add to existing cache
929 # try to add to existing cache
930 if o + len(d) == offset and len(d) + len(data) < _chunksize:
930 if o + len(d) == offset and len(d) + len(data) < _chunksize:
931 self._chunkcache = o, d + data
931 self._chunkcache = o, d + data
932 else:
932 else:
933 self._chunkcache = offset, data
933 self._chunkcache = offset, data
934
934
935 def _loadchunk(self, offset, length, df=None):
935 def _loadchunk(self, offset, length, df=None):
936 """Load a chunk/segment from the revlog.
936 """Load a chunk/segment from the revlog.
937
937
938 Accepts absolute offset, length to read, and an optional existing
938 Accepts absolute offset, length to read, and an optional existing
939 file handle to read from.
939 file handle to read from.
940
940
941 If an existing file handle is passed, its seek position will be moved
941 If an existing file handle is passed, its seek position will be moved
942 and will NOT be restored afterwards.
942 and will NOT be restored afterwards.
943 """
943 """
944 if df is not None:
944 if df is not None:
945 closehandle = False
945 closehandle = False
946 else:
946 else:
947 if self._inline:
947 if self._inline:
948 df = self.opener(self.indexfile)
948 df = self.opener(self.indexfile)
949 else:
949 else:
950 df = self.opener(self.datafile)
950 df = self.opener(self.datafile)
951 closehandle = True
951 closehandle = True
952
952
953 # Cache data both forward and backward around the requested
953 # Cache data both forward and backward around the requested
954 # data, in a fixed size window. This helps speed up operations
954 # data, in a fixed size window. This helps speed up operations
955 # involving reading the revlog backwards.
955 # involving reading the revlog backwards.
956 cachesize = self._chunkcachesize
956 cachesize = self._chunkcachesize
957 realoffset = offset & ~(cachesize - 1)
957 realoffset = offset & ~(cachesize - 1)
958 reallength = (((offset + length + cachesize) & ~(cachesize - 1))
958 reallength = (((offset + length + cachesize) & ~(cachesize - 1))
959 - realoffset)
959 - realoffset)
960 df.seek(realoffset)
960 df.seek(realoffset)
961 d = df.read(reallength)
961 d = df.read(reallength)
962 if closehandle:
962 if closehandle:
963 df.close()
963 df.close()
964 self._addchunk(realoffset, d)
964 self._addchunk(realoffset, d)
965 if offset != realoffset or reallength != length:
965 if offset != realoffset or reallength != length:
966 return util.buffer(d, offset - realoffset, length)
966 return util.buffer(d, offset - realoffset, length)
967 return d
967 return d
968
968
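A worked example of the window alignment above, assuming the default chunk cache size of 64KB (self._chunkcachesize is configurable; 65536 is used here for illustration):

cachesize = 65536
offset, length = 70000, 100
realoffset = offset & ~(cachesize - 1)    # 65536: rounded down to the window
reallength = ((offset + length + cachesize) & ~(cachesize - 1)) - realoffset
# reallength == 65536, so one aligned window covering bytes [65536, 131072)
# is read and cached, and a buffer over [70000, 70100) is handed back.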
969 def _getchunk(self, offset, length, df=None):
969 def _getchunk(self, offset, length, df=None):
970 o, d = self._chunkcache
970 o, d = self._chunkcache
971 l = len(d)
971 l = len(d)
972
972
973 # is it in the cache?
973 # is it in the cache?
974 cachestart = offset - o
974 cachestart = offset - o
975 cacheend = cachestart + length
975 cacheend = cachestart + length
976 if cachestart >= 0 and cacheend <= l:
976 if cachestart >= 0 and cacheend <= l:
977 if cachestart == 0 and cacheend == l:
977 if cachestart == 0 and cacheend == l:
978 return d # avoid a copy
978 return d # avoid a copy
979 return util.buffer(d, cachestart, cacheend - cachestart)
979 return util.buffer(d, cachestart, cacheend - cachestart)
980
980
981 return self._loadchunk(offset, length, df=df)
981 return self._loadchunk(offset, length, df=df)
982
982
983 def _chunkraw(self, startrev, endrev, df=None):
983 def _chunkraw(self, startrev, endrev, df=None):
984 start = self.start(startrev)
984 start = self.start(startrev)
985 end = self.end(endrev)
985 end = self.end(endrev)
986 if self._inline:
986 if self._inline:
987 start += (startrev + 1) * self._io.size
987 start += (startrev + 1) * self._io.size
988 end += (endrev + 1) * self._io.size
988 end += (endrev + 1) * self._io.size
989 length = end - start
989 length = end - start
990 return self._getchunk(start, length, df=df)
990 return self._getchunk(start, length, df=df)
991
991
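The inline adjustment above exists because an inline revlog interleaves index entries with data; a small illustration, assuming RevlogNG's 64-byte entries and an `rl` instance:

iosize = 64   # self._io.size for RevlogNG (assumption for illustration)
rev = 2
# rev 2's compressed data is preceded by the index entries of revs 0..2,
# hence the (rev + 1) * iosize shift:
dataoffset = rl.start(rev) + (rev + 1) * iosize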
992 def _chunk(self, rev, df=None):
992 def _chunk(self, rev, df=None):
993 return decompress(self._chunkraw(rev, rev, df=df))
993 return decompress(self._chunkraw(rev, rev, df=df))
994
994
995 def _chunks(self, revs, df=None):
995 def _chunks(self, revs, df=None):
996 '''faster version of [self._chunk(rev) for rev in revs]
996 '''faster version of [self._chunk(rev) for rev in revs]
997
997
998 Assumes that revs is in ascending order.'''
998 Assumes that revs is in ascending order.'''
999 if not revs:
999 if not revs:
1000 return []
1000 return []
1001 start = self.start
1001 start = self.start
1002 length = self.length
1002 length = self.length
1003 inline = self._inline
1003 inline = self._inline
1004 iosize = self._io.size
1004 iosize = self._io.size
1005 buffer = util.buffer
1005 buffer = util.buffer
1006
1006
1007 l = []
1007 l = []
1008 ladd = l.append
1008 ladd = l.append
1009
1009
1010 # preload the cache
1010 # preload the cache
1011 try:
1011 try:
1012 while True:
1012 while True:
1013 # ensure that the cache doesn't change out from under us
1013 # ensure that the cache doesn't change out from under us
1014 _cache = self._chunkcache
1014 _cache = self._chunkcache
1015 self._chunkraw(revs[0], revs[-1], df=df)
1015 self._chunkraw(revs[0], revs[-1], df=df)
1016 if _cache == self._chunkcache:
1016 if _cache == self._chunkcache:
1017 break
1017 break
1018 offset, data = _cache
1018 offset, data = _cache
1019 except OverflowError:
1019 except OverflowError:
1020 # issue4215 - we can't cache a run of chunks greater than
1020 # issue4215 - we can't cache a run of chunks greater than
1021 # 2G on Windows
1021 # 2G on Windows
1022 return [self._chunk(rev, df=df) for rev in revs]
1022 return [self._chunk(rev, df=df) for rev in revs]
1023
1023
1024 for rev in revs:
1024 for rev in revs:
1025 chunkstart = start(rev)
1025 chunkstart = start(rev)
1026 if inline:
1026 if inline:
1027 chunkstart += (rev + 1) * iosize
1027 chunkstart += (rev + 1) * iosize
1028 chunklength = length(rev)
1028 chunklength = length(rev)
1029 ladd(decompress(buffer(data, chunkstart - offset, chunklength)))
1029 ladd(decompress(buffer(data, chunkstart - offset, chunklength)))
1030
1030
1031 return l
1031 return l
1032
1032
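The preload loop above is a small retry pattern: snapshot the chunk cache, force a load, and retry until the snapshot survives the load, at which point it is safe to slice from. Restated with hypothetical names `rl` and `revs`:

while True:
    snapshot = rl._chunkcache          # the (offset, data) pair we plan to slice
    rl._chunkraw(revs[0], revs[-1])    # makes sure the whole span is cached
    if snapshot == rl._chunkcache:     # nothing replaced the cache underneath us
        break
offset, data = snapshot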
1033 def _chunkclear(self):
1033 def _chunkclear(self):
1034 self._chunkcache = (0, '')
1034 self._chunkcache = (0, '')
1035
1035
1036 def deltaparent(self, rev):
1036 def deltaparent(self, rev):
1037 """return deltaparent of the given revision"""
1037 """return deltaparent of the given revision"""
1038 base = self.index[rev][3]
1038 base = self.index[rev][3]
1039 if base == rev:
1039 if base == rev:
1040 return nullrev
1040 return nullrev
1041 elif self._generaldelta:
1041 elif self._generaldelta:
1042 return base
1042 return base
1043 else:
1043 else:
1044 return rev - 1
1044 return rev - 1
1045
1045
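A quick way to see the two layouts deltaparent() distinguishes, as a sketch with `rl` assumed: with generaldelta the base follows the stored graph, otherwise every delta chains to the previous revision.

bases = [(rev, rl.deltaparent(rev)) for rev in rl]
# e.g. without generaldelta: [(0, -1), (1, 0), (2, 1), ...], where -1 marks
# revisions stored as fulltexts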
1046 def revdiff(self, rev1, rev2):
1046 def revdiff(self, rev1, rev2):
1047 """return or calculate a delta between two revisions"""
1047 """return or calculate a delta between two revisions"""
1048 if rev1 != nullrev and self.deltaparent(rev2) == rev1:
1048 if rev1 != nullrev and self.deltaparent(rev2) == rev1:
1049 return str(self._chunk(rev2))
1049 return str(self._chunk(rev2))
1050
1050
1051 return mdiff.textdiff(self.revision(rev1),
1051 return mdiff.textdiff(self.revision(rev1),
1052 self.revision(rev2))
1052 self.revision(rev2))
1053
1053
1054 def revision(self, nodeorrev, _df=None):
1054 def revision(self, nodeorrev, _df=None):
1055 """return an uncompressed revision of a given node or revision
1055 """return an uncompressed revision of a given node or revision
1056 number.
1056 number.
1057
1057
1058 _df is an existing file handle to read from. It is meant to only be
1058 _df is an existing file handle to read from. It is meant to only be
1059 used internally.
1059 used internally.
1060 """
1060 """
1061 if isinstance(nodeorrev, int):
1061 if isinstance(nodeorrev, int):
1062 rev = nodeorrev
1062 rev = nodeorrev
1063 node = self.node(rev)
1063 node = self.node(rev)
1064 else:
1064 else:
1065 node = nodeorrev
1065 node = nodeorrev
1066 rev = None
1066 rev = None
1067
1067
1068 cachedrev = None
1068 cachedrev = None
1069 if node == nullid:
1069 if node == nullid:
1070 return ""
1070 return ""
1071 if self._cache:
1071 if self._cache:
1072 if self._cache[0] == node:
1072 if self._cache[0] == node:
1073 return self._cache[2]
1073 return self._cache[2]
1074 cachedrev = self._cache[1]
1074 cachedrev = self._cache[1]
1075
1075
1076 # look up what we need to read
1076 # look up what we need to read
1077 text = None
1077 text = None
1078 if rev is None:
1078 if rev is None:
1079 rev = self.rev(node)
1079 rev = self.rev(node)
1080
1080
1081 # check rev flags
1081 # check rev flags
1082 if self.flags(rev) & ~REVIDX_KNOWN_FLAGS:
1082 if self.flags(rev) & ~REVIDX_KNOWN_FLAGS:
1083 raise RevlogError(_('incompatible revision flag %x') %
1083 raise RevlogError(_('incompatible revision flag %x') %
1084 (self.flags(rev) & ~REVIDX_KNOWN_FLAGS))
1084 (self.flags(rev) & ~REVIDX_KNOWN_FLAGS))
1085
1085
1086 # build delta chain
1086 # build delta chain
1087 chain = []
1087 chain = []
1088 index = self.index # for performance
1088 index = self.index # for performance
1089 generaldelta = self._generaldelta
1089 generaldelta = self._generaldelta
1090 iterrev = rev
1090 iterrev = rev
1091 e = index[iterrev]
1091 e = index[iterrev]
1092 while iterrev != e[3] and iterrev != cachedrev:
1092 while iterrev != e[3] and iterrev != cachedrev:
1093 chain.append(iterrev)
1093 chain.append(iterrev)
1094 if generaldelta:
1094 if generaldelta:
1095 iterrev = e[3]
1095 iterrev = e[3]
1096 else:
1096 else:
1097 iterrev -= 1
1097 iterrev -= 1
1098 e = index[iterrev]
1098 e = index[iterrev]
1099
1099
1100 if iterrev == cachedrev:
1100 if iterrev == cachedrev:
1101 # cache hit
1101 # cache hit
1102 text = self._cache[2]
1102 text = self._cache[2]
1103 else:
1103 else:
1104 chain.append(iterrev)
1104 chain.append(iterrev)
1105 chain.reverse()
1105 chain.reverse()
1106
1106
1107 # drop cache to save memory
1107 # drop cache to save memory
1108 self._cache = None
1108 self._cache = None
1109
1109
1110 bins = self._chunks(chain, df=_df)
1110 bins = self._chunks(chain, df=_df)
1111 if text is None:
1111 if text is None:
1112 text = str(bins[0])
1112 text = str(bins[0])
1113 bins = bins[1:]
1113 bins = bins[1:]
1114
1114
1115 text = mdiff.patches(text, bins)
1115 text = mdiff.patches(text, bins)
1116
1116
1117 text = self._checkhash(text, node, rev)
1117 text = self._checkhash(text, node, rev)
1118
1118
1119 self._cache = (node, rev, text)
1119 self._cache = (node, rev, text)
1120 return text
1120 return text
1121
1121
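The chain-building loop above, restated as a standalone sketch (entry element [3] is the delta base, as elsewhere in this file; the cached-revision shortcut is omitted):

def deltachain(index, rev, generaldelta):
    chain = []
    e = index[rev]
    while rev != e[3]:          # a revision that is its own base is a fulltext
        chain.append(rev)
        rev = e[3] if generaldelta else rev - 1
        e = index[rev]
    chain.append(rev)           # the fulltext base ends up first...
    chain.reverse()             # ...once the chain is reversed
    return chain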
1122 def hash(self, text, p1, p2):
1122 def hash(self, text, p1, p2):
1123 """Compute a node hash.
1123 """Compute a node hash.
1124
1124
1125 Available as a function so that subclasses can replace the hash
1125 Available as a function so that subclasses can replace the hash
1126 as needed.
1126 as needed.
1127 """
1127 """
1128 return hash(text, p1, p2)
1128 return hash(text, p1, p2)
1129
1129
1130 def _checkhash(self, text, node, rev):
1130 def _checkhash(self, text, node, rev):
1131 p1, p2 = self.parents(node)
1131 p1, p2 = self.parents(node)
1132 self.checkhash(text, p1, p2, node, rev)
1132 self.checkhash(text, p1, p2, node, rev)
1133 return text
1133 return text
1134
1134
1135 def checkhash(self, text, p1, p2, node, rev=None):
1135 def checkhash(self, text, p1, p2, node, rev=None):
1136 if node != self.hash(text, p1, p2):
1136 if node != self.hash(text, p1, p2):
1137 revornode = rev
1137 revornode = rev
1138 if revornode is None:
1138 if revornode is None:
1139 revornode = templatefilters.short(hex(node))
1139 revornode = templatefilters.short(hex(node))
1140 raise RevlogError(_("integrity check failed on %s:%s")
1140 raise RevlogError(_("integrity check failed on %s:%s")
1141 % (self.indexfile, revornode))
1141 % (self.indexfile, revornode))
1142
1142
1143 def checkinlinesize(self, tr, fp=None):
1143 def checkinlinesize(self, tr, fp=None):
1144 """Check if the revlog is too big for inline and convert if so.
1144 """Check if the revlog is too big for inline and convert if so.
1145
1145
1146 This should be called after revisions are added to the revlog. If the
1146 This should be called after revisions are added to the revlog. If the
1147 revlog has grown too large to be an inline revlog, it will convert it
1147 revlog has grown too large to be an inline revlog, it will convert it
1148 to use multiple index and data files.
1148 to use multiple index and data files.
1149 """
1149 """
1150 if not self._inline or (self.start(-2) + self.length(-2)) < _maxinline:
1150 if not self._inline or (self.start(-2) + self.length(-2)) < _maxinline:
1151 return
1151 return
1152
1152
1153 trinfo = tr.find(self.indexfile)
1153 trinfo = tr.find(self.indexfile)
1154 if trinfo is None:
1154 if trinfo is None:
1155 raise RevlogError(_("%s not found in the transaction")
1155 raise RevlogError(_("%s not found in the transaction")
1156 % self.indexfile)
1156 % self.indexfile)
1157
1157
1158 trindex = trinfo[2]
1158 trindex = trinfo[2]
1159 if trindex is not None:
1159 if trindex is not None:
1160 dataoff = self.start(trindex)
1160 dataoff = self.start(trindex)
1161 else:
1161 else:
1162 # revlog was stripped at start of transaction, use all leftover data
1162 # revlog was stripped at start of transaction, use all leftover data
1163 trindex = len(self) - 1
1163 trindex = len(self) - 1
1164 dataoff = self.end(-2)
1164 dataoff = self.end(-2)
1165
1165
1166 tr.add(self.datafile, dataoff)
1166 tr.add(self.datafile, dataoff)
1167
1167
1168 if fp:
1168 if fp:
1169 fp.flush()
1169 fp.flush()
1170 fp.close()
1170 fp.close()
1171
1171
1172 df = self.opener(self.datafile, 'w')
1172 df = self.opener(self.datafile, 'w')
1173 try:
1173 try:
1174 for r in self:
1174 for r in self:
1175 df.write(self._chunkraw(r, r))
1175 df.write(self._chunkraw(r, r))
1176 finally:
1176 finally:
1177 df.close()
1177 df.close()
1178
1178
1179 fp = self.opener(self.indexfile, 'w', atomictemp=True)
1179 fp = self.opener(self.indexfile, 'w', atomictemp=True)
1180 self.version &= ~(REVLOGNGINLINEDATA)
1180 self.version &= ~(REVLOGNGINLINEDATA)
1181 self._inline = False
1181 self._inline = False
1182 for i in self:
1182 for i in self:
1183 e = self._io.packentry(self.index[i], self.node, self.version, i)
1183 e = self._io.packentry(self.index[i], self.node, self.version, i)
1184 fp.write(e)
1184 fp.write(e)
1185
1185
1186 # if we don't call close, the temp file will never replace the
1186 # if we don't call close, the temp file will never replace the
1187 # real index
1187 # real index
1188 fp.close()
1188 fp.close()
1189
1189
1190 tr.replace(self.indexfile, trindex * self._io.size)
1190 tr.replace(self.indexfile, trindex * self._io.size)
1191 self._chunkclear()
1191 self._chunkclear()
1192
1192
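Schematically, the conversion above turns the single interleaved file into the usual two-file layout:

# before: .i = [entry 0][data 0][entry 1][data 1]...
# after:  .i = [entry 0][entry 1]...    .d = [data 0][data 1]...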
1193 def addrevision(self, text, transaction, link, p1, p2, cachedelta=None,
1193 def addrevision(self, text, transaction, link, p1, p2, cachedelta=None,
1194 node=None):
1194 node=None):
1195 """add a revision to the log
1195 """add a revision to the log
1196
1196
1197 text - the revision data to add
1197 text - the revision data to add
1198 transaction - the transaction object used for rollback
1198 transaction - the transaction object used for rollback
1199 link - the linkrev data to add
1199 link - the linkrev data to add
1200 p1, p2 - the parent nodeids of the revision
1200 p1, p2 - the parent nodeids of the revision
1201 cachedelta - an optional precomputed delta
1201 cachedelta - an optional precomputed delta
1202 node - nodeid of revision; typically node is not specified, and it is
1202 node - nodeid of revision; typically node is not specified, and it is
1203 computed by default as hash(text, p1, p2); however, subclasses might
1203 computed by default as hash(text, p1, p2); however, subclasses might
1204 use a different hashing method (and override checkhash() in that case)
1204 use a different hashing method (and override checkhash() in that case)
1205 """
1205 """
1206 if link == nullrev:
1206 if link == nullrev:
1207 raise RevlogError(_("attempted to add linkrev -1 to %s")
1207 raise RevlogError(_("attempted to add linkrev -1 to %s")
1208 % self.indexfile)
1208 % self.indexfile)
1209
1209
1210 if len(text) > _maxentrysize:
1210 if len(text) > _maxentrysize:
1211 raise RevlogError(
1211 raise RevlogError(
1212 _("%s: size of %d bytes exceeds maximum revlog storage of 2GiB")
1212 _("%s: size of %d bytes exceeds maximum revlog storage of 2GiB")
1213 % (self.indexfile, len(text)))
1213 % (self.indexfile, len(text)))
1214
1214
1215 node = node or self.hash(text, p1, p2)
1215 node = node or self.hash(text, p1, p2)
1216 if node in self.nodemap:
1216 if node in self.nodemap:
1217 return node
1217 return node
1218
1218
1219 dfh = None
1219 dfh = None
1220 if not self._inline:
1220 if not self._inline:
1221 dfh = self.opener(self.datafile, "a+")
1221 dfh = self.opener(self.datafile, "a+")
1222 ifh = self.opener(self.indexfile, "a+")
1222 ifh = self.opener(self.indexfile, "a+")
1223 try:
1223 try:
1224 return self._addrevision(node, text, transaction, link, p1, p2,
1224 return self._addrevision(node, text, transaction, link, p1, p2,
1225 REVIDX_DEFAULT_FLAGS, cachedelta, ifh, dfh)
1225 REVIDX_DEFAULT_FLAGS, cachedelta, ifh, dfh)
1226 finally:
1226 finally:
1227 if dfh:
1227 if dfh:
1228 dfh.close()
1228 dfh.close()
1229 ifh.close()
1229 ifh.close()
1230
1230
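A minimal usage sketch, assuming an open transaction `tr`, a linkrev `link`, and parent nodeids that are already stored:

node = rl.addrevision(text, tr, link, p1, p2)
# re-adding the same (text, p1, p2) is a no-op that returns the same node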
1231 def compress(self, text):
1231 def compress(self, text):
1232 """ generate a possibly-compressed representation of text """
1232 """ generate a possibly-compressed representation of text """
1233 if not text:
1233 if not text:
1234 return ("", text)
1234 return ("", text)
1235 l = len(text)
1235 l = len(text)
1236 bin = None
1236 bin = None
1237 if l < 44:
1237 if l < 44:
1238 pass
1238 pass
1239 elif l > 1000000:
1239 elif l > 1000000:
1240 # zlib makes an internal copy, thus doubling memory usage for
1240 # zlib makes an internal copy, thus doubling memory usage for
1241 # large files, so let's do this in pieces
1241 # large files, so let's do this in pieces
1242 z = zlib.compressobj()
1242 z = zlib.compressobj()
1243 p = []
1243 p = []
1244 pos = 0
1244 pos = 0
1245 while pos < l:
1245 while pos < l:
1246 pos2 = pos + 2**20
1246 pos2 = pos + 2**20
1247 p.append(z.compress(text[pos:pos2]))
1247 p.append(z.compress(text[pos:pos2]))
1248 pos = pos2
1248 pos = pos2
1249 p.append(z.flush())
1249 p.append(z.flush())
1250 if sum(map(len, p)) < l:
1250 if sum(map(len, p)) < l:
1251 bin = "".join(p)
1251 bin = "".join(p)
1252 else:
1252 else:
1253 bin = _compress(text)
1253 bin = _compress(text)
1254 if bin is None or len(bin) > l:
1254 if bin is None or len(bin) > l:
1255 if text[0] == '\0':
1255 if text[0] == '\0':
1256 return ("", text)
1256 return ("", text)
1257 return ('u', text)
1257 return ('u', text)
1258 return ("", bin)
1258 return ("", bin)
1259
1259
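A counterpart sketch of the return convention above, showing how a reader would interpret stored chunks (a simplified mirror of this module's decompress(); the error type here is swapped for ValueError):

import zlib

def readchunk(chunk):
    if not chunk:
        return chunk
    t = chunk[0]
    if t == '\0':    # literal text that happens to start with a NUL byte
        return chunk
    if t == 'x':     # a zlib stream ('x' is the usual zlib header byte)
        return zlib.decompress(chunk)
    if t == 'u':     # stored uncompressed; strip the marker byte
        return chunk[1:]
    raise ValueError('unknown compression type %r' % t)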
1260 def _isgooddelta(self, d, textlen):
1260 def _isgooddelta(self, d, textlen):
1261 """Returns True if the given delta is good. Good means that it is within
1261 """Returns True if the given delta is good. Good means that it is within
1262 the disk span, disk size, and chain length bounds that we know to be
1262 the disk span, disk size, and chain length bounds that we know to be
1263 performant."""
1263 performant."""
1264 if d is None:
1264 if d is None:
1265 return False
1265 return False
1266
1266
1267 # - 'dist' is the distance from the base revision -- bounding it limits
1267 # - 'dist' is the distance from the base revision -- bounding it limits
1268 # the amount of I/O we need to do.
1268 # the amount of I/O we need to do.
1269 # - 'compresseddeltalen' is the sum of the total size of deltas we need
1269 # - 'compresseddeltalen' is the sum of the total size of deltas we need
1270 # to apply -- bounding it limits the amount of CPU we consume.
1270 # to apply -- bounding it limits the amount of CPU we consume.
1271 dist, l, data, base, chainbase, chainlen, compresseddeltalen = d
1271 dist, l, data, base, chainbase, chainlen, compresseddeltalen = d
1272 if (dist > textlen * 4 or l > textlen or
1272 if (dist > textlen * 4 or l > textlen or
1273 compresseddeltalen > textlen * 2 or
1273 compresseddeltalen > textlen * 2 or
1274 (self._maxchainlen and chainlen > self._maxchainlen)):
1274 (self._maxchainlen and chainlen > self._maxchainlen)):
1275 return False
1275 return False
1276
1276
1277 return True
1277 return True
1278
1278
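Two concrete data points for the bounds above, with the tuple laid out as builddelta() below returns it (the fields unused by the check are filled with placeholders; no maxchainlen is assumed to be configured):

textlen = 1000
good = (2500, 300, None, 0, 0, 3, 1200)  # reads 2.5KB, applies 1.2KB of deltas
bad = (4500, 300, None, 0, 0, 3, 1200)   # dist > 4 * textlen: rejected
assert rl._isgooddelta(good, textlen)
assert not rl._isgooddelta(bad, textlen)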
1279 def _addrevision(self, node, text, transaction, link, p1, p2, flags,
1279 def _addrevision(self, node, text, transaction, link, p1, p2, flags,
1280 cachedelta, ifh, dfh, alwayscache=False):
1280 cachedelta, ifh, dfh, alwayscache=False):
1281 """internal function to add revisions to the log
1281 """internal function to add revisions to the log
1282
1282
1283 see addrevision for argument descriptions.
1283 see addrevision for argument descriptions.
1284 invariants:
1284 invariants:
1285 - text is optional (can be None); if not set, cachedelta must be set.
1285 - text is optional (can be None); if not set, cachedelta must be set.
1286 if both are set, they must correspond to each other.
1286 if both are set, they must correspond to each other.
1287 """
1287 """
1288 btext = [text]
1288 btext = [text]
1289 def buildtext():
1289 def buildtext():
1290 if btext[0] is not None:
1290 if btext[0] is not None:
1291 return btext[0]
1291 return btext[0]
1292 # flush any pending writes here so we can read it in revision
1293 if dfh:
1294 dfh.flush()
1295 ifh.flush()
1296 baserev = cachedelta[0]
1292 baserev = cachedelta[0]
1297 delta = cachedelta[1]
1293 delta = cachedelta[1]
1298 # special case deltas which replace entire base; no need to decode
1294 # special case deltas which replace entire base; no need to decode
1299 # base revision. this neatly avoids censored bases, which throw when
1295 # base revision. this neatly avoids censored bases, which throw when
1300 # they're decoded.
1296 # they're decoded.
1301 hlen = struct.calcsize(">lll")
1297 hlen = struct.calcsize(">lll")
1302 if delta[:hlen] == mdiff.replacediffheader(self.rawsize(baserev),
1298 if delta[:hlen] == mdiff.replacediffheader(self.rawsize(baserev),
1303 len(delta) - hlen):
1299 len(delta) - hlen):
1304 btext[0] = delta[hlen:]
1300 btext[0] = delta[hlen:]
1305 else:
1301 else:
1306 basetext = self.revision(self.node(baserev))
1302 if self._inline:
1303 fh = ifh
1304 else:
1305 fh = dfh
1306 basetext = self.revision(self.node(baserev), _df=fh)
1307 btext[0] = mdiff.patch(basetext, delta)
1307 btext[0] = mdiff.patch(basetext, delta)
1308 try:
1308 try:
1309 self.checkhash(btext[0], p1, p2, node)
1309 self.checkhash(btext[0], p1, p2, node)
1310 if flags & REVIDX_ISCENSORED:
1310 if flags & REVIDX_ISCENSORED:
1311 raise RevlogError(_('node %s is not censored') % node)
1311 raise RevlogError(_('node %s is not censored') % node)
1312 except CensoredNodeError:
1312 except CensoredNodeError:
1313 # must pass the censored index flag to add censored revisions
1313 # must pass the censored index flag to add censored revisions
1314 if not flags & REVIDX_ISCENSORED:
1314 if not flags & REVIDX_ISCENSORED:
1315 raise
1315 raise
1316 return btext[0]
1316 return btext[0]
1317
1317
1318 def builddelta(rev):
1318 def builddelta(rev):
1319 # can we use the cached delta?
1319 # can we use the cached delta?
1320 if cachedelta and cachedelta[0] == rev:
1320 if cachedelta and cachedelta[0] == rev:
1321 delta = cachedelta[1]
1321 delta = cachedelta[1]
1322 else:
1322 else:
1323 t = buildtext()
1323 t = buildtext()
1324 if self.iscensored(rev):
1324 if self.iscensored(rev):
1325 # deltas based on a censored revision must replace the
1325 # deltas based on a censored revision must replace the
1326 # full content in one patch, so delta works everywhere
1326 # full content in one patch, so delta works everywhere
1327 header = mdiff.replacediffheader(self.rawsize(rev), len(t))
1327 header = mdiff.replacediffheader(self.rawsize(rev), len(t))
1328 delta = header + t
1328 delta = header + t
1329 else:
1329 else:
1330 ptext = self.revision(self.node(rev))
1330 if self._inline:
1331 fh = ifh
1332 else:
1333 fh = dfh
1334 ptext = self.revision(self.node(rev), _df=fh)
1331 delta = mdiff.textdiff(ptext, t)
1335 delta = mdiff.textdiff(ptext, t)
1332 data = self.compress(delta)
1336 data = self.compress(delta)
1333 l = len(data[1]) + len(data[0])
1337 l = len(data[1]) + len(data[0])
1334 if basecache[0] == rev:
1338 if basecache[0] == rev:
1335 chainbase = basecache[1]
1339 chainbase = basecache[1]
1336 else:
1340 else:
1337 chainbase = self.chainbase(rev)
1341 chainbase = self.chainbase(rev)
1338 dist = l + offset - self.start(chainbase)
1342 dist = l + offset - self.start(chainbase)
1339 if self._generaldelta:
1343 if self._generaldelta:
1340 base = rev
1344 base = rev
1341 else:
1345 else:
1342 base = chainbase
1346 base = chainbase
1343 chainlen, compresseddeltalen = self._chaininfo(rev)
1347 chainlen, compresseddeltalen = self._chaininfo(rev)
1344 chainlen += 1
1348 chainlen += 1
1345 compresseddeltalen += l
1349 compresseddeltalen += l
1346 return dist, l, data, base, chainbase, chainlen, compresseddeltalen
1350 return dist, l, data, base, chainbase, chainlen, compresseddeltalen
1347
1351
1348 curr = len(self)
1352 curr = len(self)
1349 prev = curr - 1
1353 prev = curr - 1
1350 base = chainbase = curr
1354 base = chainbase = curr
1351 chainlen = None
1355 chainlen = None
1352 offset = self.end(prev)
1356 offset = self.end(prev)
1353 d = None
1357 d = None
1354 if self._basecache is None:
1358 if self._basecache is None:
1355 self._basecache = (prev, self.chainbase(prev))
1359 self._basecache = (prev, self.chainbase(prev))
1356 basecache = self._basecache
1360 basecache = self._basecache
1357 p1r, p2r = self.rev(p1), self.rev(p2)
1361 p1r, p2r = self.rev(p1), self.rev(p2)
1358
1362
1359 # full versions are inserted when the needed deltas
1363 # full versions are inserted when the needed deltas
1360 # become comparable to the uncompressed text
1364 # become comparable to the uncompressed text
1361 if text is None:
1365 if text is None:
1362 textlen = mdiff.patchedsize(self.rawsize(cachedelta[0]),
1366 textlen = mdiff.patchedsize(self.rawsize(cachedelta[0]),
1363 cachedelta[1])
1367 cachedelta[1])
1364 else:
1368 else:
1365 textlen = len(text)
1369 textlen = len(text)
1366
1370
1367 # should we try to build a delta?
1371 # should we try to build a delta?
1368 if prev != nullrev:
1372 if prev != nullrev:
1369 if self._generaldelta:
1373 if self._generaldelta:
1370 if p2r != nullrev and self._aggressivemergedeltas:
1374 if p2r != nullrev and self._aggressivemergedeltas:
1371 d = builddelta(p1r)
1375 d = builddelta(p1r)
1372 d2 = builddelta(p2r)
1376 d2 = builddelta(p2r)
1373 p1good = self._isgooddelta(d, textlen)
1377 p1good = self._isgooddelta(d, textlen)
1374 p2good = self._isgooddelta(d2, textlen)
1378 p2good = self._isgooddelta(d2, textlen)
1375 if p1good and p2good:
1379 if p1good and p2good:
1376 # If both are good deltas, choose the smallest
1380 # If both are good deltas, choose the smallest
1377 if d2[1] < d[1]:
1381 if d2[1] < d[1]:
1378 d = d2
1382 d = d2
1379 elif p2good:
1383 elif p2good:
1380 # If only p2 is good, use it
1384 # If only p2 is good, use it
1381 d = d2
1385 d = d2
1382 elif p1good:
1386 elif p1good:
1383 pass
1387 pass
1384 else:
1388 else:
1385 # Neither is good, try against prev to hopefully save us
1389 # Neither is good, try against prev to hopefully save us
1386 # a fulltext.
1390 # a fulltext.
1387 d = builddelta(prev)
1391 d = builddelta(prev)
1388 else:
1392 else:
1389 # Pick whichever parent is closer to us (to minimize the
1393 # Pick whichever parent is closer to us (to minimize the
1390 # chance of having to build a fulltext). Since
1394 # chance of having to build a fulltext). Since
1391 # nullrev == -1, any non-merge commit will always pick p1r.
1395 # nullrev == -1, any non-merge commit will always pick p1r.
1392 drev = p2r if p2r > p1r else p1r
1396 drev = p2r if p2r > p1r else p1r
1393 d = builddelta(drev)
1397 d = builddelta(drev)
1394 # If the chosen delta will result in us making a full text,
1398 # If the chosen delta will result in us making a full text,
1395 # give it one last try against prev.
1399 # give it one last try against prev.
1396 if drev != prev and not self._isgooddelta(d, textlen):
1400 if drev != prev and not self._isgooddelta(d, textlen):
1397 d = builddelta(prev)
1401 d = builddelta(prev)
1398 else:
1402 else:
1399 d = builddelta(prev)
1403 d = builddelta(prev)
1400 dist, l, data, base, chainbase, chainlen, compresseddeltalen = d
1404 dist, l, data, base, chainbase, chainlen, compresseddeltalen = d
1401
1405
1402 if not self._isgooddelta(d, textlen):
1406 if not self._isgooddelta(d, textlen):
1403 text = buildtext()
1407 text = buildtext()
1404 data = self.compress(text)
1408 data = self.compress(text)
1405 l = len(data[1]) + len(data[0])
1409 l = len(data[1]) + len(data[0])
1406 base = chainbase = curr
1410 base = chainbase = curr
1407
1411
1408 e = (offset_type(offset, flags), l, textlen,
1412 e = (offset_type(offset, flags), l, textlen,
1409 base, link, p1r, p2r, node)
1413 base, link, p1r, p2r, node)
1410 self.index.insert(-1, e)
1414 self.index.insert(-1, e)
1411 self.nodemap[node] = curr
1415 self.nodemap[node] = curr
1412
1416
1413 entry = self._io.packentry(e, self.node, self.version, curr)
1417 entry = self._io.packentry(e, self.node, self.version, curr)
1414 self._writeentry(transaction, ifh, dfh, entry, data, link, offset)
1418 self._writeentry(transaction, ifh, dfh, entry, data, link, offset)
1415
1419
1416 if alwayscache and text is None:
1420 if alwayscache and text is None:
1417 text = buildtext()
1421 text = buildtext()
1418
1422
1419 if type(text) == str: # only accept immutable objects
1423 if type(text) == str: # only accept immutable objects
1420 self._cache = (node, curr, text)
1424 self._cache = (node, curr, text)
1421 self._basecache = (curr, chainbase)
1425 self._basecache = (curr, chainbase)
1422 return node
1426 return node
1423
1427
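The delta-base selection above, condensed into one hypothetical free function (generaldelta case; `isgood` stands in for self._isgooddelta and `builddelta` for the closure above; the final fulltext fallback stays outside, as in the original):

def choosedelta(builddelta, isgood, textlen, p1r, p2r, prev, aggressive):
    if p2r != nullrev and aggressive:
        d, d2 = builddelta(p1r), builddelta(p2r)
        if isgood(d, textlen) and isgood(d2, textlen):
            return d2 if d2[1] < d[1] else d   # both good: the smaller wins
        if isgood(d2, textlen):
            return d2
        if isgood(d, textlen):
            return d
        return builddelta(prev)                # last try to avoid a fulltext
    drev = p2r if p2r > p1r else p1r           # closer parent; p1r if not a merge
    d = builddelta(drev)
    if drev != prev and not isgood(d, textlen):
        return builddelta(prev)
    return d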
1424 def _writeentry(self, transaction, ifh, dfh, entry, data, link, offset):
1428 def _writeentry(self, transaction, ifh, dfh, entry, data, link, offset):
1425 curr = len(self) - 1
1429 curr = len(self) - 1
1426 if not self._inline:
1430 if not self._inline:
1427 transaction.add(self.datafile, offset)
1431 transaction.add(self.datafile, offset)
1428 transaction.add(self.indexfile, curr * len(entry))
1432 transaction.add(self.indexfile, curr * len(entry))
1429 if data[0]:
1433 if data[0]:
1430 dfh.write(data[0])
1434 dfh.write(data[0])
1431 dfh.write(data[1])
1435 dfh.write(data[1])
1432 dfh.flush()
1436 dfh.flush()
1433 ifh.write(entry)
1437 ifh.write(entry)
1434 else:
1438 else:
1435 offset += curr * self._io.size
1439 offset += curr * self._io.size
1436 transaction.add(self.indexfile, offset, curr)
1440 transaction.add(self.indexfile, offset, curr)
1437 ifh.write(entry)
1441 ifh.write(entry)
1438 ifh.write(data[0])
1442 ifh.write(data[0])
1439 ifh.write(data[1])
1443 ifh.write(data[1])
1440 self.checkinlinesize(transaction, ifh)
1444 self.checkinlinesize(transaction, ifh)
1441
1445
1442 def addgroup(self, bundle, linkmapper, transaction, addrevisioncb=None):
1446 def addgroup(self, bundle, linkmapper, transaction, addrevisioncb=None):
1443 """
1447 """
1444 add a delta group
1448 add a delta group
1445
1449
1446 Given a set of deltas, add them to the revision log. The
1450 Given a set of deltas, add them to the revision log. The
1447 first delta is against its parent, which should be in our
1451 first delta is against its parent, which should be in our
1448 log; the rest are against the previous delta.
1452 log; the rest are against the previous delta.
1449
1453
1450 If ``addrevisioncb`` is defined, it will be called with arguments of
1454 If ``addrevisioncb`` is defined, it will be called with arguments of
1451 this revlog and the node that was added.
1455 this revlog and the node that was added.
1452 """
1456 """
1453
1457
1454 # track the base of the current delta log
1458 # track the base of the current delta log
1455 content = []
1459 content = []
1456 node = None
1460 node = None
1457
1461
1458 r = len(self)
1462 r = len(self)
1459 end = 0
1463 end = 0
1460 if r:
1464 if r:
1461 end = self.end(r - 1)
1465 end = self.end(r - 1)
1462 ifh = self.opener(self.indexfile, "a+")
1466 ifh = self.opener(self.indexfile, "a+")
1463 isize = r * self._io.size
1467 isize = r * self._io.size
1464 if self._inline:
1468 if self._inline:
1465 transaction.add(self.indexfile, end + isize, r)
1469 transaction.add(self.indexfile, end + isize, r)
1466 dfh = None
1470 dfh = None
1467 else:
1471 else:
1468 transaction.add(self.indexfile, isize, r)
1472 transaction.add(self.indexfile, isize, r)
1469 transaction.add(self.datafile, end)
1473 transaction.add(self.datafile, end)
1470 dfh = self.opener(self.datafile, "a+")
1474 dfh = self.opener(self.datafile, "a+")
1471 def flush():
1475 def flush():
1472 if dfh:
1476 if dfh:
1473 dfh.flush()
1477 dfh.flush()
1474 ifh.flush()
1478 ifh.flush()
1475 try:
1479 try:
1476 # loop through our set of deltas
1480 # loop through our set of deltas
1477 chain = None
1481 chain = None
1478 while True:
1482 while True:
1479 chunkdata = bundle.deltachunk(chain)
1483 chunkdata = bundle.deltachunk(chain)
1480 if not chunkdata:
1484 if not chunkdata:
1481 break
1485 break
1482 node = chunkdata['node']
1486 node = chunkdata['node']
1483 p1 = chunkdata['p1']
1487 p1 = chunkdata['p1']
1484 p2 = chunkdata['p2']
1488 p2 = chunkdata['p2']
1485 cs = chunkdata['cs']
1489 cs = chunkdata['cs']
1486 deltabase = chunkdata['deltabase']
1490 deltabase = chunkdata['deltabase']
1487 delta = chunkdata['delta']
1491 delta = chunkdata['delta']
1488
1492
1489 content.append(node)
1493 content.append(node)
1490
1494
1491 link = linkmapper(cs)
1495 link = linkmapper(cs)
1492 if node in self.nodemap:
1496 if node in self.nodemap:
1493 # this can happen if two branches make the same change
1497 # this can happen if two branches make the same change
1494 chain = node
1498 chain = node
1495 continue
1499 continue
1496
1500
1497 for p in (p1, p2):
1501 for p in (p1, p2):
1498 if p not in self.nodemap:
1502 if p not in self.nodemap:
1499 raise LookupError(p, self.indexfile,
1503 raise LookupError(p, self.indexfile,
1500 _('unknown parent'))
1504 _('unknown parent'))
1501
1505
1502 if deltabase not in self.nodemap:
1506 if deltabase not in self.nodemap:
1503 raise LookupError(deltabase, self.indexfile,
1507 raise LookupError(deltabase, self.indexfile,
1504 _('unknown delta base'))
1508 _('unknown delta base'))
1505
1509
1506 baserev = self.rev(deltabase)
1510 baserev = self.rev(deltabase)
1507
1511
1508 if baserev != nullrev and self.iscensored(baserev):
1512 if baserev != nullrev and self.iscensored(baserev):
1509 # if base is censored, delta must be full replacement in a
1513 # if base is censored, delta must be full replacement in a
1510 # single patch operation
1514 # single patch operation
1511 hlen = struct.calcsize(">lll")
1515 hlen = struct.calcsize(">lll")
1512 oldlen = self.rawsize(baserev)
1516 oldlen = self.rawsize(baserev)
1513 newlen = len(delta) - hlen
1517 newlen = len(delta) - hlen
1514 if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
1518 if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
1515 raise error.CensoredBaseError(self.indexfile,
1519 raise error.CensoredBaseError(self.indexfile,
1516 self.node(baserev))
1520 self.node(baserev))
1517
1521
1518 flags = REVIDX_DEFAULT_FLAGS
1522 flags = REVIDX_DEFAULT_FLAGS
1519 if self._peek_iscensored(baserev, delta, flush):
1523 if self._peek_iscensored(baserev, delta, flush):
1520 flags |= REVIDX_ISCENSORED
1524 flags |= REVIDX_ISCENSORED
1521
1525
1522 # We assume consumers of addrevisioncb will want to retrieve
1526 # We assume consumers of addrevisioncb will want to retrieve
1523 # the added revision, which will require a call to
1527 # the added revision, which will require a call to
1524 # revision(). revision() will fast path if there is a cache
1528 # revision(). revision() will fast path if there is a cache
1525 # hit. So, we tell _addrevision() to always cache in this case.
1529 # hit. So, we tell _addrevision() to always cache in this case.
1526 chain = self._addrevision(node, None, transaction, link,
1530 chain = self._addrevision(node, None, transaction, link,
1527 p1, p2, flags, (baserev, delta),
1531 p1, p2, flags, (baserev, delta),
1528 ifh, dfh,
1532 ifh, dfh,
1529 alwayscache=bool(addrevisioncb))
1533 alwayscache=bool(addrevisioncb))
1530
1534
1531 if addrevisioncb:
1535 if addrevisioncb:
1532 addrevisioncb(self, chain)
1536 addrevisioncb(self, chain)
1533
1537
1534 if not dfh and not self._inline:
1538 if not dfh and not self._inline:
1535 # addrevision switched from inline to conventional
1539 # addrevision switched from inline to conventional
1536 # reopen the index
1540 # reopen the index
1537 ifh.close()
1541 ifh.close()
1538 dfh = self.opener(self.datafile, "a+")
1542 dfh = self.opener(self.datafile, "a+")
1539 ifh = self.opener(self.indexfile, "a+")
1543 ifh = self.opener(self.indexfile, "a+")
1540 finally:
1544 finally:
1541 if dfh:
1545 if dfh:
1542 dfh.close()
1546 dfh.close()
1543 ifh.close()
1547 ifh.close()
1544
1548
1545 return content
1549 return content
1546
1550
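A usage sketch for the callback, assuming `bundle`, `linkmapper` and `tr` exist with the shapes this method expects:

added = []
def remember(revlog, node):    # addrevisioncb receives (revlog, node)
    added.append(node)
nodes = rl.addgroup(bundle, linkmapper, tr, addrevisioncb=remember)
# `nodes` lists every node in the group; `added` only those newly stored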
1547 def iscensored(self, rev):
1551 def iscensored(self, rev):
1548 """Check if a file revision is censored."""
1552 """Check if a file revision is censored."""
1549 return False
1553 return False
1550
1554
1551 def _peek_iscensored(self, baserev, delta, flush):
1555 def _peek_iscensored(self, baserev, delta, flush):
1552 """Quickly check if a delta produces a censored revision."""
1556 """Quickly check if a delta produces a censored revision."""
1553 return False
1557 return False
1554
1558
1555 def getstrippoint(self, minlink):
1559 def getstrippoint(self, minlink):
1556 """find the minimum rev that must be stripped to strip the linkrev
1560 """find the minimum rev that must be stripped to strip the linkrev
1557
1561
1558 Returns a tuple containing the minimum rev and a set of all revs that
1562 Returns a tuple containing the minimum rev and a set of all revs that
1559 have linkrevs that will be broken by this strip.
1563 have linkrevs that will be broken by this strip.
1560 """
1564 """
1561 brokenrevs = set()
1565 brokenrevs = set()
1562 strippoint = len(self)
1566 strippoint = len(self)
1563
1567
1564 heads = {}
1568 heads = {}
1565 futurelargelinkrevs = set()
1569 futurelargelinkrevs = set()
1566 for head in self.headrevs():
1570 for head in self.headrevs():
1567 headlinkrev = self.linkrev(head)
1571 headlinkrev = self.linkrev(head)
1568 heads[head] = headlinkrev
1572 heads[head] = headlinkrev
1569 if headlinkrev >= minlink:
1573 if headlinkrev >= minlink:
1570 futurelargelinkrevs.add(headlinkrev)
1574 futurelargelinkrevs.add(headlinkrev)
1571
1575
1572 # This algorithm involves walking down the rev graph, starting at the
1576 # This algorithm involves walking down the rev graph, starting at the
1573 # heads. Since the revs are topologically sorted according to linkrev,
1577 # heads. Since the revs are topologically sorted according to linkrev,
1574 # once all head linkrevs are below the minlink, we know there are
1578 # once all head linkrevs are below the minlink, we know there are
1575 # no more revs that could have a linkrev greater than minlink.
1579 # no more revs that could have a linkrev greater than minlink.
1576 # So we can stop walking.
1580 # So we can stop walking.
1577 while futurelargelinkrevs:
1581 while futurelargelinkrevs:
1578 strippoint -= 1
1582 strippoint -= 1
1579 linkrev = heads.pop(strippoint)
1583 linkrev = heads.pop(strippoint)
1580
1584
1581 if linkrev < minlink:
1585 if linkrev < minlink:
1582 brokenrevs.add(strippoint)
1586 brokenrevs.add(strippoint)
1583 else:
1587 else:
1584 futurelargelinkrevs.remove(linkrev)
1588 futurelargelinkrevs.remove(linkrev)
1585
1589
1586 for p in self.parentrevs(strippoint):
1590 for p in self.parentrevs(strippoint):
1587 if p != nullrev:
1591 if p != nullrev:
1588 plinkrev = self.linkrev(p)
1592 plinkrev = self.linkrev(p)
1589 heads[p] = plinkrev
1593 heads[p] = plinkrev
1590 if plinkrev >= minlink:
1594 if plinkrev >= minlink:
1591 futurelargelinkrevs.add(plinkrev)
1595 futurelargelinkrevs.add(plinkrev)
1592
1596
1593 return strippoint, brokenrevs
1597 return strippoint, brokenrevs
1594
1598
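Two small concrete cases for the walk above (assumed histories):

# Linear history, revs 0..3 with linkrevs [0, 1, 2, 3]:
#   rl.getstrippoint(2) -> (2, set())       # revs 2 and 3 must be stripped
# Revs 0..2 where rev 2 branches off rev 0, linkrevs [0, 2, 1]:
#   rl.getstrippoint(2) -> (1, set([2]))    # rev 2 sits above the strip point,
#                                           # so its surviving linkrev 1 breaks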
1595 def strip(self, minlink, transaction):
1599 def strip(self, minlink, transaction):
1596 """truncate the revlog on the first revision with a linkrev >= minlink
1600 """truncate the revlog on the first revision with a linkrev >= minlink
1597
1601
1598 This function is called when we're stripping revision minlink and
1602 This function is called when we're stripping revision minlink and
1599 its descendants from the repository.
1603 its descendants from the repository.
1600
1604
1601 We have to remove all revisions with linkrev >= minlink, because
1605 We have to remove all revisions with linkrev >= minlink, because
1602 the equivalent changelog revisions will be renumbered after the
1606 the equivalent changelog revisions will be renumbered after the
1603 strip.
1607 strip.
1604
1608
1605 So we truncate the revlog on the first of these revisions, and
1609 So we truncate the revlog on the first of these revisions, and
1606 trust that the caller has saved the revisions that shouldn't be
1610 trust that the caller has saved the revisions that shouldn't be
1607 removed and that it'll re-add them after this truncation.
1611 removed and that it'll re-add them after this truncation.
1608 """
1612 """
1609 if len(self) == 0:
1613 if len(self) == 0:
1610 return
1614 return
1611
1615
1612 rev, _ = self.getstrippoint(minlink)
1616 rev, _ = self.getstrippoint(minlink)
1613 if rev == len(self):
1617 if rev == len(self):
1614 return
1618 return
1615
1619
1616 # first truncate the files on disk
1620 # first truncate the files on disk
1617 end = self.start(rev)
1621 end = self.start(rev)
1618 if not self._inline:
1622 if not self._inline:
1619 transaction.add(self.datafile, end)
1623 transaction.add(self.datafile, end)
1620 end = rev * self._io.size
1624 end = rev * self._io.size
1621 else:
1625 else:
1622 end += rev * self._io.size
1626 end += rev * self._io.size
1623
1627
1624 transaction.add(self.indexfile, end)
1628 transaction.add(self.indexfile, end)
1625
1629
1626 # then reset internal state in memory to forget those revisions
1630 # then reset internal state in memory to forget those revisions
1627 self._cache = None
1631 self._cache = None
1628 self._chaininfocache = {}
1632 self._chaininfocache = {}
1629 self._chunkclear()
1633 self._chunkclear()
1630 for x in xrange(rev, len(self)):
1634 for x in xrange(rev, len(self)):
1631 del self.nodemap[self.node(x)]
1635 del self.nodemap[self.node(x)]
1632
1636
1633 del self.index[rev:-1]
1637 del self.index[rev:-1]
1634
1638
1635 def checksize(self):
1639 def checksize(self):
1636 expected = 0
1640 expected = 0
1637 if len(self):
1641 if len(self):
1638 expected = max(0, self.end(len(self) - 1))
1642 expected = max(0, self.end(len(self) - 1))
1639
1643
1640 try:
1644 try:
1641 f = self.opener(self.datafile)
1645 f = self.opener(self.datafile)
1642 f.seek(0, 2)
1646 f.seek(0, 2)
1643 actual = f.tell()
1647 actual = f.tell()
1644 f.close()
1648 f.close()
1645 dd = actual - expected
1649 dd = actual - expected
1646 except IOError as inst:
1650 except IOError as inst:
1647 if inst.errno != errno.ENOENT:
1651 if inst.errno != errno.ENOENT:
1648 raise
1652 raise
1649 dd = 0
1653 dd = 0
1650
1654
1651 try:
1655 try:
1652 f = self.opener(self.indexfile)
1656 f = self.opener(self.indexfile)
1653 f.seek(0, 2)
1657 f.seek(0, 2)
1654 actual = f.tell()
1658 actual = f.tell()
1655 f.close()
1659 f.close()
1656 s = self._io.size
1660 s = self._io.size
1657 i = max(0, actual // s)
1661 i = max(0, actual // s)
1658 di = actual - (i * s)
1662 di = actual - (i * s)
1659 if self._inline:
1663 if self._inline:
1660 databytes = 0
1664 databytes = 0
1661 for r in self:
1665 for r in self:
1662 databytes += max(0, self.length(r))
1666 databytes += max(0, self.length(r))
1663 dd = 0
1667 dd = 0
1664 di = actual - len(self) * s - databytes
1668 di = actual - len(self) * s - databytes
1665 except IOError as inst:
1669 except IOError as inst:
1666 if inst.errno != errno.ENOENT:
1670 if inst.errno != errno.ENOENT:
1667 raise
1671 raise
1668 di = 0
1672 di = 0
1669
1673
1670 return (dd, di)
1674 return (dd, di)
1671
1675
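Reading the result, as a sketch: dd and di are the data and index files' deviations in bytes from what the index implies, both zero for a healthy revlog.

dd, di = rl.checksize()
if dd or di:
    raise RevlogError('size mismatch: data off by %d, index off by %d'
                      % (dd, di))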
1672 def files(self):
1676 def files(self):
1673 res = [self.indexfile]
1677 res = [self.indexfile]
1674 if not self._inline:
1678 if not self._inline:
1675 res.append(self.datafile)
1679 res.append(self.datafile)
1676 return res
1680 return res