revlog: seek to end of file before writing (issue4943)...
Gregory Szorc
r27430:e240e914 stable
@@ -1,1679 +1,1694 @@
# revlog.py - storage back-end for mercurial
#
# Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.

"""Storage back-end for Mercurial.

This provides efficient delta storage with O(1) retrieve and append
and O(changes) merge between branches.
"""

# import stuff from node for others to import from revlog
import collections
import os
from node import bin, hex, nullid, nullrev
from i18n import _
import ancestor, mdiff, parsers, error, util, templatefilters
import struct, zlib, errno

_pack = struct.pack
_unpack = struct.unpack
_compress = zlib.compress
_decompress = zlib.decompress
_sha = util.sha1

# revlog header flags
REVLOGV0 = 0
REVLOGNG = 1
REVLOGNGINLINEDATA = (1 << 16)
REVLOGGENERALDELTA = (1 << 17)
REVLOG_DEFAULT_FLAGS = REVLOGNGINLINEDATA
REVLOG_DEFAULT_FORMAT = REVLOGNG
REVLOG_DEFAULT_VERSION = REVLOG_DEFAULT_FORMAT | REVLOG_DEFAULT_FLAGS
REVLOGNG_FLAGS = REVLOGNGINLINEDATA | REVLOGGENERALDELTA
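
# A quick illustrative check (not from the original file): the header
# version word fuses the format number (low 16 bits) with feature flags
# (high bits), so the default v1-with-inline-data header is:
#
#   >>> REVLOG_DEFAULT_VERSION == 1 | (1 << 16) == 0x10001
#   True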

# revlog index flags
REVIDX_ISCENSORED = (1 << 15) # revision has censor metadata, must be verified
REVIDX_DEFAULT_FLAGS = 0
REVIDX_KNOWN_FLAGS = REVIDX_ISCENSORED

# max size of revlog with inline data
_maxinline = 131072
_chunksize = 1048576

RevlogError = error.RevlogError
LookupError = error.LookupError
CensoredNodeError = error.CensoredNodeError

def getoffset(q):
    return int(q >> 16)

def gettype(q):
    return int(q & 0xFFFF)

def offset_type(offset, type):
    return long(long(offset) << 16 | type)
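
# Illustrative round trip: offset_type packs a byte offset into the high
# 48 bits and the 16-bit flag field into the low bits; the two accessors
# above undo it:
#
#   >>> q = offset_type(1024, REVIDX_ISCENSORED)
#   >>> getoffset(q), gettype(q) == REVIDX_ISCENSORED
#   (1024, True)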

_nullhash = _sha(nullid)

def hash(text, p1, p2):
    """generate a hash from the given text and its parent hashes

    This hash combines both the current file contents and its history
    in a manner that makes it easy to distinguish nodes with the same
    content in the revision graph.
    """
    # As of now, if one of the parent nodes is null, p2 is null
    if p2 == nullid:
        # deep copy of a hash is faster than creating one
        s = _nullhash.copy()
        s.update(p1)
    else:
        # none of the parent nodes are nullid
        l = [p1, p2]
        l.sort()
        s = _sha(l[0])
        s.update(l[1])
    s.update(text)
    return s.digest()
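
# In other words, a nodeid is sha1(min(p1, p2) + max(p1, p2) + text).
# A minimal sketch for a root revision (both parents null):
#
#   >>> n = hash("file contents", nullid, nullid)
#   >>> len(n)
#   20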

def decompress(bin):
    """ decompress the given input """
    if not bin:
        return bin
    t = bin[0]
    if t == '\0':
        return bin
    if t == 'x':
        try:
            return _decompress(bin)
        except zlib.error as e:
            raise RevlogError(_("revlog decompress error: %s") % str(e))
    if t == 'u':
        return bin[1:]
    raise RevlogError(_("unknown compression type %r") % t)
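
# The first byte of a stored chunk selects its encoding: '\0' means the
# chunk is returned as-is, 'x' marks a zlib stream (zlib output begins
# with 'x'), and 'u' prefixes data stored uncompressed. For instance:
#
#   >>> decompress('u' + 'plain text')
#   'plain text'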

# index v0:
#  4 bytes: offset
#  4 bytes: compressed length
#  4 bytes: base rev
#  4 bytes: link rev
# 20 bytes: parent 1 nodeid
# 20 bytes: parent 2 nodeid
# 20 bytes: nodeid
indexformatv0 = ">4l20s20s20s"
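
# Sanity check of the record size implied by this struct format (four
# big-endian 4-byte longs followed by three 20-byte hashes):
#
#   >>> import struct
#   >>> struct.calcsize(indexformatv0)
#   76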

class revlogoldio(object):
    def __init__(self):
        self.size = struct.calcsize(indexformatv0)

    def parseindex(self, data, inline):
        s = self.size
        index = []
        nodemap = {nullid: nullrev}
        n = off = 0
        l = len(data)
        while off + s <= l:
            cur = data[off:off + s]
            off += s
            e = _unpack(indexformatv0, cur)
            # transform to revlogv1 format
            e2 = (offset_type(e[0], 0), e[1], -1, e[2], e[3],
                  nodemap.get(e[4], nullrev), nodemap.get(e[5], nullrev), e[6])
            index.append(e2)
            nodemap[e[6]] = n
            n += 1

        # add the magic null revision at -1
        index.append((0, 0, 0, -1, -1, -1, -1, nullid))

        return index, nodemap, None

    def packentry(self, entry, node, version, rev):
        if gettype(entry[0]):
            raise RevlogError(_("index entry flags need RevlogNG"))
        e2 = (getoffset(entry[0]), entry[1], entry[3], entry[4],
              node(entry[5]), node(entry[6]), entry[7])
        return _pack(indexformatv0, *e2)

# index ng:
#  6 bytes: offset
#  2 bytes: flags
#  4 bytes: compressed length
#  4 bytes: uncompressed length
#  4 bytes: base rev
#  4 bytes: link rev
#  4 bytes: parent 1 rev
#  4 bytes: parent 2 rev
# 32 bytes: nodeid
indexformatng = ">Qiiiiii20s12x"
versionformat = ">I"
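
# Again the struct format matches the layout above: ">Q" holds the fused
# 6-byte offset + 2-byte flags, six "i" fields follow, and "20s12x" pads
# the 20-byte sha1 nodeid out to the 32 bytes reserved for it:
#
#   >>> import struct
#   >>> struct.calcsize(indexformatng)
#   64
#
# revlogio.packentry() below overwrites the first 4 bytes of the entry
# for rev 0 with the version header, which is safe because rev 0 always
# starts at offset 0.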

# corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
# signed integer)
_maxentrysize = 0x7fffffff

class revlogio(object):
    def __init__(self):
        self.size = struct.calcsize(indexformatng)

    def parseindex(self, data, inline):
        # call the C implementation to parse the index data
        index, cache = parsers.parse_index2(data, inline)
        return index, getattr(index, 'nodemap', None), cache

    def packentry(self, entry, node, version, rev):
        p = _pack(indexformatng, *entry)
        if rev == 0:
            p = _pack(versionformat, version) + p[4:]
        return p

class revlog(object):
    """
    the underlying revision storage object

    A revlog consists of two parts, an index and the revision data.

    The index is a file with a fixed record size containing
    information on each revision, including its nodeid (hash), the
    nodeids of its parents, the position and offset of its data within
    the data file, and the revision it's based on. Finally, each entry
    contains a linkrev entry that can serve as a pointer to external
    data.

    The revision data itself is a linear collection of data chunks.
    Each chunk represents a revision and is usually represented as a
    delta against the previous chunk. To bound lookup time, runs of
    deltas are limited to about 2 times the length of the original
    version data. This makes retrieval of a version proportional to
    its size, or O(1) relative to the number of revisions.

    Both pieces of the revlog are written to in an append-only
    fashion, which means we never need to rewrite a file to insert or
    remove data, and can use some simple techniques to avoid the need
    for locking while reading.
    """
    def __init__(self, opener, indexfile):
        """
        create a revlog object

        opener is a function that abstracts the file opening operation
        and can be used to implement COW semantics or the like.
        """
        self.indexfile = indexfile
        self.datafile = indexfile[:-2] + ".d"
        self.opener = opener
        self._cache = None
        self._basecache = None
        self._chunkcache = (0, '')
        self._chunkcachesize = 65536
        self._maxchainlen = None
        self._aggressivemergedeltas = False
        self.index = []
        self._pcache = {}
        self._nodecache = {nullid: nullrev}
        self._nodepos = None

        v = REVLOG_DEFAULT_VERSION
        opts = getattr(opener, 'options', None)
        if opts is not None:
            if 'revlogv1' in opts:
                if 'generaldelta' in opts:
                    v |= REVLOGGENERALDELTA
            else:
                v = 0
            if 'chunkcachesize' in opts:
                self._chunkcachesize = opts['chunkcachesize']
            if 'maxchainlen' in opts:
                self._maxchainlen = opts['maxchainlen']
            if 'aggressivemergedeltas' in opts:
                self._aggressivemergedeltas = opts['aggressivemergedeltas']

        if self._chunkcachesize <= 0:
            raise RevlogError(_('revlog chunk cache size %r is not greater '
                                'than 0') % self._chunkcachesize)
        elif self._chunkcachesize & (self._chunkcachesize - 1):
            raise RevlogError(_('revlog chunk cache size %r is not a power '
                                'of 2') % self._chunkcachesize)
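
        # The bit trick above: a positive integer x is a power of two
        # exactly when x & (x - 1) == 0, since a power of two has a single
        # set bit (e.g. 65536 & 65535 == 0, but 65537 & 65536 != 0).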

        indexdata = ''
        self._initempty = True
        try:
            f = self.opener(self.indexfile)
            indexdata = f.read()
            f.close()
            if len(indexdata) > 0:
                v = struct.unpack(versionformat, indexdata[:4])[0]
                self._initempty = False
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise

        self.version = v
        self._inline = v & REVLOGNGINLINEDATA
        self._generaldelta = v & REVLOGGENERALDELTA
        flags = v & ~0xFFFF
        fmt = v & 0xFFFF
        if fmt == REVLOGV0 and flags:
            raise RevlogError(_("index %s unknown flags %#04x for format v0")
                              % (self.indexfile, flags >> 16))
        elif fmt == REVLOGNG and flags & ~REVLOGNG_FLAGS:
            raise RevlogError(_("index %s unknown flags %#04x for revlogng")
                              % (self.indexfile, flags >> 16))
        elif fmt > REVLOGNG:
            raise RevlogError(_("index %s unknown format %d")
                              % (self.indexfile, fmt))

        self._io = revlogio()
        if self.version == REVLOGV0:
            self._io = revlogoldio()
        try:
            d = self._io.parseindex(indexdata, self._inline)
        except (ValueError, IndexError):
            raise RevlogError(_("index %s is corrupted") % (self.indexfile))
        self.index, nodemap, self._chunkcache = d
        if nodemap is not None:
            self.nodemap = self._nodecache = nodemap
        if not self._chunkcache:
            self._chunkclear()
        # revnum -> (chain-length, sum-delta-length)
        self._chaininfocache = {}

    def tip(self):
        return self.node(len(self.index) - 2)
    def __contains__(self, rev):
        return 0 <= rev < len(self)
    def __len__(self):
        return len(self.index) - 1
    def __iter__(self):
        return iter(xrange(len(self)))
    def revs(self, start=0, stop=None):
        """iterate over all revs in this revlog (from start to stop)"""
        step = 1
        if stop is not None:
            if start > stop:
                step = -1
            stop += step
        else:
            stop = len(self)
        return xrange(start, stop, step)
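
    # Note that the range is inclusive of stop and may run backwards:
    # revs(5, 2) yields 5, 4, 3, 2, while revs(2, 5) yields 2, 3, 4, 5.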

    @util.propertycache
    def nodemap(self):
        self.rev(self.node(0))
        return self._nodecache

    def hasnode(self, node):
        try:
            self.rev(node)
            return True
        except KeyError:
            return False

    def clearcaches(self):
        try:
            self._nodecache.clearcaches()
        except AttributeError:
            self._nodecache = {nullid: nullrev}
            self._nodepos = None

    def rev(self, node):
        try:
            return self._nodecache[node]
        except TypeError:
            raise
        except RevlogError:
            # parsers.c radix tree lookup failed
            raise LookupError(node, self.indexfile, _('no node'))
        except KeyError:
            # pure python cache lookup failed
            n = self._nodecache
            i = self.index
            p = self._nodepos
            if p is None:
                p = len(i) - 2
            for r in xrange(p, -1, -1):
                v = i[r][7]
                n[v] = r
                if v == node:
                    self._nodepos = r - 1
                    return r
            raise LookupError(node, self.indexfile, _('no node'))

    def node(self, rev):
        return self.index[rev][7]
    def linkrev(self, rev):
        return self.index[rev][4]
    def parents(self, node):
        i = self.index
        d = i[self.rev(node)]
        return i[d[5]][7], i[d[6]][7] # map revisions to nodes inline
    def parentrevs(self, rev):
        return self.index[rev][5:7]
    def start(self, rev):
        return int(self.index[rev][0] >> 16)
    def end(self, rev):
        return self.start(rev) + self.length(rev)
    def length(self, rev):
        return self.index[rev][1]
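    # The accessors above decode the 8-tuple stored per index entry:
    #   [0] offset (high 48 bits) fused with flags (low 16 bits)
    #   [1] compressed length      [2] uncompressed length
    #   [3] delta base rev         [4] linkrev
    #   [5] parent 1 rev           [6] parent 2 rev
    #   [7] binary nodeid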
    def chainbase(self, rev):
        index = self.index
        base = index[rev][3]
        while base != rev:
            rev = base
            base = index[rev][3]
        return base
    def chainlen(self, rev):
        return self._chaininfo(rev)[0]

    def _chaininfo(self, rev):
        chaininfocache = self._chaininfocache
        if rev in chaininfocache:
            return chaininfocache[rev]
        index = self.index
        generaldelta = self._generaldelta
        iterrev = rev
        e = index[iterrev]
        clen = 0
        compresseddeltalen = 0
        while iterrev != e[3]:
            clen += 1
            compresseddeltalen += e[1]
            if generaldelta:
                iterrev = e[3]
            else:
                iterrev -= 1
            if iterrev in chaininfocache:
                t = chaininfocache[iterrev]
                clen += t[0]
                compresseddeltalen += t[1]
                break
            e = index[iterrev]
        else:
            # Add text length of base since decompressing that also takes
            # work. For cache hits the length is already included.
            compresseddeltalen += e[1]
        r = (clen, compresseddeltalen)
        chaininfocache[rev] = r
        return r

    def flags(self, rev):
        return self.index[rev][0] & 0xFFFF
    def rawsize(self, rev):
        """return the length of the uncompressed text for a given revision"""
        l = self.index[rev][2]
        if l >= 0:
            return l

        t = self.revision(self.node(rev))
        return len(t)
    size = rawsize

    def ancestors(self, revs, stoprev=0, inclusive=False):
        """Generate the ancestors of 'revs' in reverse topological order.
        Does not generate revs lower than stoprev.

        See the documentation for ancestor.lazyancestors for more details."""

        return ancestor.lazyancestors(self.parentrevs, revs, stoprev=stoprev,
                                      inclusive=inclusive)

    def descendants(self, revs):
        """Generate the descendants of 'revs' in revision order.

        Yield a sequence of revision numbers starting with a child of
        some rev in revs, i.e., each revision is *not* considered a
        descendant of itself. Results are ordered by revision number (a
        topological sort)."""
        first = min(revs)
        if first == nullrev:
            for i in self:
                yield i
            return

        seen = set(revs)
        for i in self.revs(start=first + 1):
            for x in self.parentrevs(i):
                if x != nullrev and x in seen:
                    seen.add(i)
                    yield i
                    break

    def findcommonmissing(self, common=None, heads=None):
        """Return a tuple of the ancestors of common and the ancestors of heads
        that are not ancestors of common. In revset terminology, we return the
        tuple:

          ::common, (::heads) - (::common)

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of node IDs. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullid."""
        if common is None:
            common = [nullid]
        if heads is None:
            heads = self.heads()

        common = [self.rev(n) for n in common]
        heads = [self.rev(n) for n in heads]

        # we want the ancestors, but inclusive
        class lazyset(object):
            def __init__(self, lazyvalues):
                self.addedvalues = set()
                self.lazyvalues = lazyvalues

            def __contains__(self, value):
                return value in self.addedvalues or value in self.lazyvalues

            def __iter__(self):
                added = self.addedvalues
                for r in added:
                    yield r
                for r in self.lazyvalues:
                    if r not in added:
                        yield r

            def add(self, value):
                self.addedvalues.add(value)

            def update(self, values):
                self.addedvalues.update(values)

        has = lazyset(self.ancestors(common))
        has.add(nullrev)
        has.update(common)

        # take all ancestors from heads that aren't in has
        missing = set()
        visit = collections.deque(r for r in heads if r not in has)
        while visit:
            r = visit.popleft()
            if r in missing:
                continue
            else:
                missing.add(r)
                for p in self.parentrevs(r):
                    if p not in has:
                        visit.append(p)
        missing = list(missing)
        missing.sort()
        return has, [self.node(r) for r in missing]

    def incrementalmissingrevs(self, common=None):
        """Return an object that can be used to incrementally compute the
        revision numbers of the ancestors of arbitrary sets that are not
        ancestors of common. This is an ancestor.incrementalmissingancestors
        object.

        'common' is a list of revision numbers. If common is not supplied, uses
        nullrev.
        """
        if common is None:
            common = [nullrev]

        return ancestor.incrementalmissingancestors(self.parentrevs, common)

    def findmissingrevs(self, common=None, heads=None):
        """Return the revision numbers of the ancestors of heads that
        are not ancestors of common.

        More specifically, return a list of revision numbers corresponding to
        nodes N such that every N satisfies the following constraints:

          1. N is an ancestor of some node in 'heads'
          2. N is not an ancestor of any node in 'common'

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of revision numbers. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullrev."""
        if common is None:
            common = [nullrev]
        if heads is None:
            heads = self.headrevs()

        inc = self.incrementalmissingrevs(common=common)
        return inc.missingancestors(heads)

    def findmissing(self, common=None, heads=None):
        """Return the ancestors of heads that are not ancestors of common.

        More specifically, return a list of nodes N such that every N
        satisfies the following constraints:

          1. N is an ancestor of some node in 'heads'
          2. N is not an ancestor of any node in 'common'

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of node IDs. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullid."""
        if common is None:
            common = [nullid]
        if heads is None:
            heads = self.heads()

        common = [self.rev(n) for n in common]
        heads = [self.rev(n) for n in heads]

        inc = self.incrementalmissingrevs(common=common)
        return [self.node(r) for r in inc.missingancestors(heads)]

    def nodesbetween(self, roots=None, heads=None):
        """Return a topological path from 'roots' to 'heads'.

        Return a tuple (nodes, outroots, outheads) where 'nodes' is a
        topologically sorted list of all nodes N that satisfy both of
        these constraints:

          1. N is a descendant of some node in 'roots'
          2. N is an ancestor of some node in 'heads'

        Every node is considered to be both a descendant and an ancestor
        of itself, so every reachable node in 'roots' and 'heads' will be
        included in 'nodes'.

        'outroots' is the list of reachable nodes in 'roots', i.e., the
        subset of 'roots' that is returned in 'nodes'. Likewise,
        'outheads' is the subset of 'heads' that is also in 'nodes'.

        'roots' and 'heads' are both lists of node IDs. If 'roots' is
        unspecified, uses nullid as the only root. If 'heads' is
        unspecified, uses list of all of the revlog's heads."""
        nonodes = ([], [], [])
        if roots is not None:
            roots = list(roots)
            if not roots:
                return nonodes
            lowestrev = min([self.rev(n) for n in roots])
        else:
            roots = [nullid] # Everybody's a descendant of nullid
            lowestrev = nullrev
        if (lowestrev == nullrev) and (heads is None):
            # We want _all_ the nodes!
            return ([self.node(r) for r in self], [nullid], list(self.heads()))
        if heads is None:
            # All nodes are ancestors, so the latest ancestor is the last
            # node.
            highestrev = len(self) - 1
            # Set ancestors to None to signal that every node is an ancestor.
            ancestors = None
            # Set heads to an empty dictionary for later discovery of heads
            heads = {}
        else:
            heads = list(heads)
            if not heads:
                return nonodes
            ancestors = set()
            # Turn heads into a dictionary so we can remove 'fake' heads.
            # Also, later we will be using it to filter out the heads we can't
            # find from roots.
            heads = dict.fromkeys(heads, False)
            # Start at the top and keep marking parents until we're done.
            nodestotag = set(heads)
            # Remember where the top was so we can use it as a limit later.
            highestrev = max([self.rev(n) for n in nodestotag])
            while nodestotag:
                # grab a node to tag
                n = nodestotag.pop()
                # Never tag nullid
                if n == nullid:
                    continue
                # A node's revision number represents its place in a
                # topologically sorted list of nodes.
                r = self.rev(n)
                if r >= lowestrev:
                    if n not in ancestors:
                        # If we are possibly a descendant of one of the roots
                        # and we haven't already been marked as an ancestor
                        ancestors.add(n) # Mark as ancestor
                        # Add non-nullid parents to list of nodes to tag.
                        nodestotag.update([p for p in self.parents(n) if
                                           p != nullid])
                    elif n in heads: # We've seen it before, is it a fake head?
                        # So it is, real heads should not be the ancestors of
                        # any other heads.
                        heads.pop(n)
            if not ancestors:
                return nonodes
            # Now that we have our set of ancestors, we want to remove any
            # roots that are not ancestors.

            # If one of the roots was nullid, everything is included anyway.
            if lowestrev > nullrev:
                # But, since we weren't, let's recompute the lowest rev to not
                # include roots that aren't ancestors.

                # Filter out roots that aren't ancestors of heads
                roots = [n for n in roots if n in ancestors]
                # Recompute the lowest revision
                if roots:
                    lowestrev = min([self.rev(n) for n in roots])
                else:
                    # No more roots? Return empty list
                    return nonodes
            else:
                # We are descending from nullid, and don't need to care about
                # any other roots.
                lowestrev = nullrev
                roots = [nullid]
        # Transform our roots list into a set.
        descendants = set(roots)
        # Also, keep the original roots so we can filter out roots that aren't
        # 'real' roots (i.e. are descended from other roots).
        roots = descendants.copy()
        # Our topologically sorted list of output nodes.
        orderedout = []
        # Don't start at nullid since we don't want nullid in our output list,
        # and if nullid shows up in descendants, empty parents will look like
        # they're descendants.
        for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
            n = self.node(r)
            isdescendant = False
            if lowestrev == nullrev: # Everybody is a descendant of nullid
                isdescendant = True
            elif n in descendants:
                # n is already a descendant
                isdescendant = True
                # This check only needs to be done here because all the roots
                # will start being marked as descendants before the loop.
                if n in roots:
                    # If n was a root, check if it's a 'real' root.
                    p = tuple(self.parents(n))
                    # If any of its parents are descendants, it's not a root.
                    if (p[0] in descendants) or (p[1] in descendants):
                        roots.remove(n)
            else:
                p = tuple(self.parents(n))
                # A node is a descendant if either of its parents are
                # descendants. (We seeded the dependents list with the roots
                # up there, remember?)
                if (p[0] in descendants) or (p[1] in descendants):
                    descendants.add(n)
                    isdescendant = True
            if isdescendant and ((ancestors is None) or (n in ancestors)):
                # Only include nodes that are both descendants and ancestors.
                orderedout.append(n)
                if (ancestors is not None) and (n in heads):
                    # We're trying to figure out which heads are reachable
                    # from roots.
                    # Mark this head as having been reached
                    heads[n] = True
                elif ancestors is None:
                    # Otherwise, we're trying to discover the heads.
                    # Assume this is a head because if it isn't, the next step
                    # will eventually remove it.
                    heads[n] = True
                    # But, obviously its parents aren't.
                    for p in self.parents(n):
                        heads.pop(p, None)
        heads = [n for n, flag in heads.iteritems() if flag]
        roots = list(roots)
        assert orderedout
        assert roots
        assert heads
        return (orderedout, roots, heads)

    def headrevs(self):
        try:
            return self.index.headrevs()
        except AttributeError:
            return self._headrevs()

    def computephases(self, roots):
        return self.index.computephasesmapsets(roots)

    def _headrevs(self):
        count = len(self)
        if not count:
            return [nullrev]
        # we won't iter over filtered rev so nobody is a head at start
        ishead = [0] * (count + 1)
        index = self.index
        for r in self:
            ishead[r] = 1 # I may be a head
            e = index[r]
            ishead[e[5]] = ishead[e[6]] = 0 # my parents are not
        return [r for r, val in enumerate(ishead) if val]

    def heads(self, start=None, stop=None):
        """return the list of all nodes that have no children

        if start is specified, only heads that are descendants of
        start will be returned
        if stop is specified, it will consider all the revs from stop
        as if they had no children
        """
        if start is None and stop is None:
            if not len(self):
                return [nullid]
            return [self.node(r) for r in self.headrevs()]

        if start is None:
            start = nullid
        if stop is None:
            stop = []
        stoprevs = set([self.rev(n) for n in stop])
        startrev = self.rev(start)
        reachable = set((startrev,))
        heads = set((startrev,))

        parentrevs = self.parentrevs
        for r in self.revs(start=startrev + 1):
            for p in parentrevs(r):
                if p in reachable:
                    if r not in stoprevs:
                        reachable.add(r)
                    heads.add(r)
                if p in heads and p not in stoprevs:
                    heads.remove(p)

        return [self.node(r) for r in heads]

    def children(self, node):
        """find the children of a given node"""
        c = []
        p = self.rev(node)
        for r in self.revs(start=p + 1):
            prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
            if prevs:
                for pr in prevs:
                    if pr == p:
                        c.append(self.node(r))
            elif p == nullrev:
                c.append(self.node(r))
        return c

    def descendant(self, start, end):
        if start == nullrev:
            return True
        for i in self.descendants([start]):
            if i == end:
                return True
            elif i > end:
                break
        return False

    def commonancestorsheads(self, a, b):
        """calculate all the heads of the common ancestors of nodes a and b"""
        a, b = self.rev(a), self.rev(b)
        try:
            ancs = self.index.commonancestorsheads(a, b)
        except (AttributeError, OverflowError): # C implementation failed
            ancs = ancestor.commonancestorsheads(self.parentrevs, a, b)
        return map(self.node, ancs)

    def isancestor(self, a, b):
        """return True if node a is an ancestor of node b

        The implementation of this is trivial but the use of
        commonancestorsheads is not."""
        return a in self.commonancestorsheads(a, b)

    def ancestor(self, a, b):
        """calculate the "best" common ancestor of nodes a and b"""

        a, b = self.rev(a), self.rev(b)
        try:
            ancs = self.index.ancestors(a, b)
        except (AttributeError, OverflowError):
            ancs = ancestor.ancestors(self.parentrevs, a, b)
        if ancs:
            # choose a consistent winner when there's a tie
            return min(map(self.node, ancs))
        return nullid

    def _match(self, id):
        if isinstance(id, int):
            # rev
            return self.node(id)
        if len(id) == 20:
            # possibly a binary node
            # odds of a binary node being all hex in ASCII are 1 in 10**25
            try:
                node = id
                self.rev(node) # quick search the index
                return node
            except LookupError:
                pass # may be partial hex id
        try:
            # str(rev)
            rev = int(id)
            if str(rev) != id:
                raise ValueError
            if rev < 0:
                rev = len(self) + rev
            if rev < 0 or rev >= len(self):
                raise ValueError
            return self.node(rev)
        except (ValueError, OverflowError):
            pass
        if len(id) == 40:
            try:
                # a full hex nodeid?
                node = bin(id)
                self.rev(node)
                return node
            except (TypeError, LookupError):
                pass

    def _partialmatch(self, id):
        try:
            n = self.index.partialmatch(id)
            if n and self.hasnode(n):
                return n
            return None
        except RevlogError:
            # parsers.c radix tree lookup gave multiple matches
            # fall through to slow path that filters hidden revisions
            pass
        except (AttributeError, ValueError):
            # we are pure python, or key was too short to search radix tree
            pass

        if id in self._pcache:
            return self._pcache[id]

        if len(id) < 40:
            try:
                # hex(node)[:...]
                l = len(id) // 2 # grab an even number of digits
                prefix = bin(id[:l * 2])
                nl = [e[7] for e in self.index if e[7].startswith(prefix)]
                nl = [n for n in nl if hex(n).startswith(id) and
                      self.hasnode(n)]
                if len(nl) > 0:
                    if len(nl) == 1:
                        self._pcache[id] = nl[0]
                        return nl[0]
                    raise LookupError(id, self.indexfile,
                                      _('ambiguous identifier'))
                return None
            except TypeError:
                pass

    def lookup(self, id):
        """locate a node based on:
            - revision number or str(revision number)
            - nodeid or subset of hex nodeid
        """
        n = self._match(id)
        if n is not None:
            return n
        n = self._partialmatch(id)
        if n:
            return n

        raise LookupError(id, self.indexfile, _('no match found'))

    def cmp(self, node, text):
        """compare text with a given file revision

        returns True if text is different from what is stored.
        """
        p1, p2 = self.parents(node)
        return hash(text, p1, p2) != node

    def _addchunk(self, offset, data):
        o, d = self._chunkcache
        # try to add to existing cache
        if o + len(d) == offset and len(d) + len(data) < _chunksize:
            self._chunkcache = o, d + data
        else:
            self._chunkcache = offset, data

    def _loadchunk(self, offset, length, df=None):
        """Load a chunk/segment from the revlog.

        Accepts an absolute offset, a length to read, and an optional
        existing file handle to read from.

        If an existing file handle is passed, it will be seeked and the
        original seek position will NOT be restored.
        """
        if df is not None:
            closehandle = False
        else:
            if self._inline:
                df = self.opener(self.indexfile)
            else:
                df = self.opener(self.datafile)
            closehandle = True

        # Cache data both forward and backward around the requested
        # data, in a fixed size window. This helps speed up operations
        # involving reading the revlog backwards.
        cachesize = self._chunkcachesize
        realoffset = offset & ~(cachesize - 1)
        reallength = (((offset + length + cachesize) & ~(cachesize - 1))
                      - realoffset)
        df.seek(realoffset)
        d = df.read(reallength)
        if closehandle:
            df.close()
        self._addchunk(realoffset, d)
        if offset != realoffset or reallength != length:
            return util.buffer(d, offset - realoffset, length)
        return d
968
969
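# Illustration (editorial sketch, not part of revlog.py): the window
# arithmetic above rounds the requested span out to cache-size boundaries,
# assuming _chunkcachesize is a power of two. A quick standalone check of
# that alignment:
def alignedwindow(offset, length, cachesize=65536):
    realoffset = offset & ~(cachesize - 1)            # round start down
    reallength = (((offset + length + cachesize)
                   & ~(cachesize - 1)) - realoffset)  # round end up
    return realoffset, reallength

# the rounded window always contains the requested byte range and both
# edges stay aligned to the cache size
start, width = alignedwindow(70000, 1000)
assert start <= 70000 and 70000 + 1000 <= start + width
assert start % 65536 == 0 and width % 65536 == 0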
    def _getchunk(self, offset, length, df=None):
        o, d = self._chunkcache
        l = len(d)

        # is it in the cache?
        cachestart = offset - o
        cacheend = cachestart + length
        if cachestart >= 0 and cacheend <= l:
            if cachestart == 0 and cacheend == l:
                return d # avoid a copy
            return util.buffer(d, cachestart, cacheend - cachestart)

        return self._loadchunk(offset, length, df=df)

    def _chunkraw(self, startrev, endrev, df=None):
        start = self.start(startrev)
        end = self.end(endrev)
        if self._inline:
            start += (startrev + 1) * self._io.size
            end += (endrev + 1) * self._io.size
        length = end - start
        return self._getchunk(start, length, df=df)
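# Illustration (editorial sketch, not part of revlog.py): in an inline
# revlog the index entries and data chunks interleave in one file, so the
# physical offset of a revision's data is its data-only offset plus the
# (rev + 1) index entries written before it -- exactly the adjustment
# _chunkraw applies above. Assuming a toy 64-byte entry size:
def inlineoffset(rev, datastart, iosize=64):
    # datastart plays the role of self.start(rev): cumulative size of
    # data chunks only, ignoring the interleaved index entries
    return datastart + (rev + 1) * iosize

# rev 0's data sits immediately behind its own index entry
assert inlineoffset(0, 0) == 64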
    def _chunk(self, rev, df=None):
        return decompress(self._chunkraw(rev, rev, df=df))

    def _chunks(self, revs, df=None):
        '''faster version of [self._chunk(rev) for rev in revs]

        Assumes that revs is in ascending order.'''
        if not revs:
            return []
        start = self.start
        length = self.length
        inline = self._inline
        iosize = self._io.size
        buffer = util.buffer

        l = []
        ladd = l.append

        # preload the cache
        try:
            while True:
                # ensure that the cache doesn't change out from under us
                _cache = self._chunkcache
                self._chunkraw(revs[0], revs[-1], df=df)
                if _cache == self._chunkcache:
                    break
            offset, data = _cache
        except OverflowError:
            # issue4215 - we can't cache a run of chunks greater than
            # 2G on Windows
            return [self._chunk(rev, df=df) for rev in revs]

        for rev in revs:
            chunkstart = start(rev)
            if inline:
                chunkstart += (rev + 1) * iosize
            chunklength = length(rev)
            ladd(decompress(buffer(data, chunkstart - offset, chunklength)))

        return l
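# Illustration (editorial sketch, not part of revlog.py): the loop above
# decompresses each revision out of one contiguous, already-read segment
# instead of issuing a read per revision. The slicing pattern, with toy
# (offset, length) spans in place of the real index:
def slicesegment(segment, segmentstart, spans):
    # spans is a list of (absolute offset, length) pairs inside segment
    return [segment[off - segmentstart:off - segmentstart + size]
            for off, size in spans]

data = b'aaabbbbcc'
assert slicesegment(data, 100, [(100, 3), (103, 4), (107, 2)]) == \
       [b'aaa', b'bbbb', b'cc']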
    def _chunkclear(self):
        self._chunkcache = (0, '')

    def deltaparent(self, rev):
        """return deltaparent of the given revision"""
        base = self.index[rev][3]
        if base == rev:
            return nullrev
        elif self._generaldelta:
            return base
        else:
            return rev - 1

    def revdiff(self, rev1, rev2):
        """return or calculate a delta between two revisions"""
        if rev1 != nullrev and self.deltaparent(rev2) == rev1:
            return str(self._chunk(rev2))

        return mdiff.textdiff(self.revision(rev1),
                              self.revision(rev2))

    def revision(self, nodeorrev, _df=None):
        """return an uncompressed revision of a given node or revision
        number.

        _df is an existing file handle to read from. It is meant to only be
        used internally.
        """
        if isinstance(nodeorrev, int):
            rev = nodeorrev
            node = self.node(rev)
        else:
            node = nodeorrev
            rev = None

        cachedrev = None
        if node == nullid:
            return ""
        if self._cache:
            if self._cache[0] == node:
                return self._cache[2]
            cachedrev = self._cache[1]

        # look up what we need to read
        text = None
        if rev is None:
            rev = self.rev(node)

        # check rev flags
        if self.flags(rev) & ~REVIDX_KNOWN_FLAGS:
            raise RevlogError(_('incompatible revision flag %x') %
                              (self.flags(rev) & ~REVIDX_KNOWN_FLAGS))

        # build delta chain
        chain = []
        index = self.index # for performance
        generaldelta = self._generaldelta
        iterrev = rev
        e = index[iterrev]
        while iterrev != e[3] and iterrev != cachedrev:
            chain.append(iterrev)
            if generaldelta:
                iterrev = e[3]
            else:
                iterrev -= 1
            e = index[iterrev]

        if iterrev == cachedrev:
            # cache hit
            text = self._cache[2]
        else:
            chain.append(iterrev)
        chain.reverse()

        # drop cache to save memory
        self._cache = None

        bins = self._chunks(chain, df=_df)
        if text is None:
            text = str(bins[0])
            bins = bins[1:]

        text = mdiff.patches(text, bins)

        text = self._checkhash(text, node, rev)

        self._cache = (node, rev, text)
        return text
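# Illustration (editorial sketch, not part of revlog.py): revision() above
# rebuilds a fulltext by walking back to a base revision and replaying
# deltas forward, oldest first. A toy model with a plain callable standing
# in for mdiff.patches:
def rebuild(rev, bases, deltas, applydelta):
    # bases: rev -> delta parent (the rev itself when it stores a fulltext)
    # deltas: rev -> stored blob (fulltext for bases, delta otherwise)
    chain = []
    while bases[rev] != rev:          # walk back until a fulltext
        chain.append(rev)
        rev = bases[rev]
    text = deltas[rev]
    for r in reversed(chain):         # replay deltas oldest-first
        text = applydelta(text, deltas[r])
    return text

# toy deltas that just append a suffix
assert rebuild(2, {0: 0, 1: 0, 2: 1},
               {0: 'base', 1: '+v1', 2: '+v2'},
               lambda t, d: t + d[1:]) == 'basev1v2'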
    def hash(self, text, p1, p2):
        """Compute a node hash.

        Available as a function so that subclasses can replace the hash
        as needed.
        """
        return hash(text, p1, p2)

    def _checkhash(self, text, node, rev):
        p1, p2 = self.parents(node)
        self.checkhash(text, p1, p2, node, rev)
        return text

    def checkhash(self, text, p1, p2, node, rev=None):
        if node != self.hash(text, p1, p2):
            revornode = rev
            if revornode is None:
                revornode = templatefilters.short(hex(node))
            raise RevlogError(_("integrity check failed on %s:%s")
                              % (self.indexfile, revornode))
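# Illustration (editorial sketch, not part of revlog.py): the integrity
# check above recomputes the node from the text and its parents. Mercurial's
# node is the SHA-1 of the two parent nodes (in sorted order) followed by
# the text; a standalone check can mirror that:
import hashlib

def nodehash(text, p1, p2):
    s = hashlib.sha1(min(p1, p2))
    s.update(max(p1, p2))
    s.update(text)
    return s.digest()

nullid = b'\0' * 20
assert len(nodehash(b'hello', nullid, nullid)) == 20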
    def checkinlinesize(self, tr, fp=None):
        """Check if the revlog is too big for inline and convert if so.

        This should be called after revisions are added to the revlog. If the
        revlog has grown too large to be an inline revlog, it will convert it
        to use multiple index and data files.
        """
        if not self._inline or (self.start(-2) + self.length(-2)) < _maxinline:
            return

        trinfo = tr.find(self.indexfile)
        if trinfo is None:
            raise RevlogError(_("%s not found in the transaction")
                              % self.indexfile)

        trindex = trinfo[2]
        if trindex is not None:
            dataoff = self.start(trindex)
        else:
            # revlog was stripped at start of transaction, use all leftover
            # data
            trindex = len(self) - 1
            dataoff = self.end(-2)

        tr.add(self.datafile, dataoff)

        if fp:
            fp.flush()
            fp.close()

        df = self.opener(self.datafile, 'w')
        try:
            for r in self:
                df.write(self._chunkraw(r, r))
        finally:
            df.close()

        fp = self.opener(self.indexfile, 'w', atomictemp=True)
        self.version &= ~(REVLOGNGINLINEDATA)
        self._inline = False
        for i in self:
            e = self._io.packentry(self.index[i], self.node, self.version, i)
            fp.write(e)

        # if we don't call close, the temp file will never replace the
        # real index
        fp.close()

        tr.replace(self.indexfile, trindex * self._io.size)
        self._chunkclear()

    def addrevision(self, text, transaction, link, p1, p2, cachedelta=None,
                    node=None):
        """add a revision to the log

        text - the revision data to add
        transaction - the transaction object used for rollback
        link - the linkrev data to add
        p1, p2 - the parent nodeids of the revision
        cachedelta - an optional precomputed delta
        node - nodeid of revision; typically node is not specified, and it is
            computed by default as hash(text, p1, p2), however subclasses might
            use different hashing method (and override checkhash() in such
            case)
        """
        if link == nullrev:
            raise RevlogError(_("attempted to add linkrev -1 to %s")
                              % self.indexfile)

        if len(text) > _maxentrysize:
            raise RevlogError(
                _("%s: size of %d bytes exceeds maximum revlog storage of 2GiB")
                % (self.indexfile, len(text)))

        node = node or self.hash(text, p1, p2)
        if node in self.nodemap:
            return node

        dfh = None
        if not self._inline:
            dfh = self.opener(self.datafile, "a+")
        ifh = self.opener(self.indexfile, "a+")
        try:
            return self._addrevision(node, text, transaction, link, p1, p2,
                                     REVIDX_DEFAULT_FLAGS, cachedelta, ifh, dfh)
        finally:
            if dfh:
                dfh.close()
            ifh.close()

    def compress(self, text):
        """generate a possibly-compressed representation of text"""
        if not text:
            return ("", text)
        l = len(text)
        bin = None
        if l < 44:
            pass
        elif l > 1000000:
            # zlib makes an internal copy, thus doubling memory usage for
            # large files, so let's do it in pieces
            z = zlib.compressobj()
            p = []
            pos = 0
            while pos < l:
                pos2 = pos + 2**20
                p.append(z.compress(text[pos:pos2]))
                pos = pos2
            p.append(z.flush())
            if sum(map(len, p)) < l:
                bin = "".join(p)
        else:
            bin = _compress(text)
        if bin is None or len(bin) > l:
            if text[0] == '\0':
                return ("", text)
            return ('u', text)
        return ("", bin)
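# Illustration (editorial sketch, not part of revlog.py): a reader has to
# undo the three envelopes compress() can emit above -- zlib output (whose
# first byte is 'x' for default settings), an explicit 'u' marker for
# incompressible text, and raw text whose own first byte is '\0'. A minimal
# decoder under those assumptions:
import zlib

def decodechunk(bin):
    if not bin:
        return bin
    t = bin[:1]
    if t == b'\x00':      # uncompressed, self-marking
        return bin
    if t == b'x':         # zlib stream
        return zlib.decompress(bin)
    if t == b'u':         # uncompressed, marker prepended
        return bin[1:]
    raise ValueError('unknown compression type %r' % t)

assert decodechunk(zlib.compress(b'some text')) == b'some text'
assert decodechunk(b'ushort') == b'short'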
    def _isgooddelta(self, d, textlen):
        """Returns True if the given delta is good. Good means that it is
        within the disk span, disk size, and chain length bounds that we know
        to be performant."""
        if d is None:
            return False

        # - 'dist' is the distance from the base revision -- bounding it limits
        #   the amount of I/O we need to do.
        # - 'compresseddeltalen' is the sum of the total size of deltas we need
        #   to apply -- bounding it limits the amount of CPU we consume.
        dist, l, data, base, chainbase, chainlen, compresseddeltalen = d
        if (dist > textlen * 4 or l > textlen or
            compresseddeltalen > textlen * 2 or
            (self._maxchainlen and chainlen > self._maxchainlen)):
            return False

        return True

    def _addrevision(self, node, text, transaction, link, p1, p2, flags,
                     cachedelta, ifh, dfh, alwayscache=False):
        """internal function to add revisions to the log

        see addrevision for argument descriptions.
        invariants:
        - text is optional (can be None); if not set, cachedelta must be set.
          if both are set, they must correspond to each other.
        """
        btext = [text]
        def buildtext():
            if btext[0] is not None:
                return btext[0]
            baserev = cachedelta[0]
            delta = cachedelta[1]
            # special case deltas which replace entire base; no need to decode
            # base revision. this neatly avoids censored bases, which throw
            # when they're decoded.
            hlen = struct.calcsize(">lll")
            if delta[:hlen] == mdiff.replacediffheader(self.rawsize(baserev),
                                                       len(delta) - hlen):
                btext[0] = delta[hlen:]
            else:
                if self._inline:
                    fh = ifh
                else:
                    fh = dfh
                basetext = self.revision(self.node(baserev), _df=fh)
                btext[0] = mdiff.patch(basetext, delta)
            try:
                self.checkhash(btext[0], p1, p2, node)
                if flags & REVIDX_ISCENSORED:
                    raise RevlogError(_('node %s is not censored') % node)
            except CensoredNodeError:
                # must pass the censored index flag to add censored revisions
                if not flags & REVIDX_ISCENSORED:
                    raise
            return btext[0]

        def builddelta(rev):
            # can we use the cached delta?
            if cachedelta and cachedelta[0] == rev:
                delta = cachedelta[1]
            else:
                t = buildtext()
                if self.iscensored(rev):
                    # deltas based on a censored revision must replace the
                    # full content in one patch, so delta works everywhere
                    header = mdiff.replacediffheader(self.rawsize(rev), len(t))
                    delta = header + t
                else:
                    if self._inline:
                        fh = ifh
                    else:
                        fh = dfh
                    ptext = self.revision(self.node(rev), _df=fh)
                    delta = mdiff.textdiff(ptext, t)
            data = self.compress(delta)
            l = len(data[1]) + len(data[0])
            if basecache[0] == rev:
                chainbase = basecache[1]
            else:
                chainbase = self.chainbase(rev)
            dist = l + offset - self.start(chainbase)
            if self._generaldelta:
                base = rev
            else:
                base = chainbase
            chainlen, compresseddeltalen = self._chaininfo(rev)
            chainlen += 1
            compresseddeltalen += l
            return dist, l, data, base, chainbase, chainlen, compresseddeltalen

        curr = len(self)
        prev = curr - 1
        base = chainbase = curr
        chainlen = None
        offset = self.end(prev)
        d = None
        if self._basecache is None:
            self._basecache = (prev, self.chainbase(prev))
        basecache = self._basecache
        p1r, p2r = self.rev(p1), self.rev(p2)

        # full versions are inserted when the needed deltas
        # become comparable to the uncompressed text
        if text is None:
            textlen = mdiff.patchedsize(self.rawsize(cachedelta[0]),
                                        cachedelta[1])
        else:
            textlen = len(text)

        # should we try to build a delta?
        if prev != nullrev:
            if self._generaldelta:
                if p2r != nullrev and self._aggressivemergedeltas:
                    d = builddelta(p1r)
                    d2 = builddelta(p2r)
                    p1good = self._isgooddelta(d, textlen)
                    p2good = self._isgooddelta(d2, textlen)
                    if p1good and p2good:
                        # If both are good deltas, choose the smallest
                        if d2[1] < d[1]:
                            d = d2
                    elif p2good:
                        # If only p2 is good, use it
                        d = d2
                    elif p1good:
                        pass
                    else:
                        # Neither is good, try against prev to hopefully save
                        # us a fulltext.
                        d = builddelta(prev)
                else:
                    # Pick whichever parent is closer to us (to minimize the
                    # chance of having to build a fulltext). Since
                    # nullrev == -1, any non-merge commit will always pick p1r.
                    drev = p2r if p2r > p1r else p1r
                    d = builddelta(drev)
                    # If the chosen delta will result in us making a full text,
                    # give it one last try against prev.
                    if drev != prev and not self._isgooddelta(d, textlen):
                        d = builddelta(prev)
            else:
                d = builddelta(prev)
            dist, l, data, base, chainbase, chainlen, compresseddeltalen = d

        if not self._isgooddelta(d, textlen):
            text = buildtext()
            data = self.compress(text)
            l = len(data[1]) + len(data[0])
            base = chainbase = curr

        e = (offset_type(offset, flags), l, textlen,
             base, link, p1r, p2r, node)
        self.index.insert(-1, e)
        self.nodemap[node] = curr

        entry = self._io.packentry(e, self.node, self.version, curr)
        self._writeentry(transaction, ifh, dfh, entry, data, link, offset)

        if alwayscache and text is None:
            text = buildtext()

        if type(text) == str: # only accept immutable objects
            self._cache = (node, curr, text)
        self._basecache = (curr, chainbase)
        return node
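# Illustration (editorial sketch, not part of revlog.py): the heart of
# _addrevision is the choice between storing a delta and paying for a fresh
# fulltext. A compressed model of that decision, applying the same bounds
# _isgooddelta uses above (names here are toy stand-ins):
def choosestorage(deltainfo, textlen, maxchainlen=None):
    # deltainfo is (dist, size, chainlen, compresseddeltalen) or None
    if deltainfo is None:
        return 'fulltext'
    dist, size, chainlen, compresseddeltalen = deltainfo
    if (dist > textlen * 4 or size > textlen or
        compresseddeltalen > textlen * 2 or
        (maxchainlen and chainlen > maxchainlen)):
        return 'fulltext'
    return 'delta'

# a tiny delta far from its base still loses: reading it back would mean
# scanning four times the fulltext size on disk
assert choosestorage((5000, 10, 3, 30), textlen=1000) == 'fulltext'
assert choosestorage((800, 10, 3, 30), textlen=1000) == 'delta'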
    def _writeentry(self, transaction, ifh, dfh, entry, data, link, offset):
        # Files opened in a+ mode have inconsistent behavior on various
        # platforms. Windows requires that a file positioning call be made
        # when the file handle transitions between reads and writes. See
        # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
        # platforms, Python or the platform itself can be buggy. Some versions
        # of Solaris have been observed to not append at the end of the file
        # if the file was seeked to before the end. See issue4943 for more.
        #
        # We work around this issue by inserting a seek() before writing.
        # Note: This is likely not necessary on Python 3.
        ifh.seek(0, os.SEEK_END)
        if dfh:
            dfh.seek(0, os.SEEK_END)

        curr = len(self) - 1
        if not self._inline:
            transaction.add(self.datafile, offset)
            transaction.add(self.indexfile, curr * len(entry))
            if data[0]:
                dfh.write(data[0])
            dfh.write(data[1])
            ifh.write(entry)
        else:
            offset += curr * self._io.size
            transaction.add(self.indexfile, offset, curr)
            ifh.write(entry)
            ifh.write(data[0])
            ifh.write(data[1])
            self.checkinlinesize(transaction, ifh)
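# Illustration (editorial sketch, not part of revlog.py): the seek() calls
# added above guard against 'a+' handles whose position was moved by an
# interleaved read. The pattern, standalone -- an explicit seek to EOF makes
# the append position unambiguous on every platform:
import os, tempfile

fd, path = tempfile.mkstemp()
os.close(fd)
with open(path, 'wb') as f:
    f.write(b'0123456789')
fh = open(path, 'ab+')
fh.seek(0)
fh.read(4)                  # a read moves the file position...
fh.seek(0, os.SEEK_END)     # ...so reposition before appending (issue4943)
fh.write(b'tail')
fh.close()
with open(path, 'rb') as f:
    assert f.read() == b'0123456789tail'
os.remove(path)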
    def addgroup(self, cg, linkmapper, transaction, addrevisioncb=None):
        """
        add a delta group

        given a set of deltas, add them to the revision log. the
        first delta is against its parent, which should be in our
        log, the rest are against the previous delta.

        If ``addrevisioncb`` is defined, it will be called with arguments of
        this revlog and the node that was added.
        """

        # track the base of the current delta log
        content = []
        node = None

        r = len(self)
        end = 0
        if r:
            end = self.end(r - 1)
        ifh = self.opener(self.indexfile, "a+")
        isize = r * self._io.size
        if self._inline:
            transaction.add(self.indexfile, end + isize, r)
            dfh = None
        else:
            transaction.add(self.indexfile, isize, r)
            transaction.add(self.datafile, end)
            dfh = self.opener(self.datafile, "a+")
        def flush():
            if dfh:
                dfh.flush()
            ifh.flush()
        try:
            # loop through our set of deltas
            chain = None
            while True:
                chunkdata = cg.deltachunk(chain)
                if not chunkdata:
                    break
                node = chunkdata['node']
                p1 = chunkdata['p1']
                p2 = chunkdata['p2']
                cs = chunkdata['cs']
                deltabase = chunkdata['deltabase']
                delta = chunkdata['delta']

                content.append(node)

                link = linkmapper(cs)
                if node in self.nodemap:
                    # this can happen if two branches make the same change
                    chain = node
                    continue

                for p in (p1, p2):
                    if p not in self.nodemap:
                        raise LookupError(p, self.indexfile,
                                          _('unknown parent'))

                if deltabase not in self.nodemap:
                    raise LookupError(deltabase, self.indexfile,
                                      _('unknown delta base'))

                baserev = self.rev(deltabase)

                if baserev != nullrev and self.iscensored(baserev):
                    # if base is censored, delta must be full replacement in a
                    # single patch operation
                    hlen = struct.calcsize(">lll")
                    oldlen = self.rawsize(baserev)
                    newlen = len(delta) - hlen
                    if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
                        raise error.CensoredBaseError(self.indexfile,
                                                      self.node(baserev))

                flags = REVIDX_DEFAULT_FLAGS
                if self._peek_iscensored(baserev, delta, flush):
                    flags |= REVIDX_ISCENSORED

                # We assume consumers of addrevisioncb will want to retrieve
                # the added revision, which will require a call to
                # revision(). revision() will fast path if there is a cache
                # hit. So, we tell _addrevision() to always cache in this case.
                chain = self._addrevision(node, None, transaction, link,
                                          p1, p2, flags, (baserev, delta),
                                          ifh, dfh,
                                          alwayscache=bool(addrevisioncb))

                if addrevisioncb:
                    addrevisioncb(self, chain)

                if not dfh and not self._inline:
                    # addrevision switched from inline to conventional
                    # reopen the index
                    ifh.close()
                    dfh = self.opener(self.datafile, "a+")
                    ifh = self.opener(self.indexfile, "a+")
        finally:
            if dfh:
                dfh.close()
            ifh.close()

        return content

    def iscensored(self, rev):
        """Check if a file revision is censored."""
        return False

    def _peek_iscensored(self, baserev, delta, flush):
        """Quickly check if a delta produces a censored revision."""
        return False

    def getstrippoint(self, minlink):
        """find the minimum rev that must be stripped to strip the linkrev

        Returns a tuple containing the minimum rev and a set of all revs that
        have linkrevs that will be broken by this strip.
        """
        brokenrevs = set()
        strippoint = len(self)

        heads = {}
        futurelargelinkrevs = set()
        for head in self.headrevs():
            headlinkrev = self.linkrev(head)
            heads[head] = headlinkrev
            if headlinkrev >= minlink:
                futurelargelinkrevs.add(headlinkrev)

        # This algorithm involves walking down the rev graph, starting at the
        # heads. Since the revs are topologically sorted according to linkrev,
        # once all head linkrevs are below the minlink, we know there are
        # no more revs that could have a linkrev greater than minlink.
        # So we can stop walking.
        while futurelargelinkrevs:
            strippoint -= 1
            linkrev = heads.pop(strippoint)

            if linkrev < minlink:
                brokenrevs.add(strippoint)
            else:
                futurelargelinkrevs.remove(linkrev)

            for p in self.parentrevs(strippoint):
                if p != nullrev:
                    plinkrev = self.linkrev(p)
                    heads[p] = plinkrev
                    if plinkrev >= minlink:
                        futurelargelinkrevs.add(plinkrev)

        return strippoint, brokenrevs
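# Illustration (editorial sketch, not part of revlog.py): the walk above
# peels revisions off the top of the revlog (highest rev first) until no
# remaining head can carry a linkrev >= minlink. A toy run, with linkrev and
# parentrevs as plain tables; discard() is used where the real code can rely
# on a stricter remove():
def strippoint(linkrevs, parents, headrevs, minlink):
    nullrev = -1
    point = len(linkrevs)
    brokenrevs = set()
    heads = {h: linkrevs[h] for h in headrevs}
    futurelarge = set(lr for lr in heads.values() if lr >= minlink)
    while futurelarge:
        point -= 1
        linkrev = heads.pop(point)
        if linkrev < minlink:
            brokenrevs.add(point)
        else:
            futurelarge.discard(linkrev)
        for p in parents[point]:
            if p != nullrev:
                heads[p] = linkrevs[p]
                if linkrevs[p] >= minlink:
                    futurelarge.add(linkrevs[p])
    return point, brokenrevs

# linear history 0-1-2 with linkrevs 0,1,2: stripping from linkrev 1
# truncates at rev 1, removing revs 1 and 2, with no broken linkrevs
assert strippoint([0, 1, 2], {2: [1], 1: [0], 0: [-1]}, [2], 1) == (1, set())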
    def strip(self, minlink, transaction):
        """truncate the revlog on the first revision with a linkrev >= minlink

        This function is called when we're stripping revision minlink and
        its descendants from the repository.

        We have to remove all revisions with linkrev >= minlink, because
        the equivalent changelog revisions will be renumbered after the
        strip.

        So we truncate the revlog on the first of these revisions, and
        trust that the caller has saved the revisions that shouldn't be
        removed and that it'll re-add them after this truncation.
        """
        if len(self) == 0:
            return

        rev, _ = self.getstrippoint(minlink)
        if rev == len(self):
            return

        # first truncate the files on disk
        end = self.start(rev)
        if not self._inline:
            transaction.add(self.datafile, end)
            end = rev * self._io.size
        else:
            end += rev * self._io.size

        transaction.add(self.indexfile, end)

        # then reset internal state in memory to forget those revisions
        self._cache = None
        self._chaininfocache = {}
        self._chunkclear()
        for x in xrange(rev, len(self)):
            del self.nodemap[self.node(x)]

        del self.index[rev:-1]

    def checksize(self):
        expected = 0
        if len(self):
            expected = max(0, self.end(len(self) - 1))

        try:
            f = self.opener(self.datafile)
            f.seek(0, 2)
            actual = f.tell()
            f.close()
            dd = actual - expected
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise
            dd = 0

        try:
            f = self.opener(self.indexfile)
            f.seek(0, 2)
            actual = f.tell()
            f.close()
            s = self._io.size
            i = max(0, actual // s)
            di = actual - (i * s)
            if self._inline:
                databytes = 0
                for r in self:
                    databytes += max(0, self.length(r))
                dd = 0
                di = actual - len(self) * s - databytes
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise
            di = 0

        return (dd, di)

    def files(self):
        res = [self.indexfile]
        if not self._inline:
            res.append(self.datafile)
        return res