revlog: switch findmissing* methods to incrementalmissingrevs...
Siddharth Agarwal - r23338:d8f5b2f5 default
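
This changeset converts findmissingrevs() and findmissing() from one-shot
calls to ancestor.missingancestors() over to the stateful object returned by
incrementalmissingrevs(). A minimal sketch of the new call pattern, assuming
rl is a revlog instance and common/heads are lists of revision numbers:

    inc = rl.incrementalmissingrevs(common=common)
    missing = inc.missingancestors(heads)  # revs in (::heads) - (::common)

Per the incrementalmissingrevs() docstring, the returned
ancestor.incrementalmissingancestors object can then be queried again for
further head sets, which the one-shot helper could not do.
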
@@ -1,1534 +1,1535 @@
# revlog.py - storage back-end for mercurial
#
# Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.

"""Storage back-end for Mercurial.

This provides efficient delta storage with O(1) retrieve and append
and O(changes) merge between branches.
"""

# import stuff from node for others to import from revlog
from node import bin, hex, nullid, nullrev
from i18n import _
import ancestor, mdiff, parsers, error, util, templatefilters
import struct, zlib, errno

_pack = struct.pack
_unpack = struct.unpack
_compress = zlib.compress
_decompress = zlib.decompress
_sha = util.sha1

# revlog header flags
REVLOGV0 = 0
REVLOGNG = 1
REVLOGNGINLINEDATA = (1 << 16)
REVLOGGENERALDELTA = (1 << 17)
REVLOG_DEFAULT_FLAGS = REVLOGNGINLINEDATA
REVLOG_DEFAULT_FORMAT = REVLOGNG
REVLOG_DEFAULT_VERSION = REVLOG_DEFAULT_FORMAT | REVLOG_DEFAULT_FLAGS
REVLOGNG_FLAGS = REVLOGNGINLINEDATA | REVLOGGENERALDELTA

# revlog index flags
REVIDX_KNOWN_FLAGS = 0

# max size of revlog with inline data
_maxinline = 131072
_chunksize = 1048576

RevlogError = error.RevlogError
LookupError = error.LookupError
CensoredNodeError = error.CensoredNodeError

def getoffset(q):
    return int(q >> 16)

def gettype(q):
    return int(q & 0xFFFF)

def offset_type(offset, type):
    return long(long(offset) << 16 | type)

_nullhash = _sha(nullid)

def hash(text, p1, p2):
    """generate a hash from the given text and its parent hashes

    This hash combines both the current file contents and its history
    in a manner that makes it easy to distinguish nodes with the same
    content in the revision graph.
    """
    # As of now, if one of the parent nodes is null, p2 is null
    if p2 == nullid:
        # deep copy of a hash is faster than creating one
        s = _nullhash.copy()
        s.update(p1)
    else:
        # none of the parent nodes are nullid
        l = [p1, p2]
        l.sort()
        s = _sha(l[0])
        s.update(l[1])
    s.update(text)
    return s.digest()
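
# --- Editor's illustration (sketch, not part of revlog.py) ---------------
# hash() sorts the parents before mixing them in, so a nodeid never depends
# on parent order; the p2 == nullid branch is only a shortcut, because
# nullid (twenty '\0' bytes) always sorts first.
assert hash('data', '\x01' * 20, '\x02' * 20) == \
       hash('data', '\x02' * 20, '\x01' * 20)
assert hash('data', '\x01' * 20, nullid) == \
       hash('data', nullid, '\x01' * 20)
# --------------------------------------------------------------------------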

def decompress(bin):
    """ decompress the given input """
    if not bin:
        return bin
    t = bin[0]
    if t == '\0':
        return bin
    if t == 'x':
        try:
            return _decompress(bin)
        except zlib.error, e:
            raise RevlogError(_("revlog decompress error: %s") % str(e))
    if t == 'u':
        return bin[1:]
    raise RevlogError(_("unknown compression type %r") % t)
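
# --- Editor's illustration (sketch, not part of revlog.py) ---------------
# Chunks are tagged by their first byte: empty or '\0' data is stored
# as-is, 'x' marks zlib output (which begins with 'x' at default settings),
# and a 'u' prefix marks text kept uncompressed. Round-tripping by hand:
assert decompress('') == ''
assert decompress(zlib.compress('some revision text')) == 'some revision text'
assert decompress('uplain text') == 'plain text'
# --------------------------------------------------------------------------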

# index v0:
#  4 bytes: offset
#  4 bytes: compressed length
#  4 bytes: base rev
#  4 bytes: link rev
# 20 bytes: parent 1 nodeid
# 20 bytes: parent 2 nodeid
# 20 bytes: nodeid
indexformatv0 = ">4l20s20s20s"
v0shaoffset = 56

class revlogoldio(object):
    def __init__(self):
        self.size = struct.calcsize(indexformatv0)

    def parseindex(self, data, inline):
        s = self.size
        index = []
        nodemap = {nullid: nullrev}
        n = off = 0
        l = len(data)
        while off + s <= l:
            cur = data[off:off + s]
            off += s
            e = _unpack(indexformatv0, cur)
            # transform to revlogv1 format
            e2 = (offset_type(e[0], 0), e[1], -1, e[2], e[3],
                  nodemap.get(e[4], nullrev), nodemap.get(e[5], nullrev), e[6])
            index.append(e2)
            nodemap[e[6]] = n
            n += 1

        # add the magic null revision at -1
        index.append((0, 0, 0, -1, -1, -1, -1, nullid))

        return index, nodemap, None

    def packentry(self, entry, node, version, rev):
        if gettype(entry[0]):
            raise RevlogError(_("index entry flags need RevlogNG"))
        e2 = (getoffset(entry[0]), entry[1], entry[3], entry[4],
              node(entry[5]), node(entry[6]), entry[7])
        return _pack(indexformatv0, *e2)

# index ng:
#  6 bytes: offset
#  2 bytes: flags
#  4 bytes: compressed length
#  4 bytes: uncompressed length
#  4 bytes: base rev
#  4 bytes: link rev
#  4 bytes: parent 1 rev
#  4 bytes: parent 2 rev
# 32 bytes: nodeid
indexformatng = ">Qiiiiii20s12x"
ngshaoffset = 32
versionformat = ">I"

class revlogio(object):
    def __init__(self):
        self.size = struct.calcsize(indexformatng)

    def parseindex(self, data, inline):
        # call the C implementation to parse the index data
        index, cache = parsers.parse_index2(data, inline)
        return index, getattr(index, 'nodemap', None), cache

    def packentry(self, entry, node, version, rev):
        p = _pack(indexformatng, *entry)
        if rev == 0:
            p = _pack(versionformat, version) + p[4:]
        return p
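
# --- Editor's illustration (sketch, not part of revlog.py) ---------------
# The format strings pin down the on-disk record sizes: a v0 entry is
# 4*4 + 3*20 = 76 bytes, and a RevlogNG entry is 8 + 6*4 + 20 + 12 bytes of
# padding = 64 bytes, which is where the sha offsets 56 and 32 come from.
assert struct.calcsize(indexformatv0) == 76
assert struct.calcsize(indexformatng) == 64
# --------------------------------------------------------------------------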

class revlog(object):
    """
    the underlying revision storage object

    A revlog consists of two parts, an index and the revision data.

    The index is a file with a fixed record size containing
    information on each revision, including its nodeid (hash), the
    nodeids of its parents, the position and offset of its data within
    the data file, and the revision it's based on. Finally, each entry
    contains a linkrev entry that can serve as a pointer to external
    data.

    The revision data itself is a linear collection of data chunks.
    Each chunk represents a revision and is usually represented as a
    delta against the previous chunk. To bound lookup time, runs of
    deltas are limited to about 2 times the length of the original
    version data. This makes retrieval of a version proportional to
    its size, or O(1) relative to the number of revisions.

    Both pieces of the revlog are written to in an append-only
    fashion, which means we never need to rewrite a file to insert or
    remove data, and can use some simple techniques to avoid the need
    for locking while reading.
    """
    def __init__(self, opener, indexfile):
        """
        create a revlog object

        opener is a function that abstracts the file opening operation
        and can be used to implement COW semantics or the like.
        """
        self.indexfile = indexfile
        self.datafile = indexfile[:-2] + ".d"
        self.opener = opener
        self._cache = None
        self._basecache = None
        self._chunkcache = (0, '')
        self._chunkcachesize = 65536
        self._maxchainlen = None
        self.index = []
        self._pcache = {}
        self._nodecache = {nullid: nullrev}
        self._nodepos = None

        v = REVLOG_DEFAULT_VERSION
        opts = getattr(opener, 'options', None)
        if opts is not None:
            if 'revlogv1' in opts:
                if 'generaldelta' in opts:
                    v |= REVLOGGENERALDELTA
            else:
                v = 0
            if 'chunkcachesize' in opts:
                self._chunkcachesize = opts['chunkcachesize']
            if 'maxchainlen' in opts:
                self._maxchainlen = opts['maxchainlen']

        if self._chunkcachesize <= 0:
            raise RevlogError(_('revlog chunk cache size %r is not greater '
                                'than 0') % self._chunkcachesize)
        elif self._chunkcachesize & (self._chunkcachesize - 1):
            raise RevlogError(_('revlog chunk cache size %r is not a power '
                                'of 2') % self._chunkcachesize)

        i = ''
        self._initempty = True
        try:
            f = self.opener(self.indexfile)
            i = f.read()
            f.close()
            if len(i) > 0:
                v = struct.unpack(versionformat, i[:4])[0]
                self._initempty = False
        except IOError, inst:
            if inst.errno != errno.ENOENT:
                raise

        self.version = v
        self._inline = v & REVLOGNGINLINEDATA
        self._generaldelta = v & REVLOGGENERALDELTA
        flags = v & ~0xFFFF
        fmt = v & 0xFFFF
        if fmt == REVLOGV0 and flags:
            raise RevlogError(_("index %s unknown flags %#04x for format v0")
                              % (self.indexfile, flags >> 16))
        elif fmt == REVLOGNG and flags & ~REVLOGNG_FLAGS:
            raise RevlogError(_("index %s unknown flags %#04x for revlogng")
                              % (self.indexfile, flags >> 16))
        elif fmt > REVLOGNG:
            raise RevlogError(_("index %s unknown format %d")
                              % (self.indexfile, fmt))

        self._io = revlogio()
        if self.version == REVLOGV0:
            self._io = revlogoldio()
        try:
            d = self._io.parseindex(i, self._inline)
        except (ValueError, IndexError):
            raise RevlogError(_("index %s is corrupted") % (self.indexfile))
        self.index, nodemap, self._chunkcache = d
        if nodemap is not None:
            self.nodemap = self._nodecache = nodemap
        if not self._chunkcache:
            self._chunkclear()
        # revnum -> (chain-length, sum-delta-length)
        self._chaininfocache = {}
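
    # --- Editor's illustration (sketch, not part of revlog.py) -----------
    # Whichever parser built it, every self.index entry is the same
    # 8-tuple, which the accessors below unpack by position:
    #
    #     (offset_flags, comp_len, uncomp_len, base_rev, link_rev,
    #      p1_rev, p2_rev, nodeid)
    #
    # so start() is entry[0] >> 16, flags() is entry[0] & 0xFFFF, length()
    # is entry[1], linkrev() is entry[4] and node() is entry[7].
    # ----------------------------------------------------------------------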

    def tip(self):
        return self.node(len(self.index) - 2)
    def __len__(self):
        return len(self.index) - 1
    def __iter__(self):
        return iter(xrange(len(self)))
    def revs(self, start=0, stop=None):
        """iterate over all revs in this revlog (from start to stop)"""
        step = 1
        if stop is not None:
            if start > stop:
                step = -1
            stop += step
        else:
            stop = len(self)
        return xrange(start, stop, step)
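
    # --- Editor's illustration (sketch, not part of revlog.py) -----------
    # revs() includes 'stop' and walks backwards when start > stop:
    # revs(2, 5) yields 2, 3, 4, 5 while revs(5, 2) yields 5, 4, 3, 2;
    # with no arguments it visits every revision in ascending order.
    # ----------------------------------------------------------------------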

    @util.propertycache
    def nodemap(self):
        self.rev(self.node(0))
        return self._nodecache

    def hasnode(self, node):
        try:
            self.rev(node)
            return True
        except KeyError:
            return False

    def clearcaches(self):
        try:
            self._nodecache.clearcaches()
        except AttributeError:
            self._nodecache = {nullid: nullrev}
            self._nodepos = None

    def rev(self, node):
        try:
            return self._nodecache[node]
        except TypeError:
            raise
        except RevlogError:
            # parsers.c radix tree lookup failed
            raise LookupError(node, self.indexfile, _('no node'))
        except KeyError:
            # pure python cache lookup failed
            n = self._nodecache
            i = self.index
            p = self._nodepos
            if p is None:
                p = len(i) - 2
            for r in xrange(p, -1, -1):
                v = i[r][7]
                n[v] = r
                if v == node:
                    self._nodepos = r - 1
                    return r
            raise LookupError(node, self.indexfile, _('no node'))

    def node(self, rev):
        return self.index[rev][7]
    def linkrev(self, rev):
        return self.index[rev][4]
    def parents(self, node):
        i = self.index
        d = i[self.rev(node)]
        return i[d[5]][7], i[d[6]][7] # map revisions to nodes inline
    def parentrevs(self, rev):
        return self.index[rev][5:7]
    def start(self, rev):
        return int(self.index[rev][0] >> 16)
    def end(self, rev):
        return self.start(rev) + self.length(rev)
    def length(self, rev):
        return self.index[rev][1]
    def chainbase(self, rev):
        index = self.index
        base = index[rev][3]
        while base != rev:
            rev = base
            base = index[rev][3]
        return base
    def chainlen(self, rev):
        return self._chaininfo(rev)[0]

    def _chaininfo(self, rev):
        chaininfocache = self._chaininfocache
        if rev in chaininfocache:
            return chaininfocache[rev]
        index = self.index
        generaldelta = self._generaldelta
        iterrev = rev
        e = index[iterrev]
        clen = 0
        compresseddeltalen = 0
        while iterrev != e[3]:
            clen += 1
            compresseddeltalen += e[1]
            if generaldelta:
                iterrev = e[3]
            else:
                iterrev -= 1
            if iterrev in chaininfocache:
                t = chaininfocache[iterrev]
                clen += t[0]
                compresseddeltalen += t[1]
                break
            e = index[iterrev]
        else:
            # Add text length of base since decompressing that also takes
            # work. For cache hits the length is already included.
            compresseddeltalen += e[1]
        r = (clen, compresseddeltalen)
        chaininfocache[rev] = r
        return r

    def flags(self, rev):
        return self.index[rev][0] & 0xFFFF
    def rawsize(self, rev):
        """return the length of the uncompressed text for a given revision"""
        l = self.index[rev][2]
        if l >= 0:
            return l

        t = self.revision(self.node(rev))
        return len(t)
    size = rawsize

    def ancestors(self, revs, stoprev=0, inclusive=False):
        """Generate the ancestors of 'revs' in reverse topological order.
        Does not generate revs lower than stoprev.

        See the documentation for ancestor.lazyancestors for more details."""

        return ancestor.lazyancestors(self.parentrevs, revs, stoprev=stoprev,
                                      inclusive=inclusive)

    def descendants(self, revs):
        """Generate the descendants of 'revs' in revision order.

        Yield a sequence of revision numbers starting with a child of
        some rev in revs, i.e., each revision is *not* considered a
        descendant of itself.  Results are ordered by revision number (a
        topological sort)."""
        first = min(revs)
        if first == nullrev:
            for i in self:
                yield i
            return

        seen = set(revs)
        for i in self.revs(start=first + 1):
            for x in self.parentrevs(i):
                if x != nullrev and x in seen:
                    seen.add(i)
                    yield i
                    break
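
    # --- Editor's illustration (sketch, not part of revlog.py) -----------
    # descendants() scans forward from min(revs): on a log where rev 2 has
    # parents 0 and 1, descendants([0]) yields 2 (and anything built on it)
    # but never 0 itself; a revision is not its own descendant here, unlike
    # ancestors() called with inclusive=True.
    # ----------------------------------------------------------------------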

    def findcommonmissing(self, common=None, heads=None):
        """Return a tuple of the ancestors of common and the ancestors of heads
        that are not ancestors of common. In revset terminology, we return the
        tuple:

          ::common, (::heads) - (::common)

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of node IDs.  If heads is
        not supplied, uses all of the revlog's heads.  If common is not
        supplied, uses nullid."""
        if common is None:
            common = [nullid]
        if heads is None:
            heads = self.heads()

        common = [self.rev(n) for n in common]
        heads = [self.rev(n) for n in heads]

        # we want the ancestors, but inclusive
        class lazyset(object):
            def __init__(self, lazyvalues):
                self.addedvalues = set()
                self.lazyvalues = lazyvalues

            def __contains__(self, value):
                return value in self.addedvalues or value in self.lazyvalues

            def __iter__(self):
                added = self.addedvalues
                for r in added:
                    yield r
                for r in self.lazyvalues:
                    if not r in added:
                        yield r

            def add(self, value):
                self.addedvalues.add(value)

            def update(self, values):
                self.addedvalues.update(values)

        has = lazyset(self.ancestors(common))
        has.add(nullrev)
        has.update(common)

        # take all ancestors from heads that aren't in has
        missing = set()
        visit = util.deque(r for r in heads if r not in has)
        while visit:
            r = visit.popleft()
            if r in missing:
                continue
            else:
                missing.add(r)
                for p in self.parentrevs(r):
                    if p not in has:
                        visit.append(p)
        missing = list(missing)
        missing.sort()
        return has, [self.node(r) for r in missing]

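    # --- Editor's illustration (sketch, not part of revlog.py) -----------
    # Worked example of '::common, (::heads) - (::common)': in a linear log
    # 0-1-2-3-4 with common=[node(1)] and heads=[node(4)], 'has' lazily
    # covers revs {-1, 0, 1} and the returned missing list is [node(2),
    # node(3), node(4)], sorted by revision number and hence topologically.
    # ----------------------------------------------------------------------
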
    def incrementalmissingrevs(self, common=None):
        """Return an object that can be used to incrementally compute the
        revision numbers of the ancestors of arbitrary sets that are not
        ancestors of common. This is an ancestor.incrementalmissingancestors
        object.

        'common' is a list of revision numbers. If common is not supplied, uses
        nullrev.
        """
        if common is None:
            common = [nullrev]

        return ancestor.incrementalmissingancestors(self.parentrevs, common)

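    # --- Editor's illustration (sketch, not part of revlog.py) -----------
    # The returned object carries its own state, so one setup can answer
    # repeated missing-ancestor queries. A toy sketch over the linear DAG
    # 0 <- 1 <- 2 <- 3, with rev 1 as the only base:
    #
    #     pfunc = lambda rev: [rev - 1, nullrev]   # parents of each rev
    #     inc = ancestor.incrementalmissingancestors(pfunc, [1])
    #     inc.missingancestors([3])                # -> [2, 3]
    # ----------------------------------------------------------------------
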
    def findmissingrevs(self, common=None, heads=None):
        """Return the revision numbers of the ancestors of heads that
        are not ancestors of common.

        More specifically, return a list of revision numbers corresponding to
        nodes N such that every N satisfies the following constraints:

          1. N is an ancestor of some node in 'heads'
          2. N is not an ancestor of any node in 'common'

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of revision numbers.  If heads is
        not supplied, uses all of the revlog's heads.  If common is not
        supplied, uses nullrev."""
        if common is None:
            common = [nullrev]
        if heads is None:
            heads = self.headrevs()

-        return ancestor.missingancestors(heads, common, self.parentrevs)
+        inc = self.incrementalmissingrevs(common=common)
+        return inc.missingancestors(heads)

    def findmissing(self, common=None, heads=None):
        """Return the ancestors of heads that are not ancestors of common.

        More specifically, return a list of nodes N such that every N
        satisfies the following constraints:

          1. N is an ancestor of some node in 'heads'
          2. N is not an ancestor of any node in 'common'

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of node IDs.  If heads is
        not supplied, uses all of the revlog's heads.  If common is not
        supplied, uses nullid."""
        if common is None:
            common = [nullid]
        if heads is None:
            heads = self.heads()

        common = [self.rev(n) for n in common]
        heads = [self.rev(n) for n in heads]

-        return [self.node(r) for r in
-                ancestor.missingancestors(heads, common, self.parentrevs)]
+        inc = self.incrementalmissingrevs(common=common)
+        return [self.node(r) for r in inc.missingancestors(heads)]

    def nodesbetween(self, roots=None, heads=None):
        """Return a topological path from 'roots' to 'heads'.

        Return a tuple (nodes, outroots, outheads) where 'nodes' is a
        topologically sorted list of all nodes N that satisfy both of
        these constraints:

          1. N is a descendant of some node in 'roots'
          2. N is an ancestor of some node in 'heads'

        Every node is considered to be both a descendant and an ancestor
        of itself, so every reachable node in 'roots' and 'heads' will be
        included in 'nodes'.

        'outroots' is the list of reachable nodes in 'roots', i.e., the
        subset of 'roots' that is returned in 'nodes'.  Likewise,
        'outheads' is the subset of 'heads' that is also in 'nodes'.

        'roots' and 'heads' are both lists of node IDs.  If 'roots' is
        unspecified, uses nullid as the only root.  If 'heads' is
        unspecified, uses list of all of the revlog's heads."""
        nonodes = ([], [], [])
        if roots is not None:
            roots = list(roots)
            if not roots:
                return nonodes
            lowestrev = min([self.rev(n) for n in roots])
        else:
            roots = [nullid] # Everybody's a descendant of nullid
            lowestrev = nullrev
        if (lowestrev == nullrev) and (heads is None):
            # We want _all_ the nodes!
            return ([self.node(r) for r in self], [nullid], list(self.heads()))
        if heads is None:
            # All nodes are ancestors, so the latest ancestor is the last
            # node.
            highestrev = len(self) - 1
            # Set ancestors to None to signal that every node is an ancestor.
            ancestors = None
            # Set heads to an empty dictionary for later discovery of heads
            heads = {}
        else:
            heads = list(heads)
            if not heads:
                return nonodes
            ancestors = set()
            # Turn heads into a dictionary so we can remove 'fake' heads.
            # Also, later we will be using it to filter out the heads we can't
            # find from roots.
            heads = dict.fromkeys(heads, False)
            # Start at the top and keep marking parents until we're done.
            nodestotag = set(heads)
            # Remember where the top was so we can use it as a limit later.
            highestrev = max([self.rev(n) for n in nodestotag])
            while nodestotag:
                # grab a node to tag
                n = nodestotag.pop()
                # Never tag nullid
                if n == nullid:
                    continue
                # A node's revision number represents its place in a
                # topologically sorted list of nodes.
                r = self.rev(n)
                if r >= lowestrev:
                    if n not in ancestors:
                        # If we are possibly a descendant of one of the roots
                        # and we haven't already been marked as an ancestor
                        ancestors.add(n) # Mark as ancestor
                        # Add non-nullid parents to list of nodes to tag.
                        nodestotag.update([p for p in self.parents(n) if
                                           p != nullid])
                    elif n in heads: # We've seen it before, is it a fake head?
                        # So it is, real heads should not be the ancestors of
                        # any other heads.
                        heads.pop(n)
            if not ancestors:
                return nonodes
            # Now that we have our set of ancestors, we want to remove any
            # roots that are not ancestors.

            # If one of the roots was nullid, everything is included anyway.
            if lowestrev > nullrev:
                # But, since we weren't, let's recompute the lowest rev to not
                # include roots that aren't ancestors.

                # Filter out roots that aren't ancestors of heads
                roots = [n for n in roots if n in ancestors]
                # Recompute the lowest revision
                if roots:
                    lowestrev = min([self.rev(n) for n in roots])
                else:
                    # No more roots?  Return empty list
                    return nonodes
            else:
                # We are descending from nullid, and don't need to care about
                # any other roots.
                lowestrev = nullrev
                roots = [nullid]
        # Transform our roots list into a set.
        descendants = set(roots)
        # Also, keep the original roots so we can filter out roots that aren't
        # 'real' roots (i.e. are descended from other roots).
        roots = descendants.copy()
        # Our topologically sorted list of output nodes.
        orderedout = []
        # Don't start at nullid since we don't want nullid in our output list,
        # and if nullid shows up in descendants, empty parents will look like
        # they're descendants.
        for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
            n = self.node(r)
            isdescendant = False
            if lowestrev == nullrev:  # Everybody is a descendant of nullid
                isdescendant = True
            elif n in descendants:
                # n is already a descendant
                isdescendant = True
                # This check only needs to be done here because all the roots
                # will start being marked as descendants before the loop.
                if n in roots:
                    # If n was a root, check if it's a 'real' root.
                    p = tuple(self.parents(n))
                    # If any of its parents are descendants, it's not a root.
                    if (p[0] in descendants) or (p[1] in descendants):
                        roots.remove(n)
            else:
                p = tuple(self.parents(n))
                # A node is a descendant if either of its parents are
                # descendants.  (We seeded the descendants set with the roots
                # up there, remember?)
                if (p[0] in descendants) or (p[1] in descendants):
                    descendants.add(n)
                    isdescendant = True
            if isdescendant and ((ancestors is None) or (n in ancestors)):
                # Only include nodes that are both descendants and ancestors.
                orderedout.append(n)
                if (ancestors is not None) and (n in heads):
                    # We're trying to figure out which heads are reachable
                    # from roots.
                    # Mark this head as having been reached
                    heads[n] = True
                elif ancestors is None:
                    # Otherwise, we're trying to discover the heads.
                    # Assume this is a head because if it isn't, the next step
                    # will eventually remove it.
                    heads[n] = True
                    # But, obviously its parents aren't.
                    for p in self.parents(n):
                        heads.pop(p, None)
        heads = [n for n, flag in heads.iteritems() if flag]
        roots = list(roots)
        assert orderedout
        assert roots
        assert heads
        return (orderedout, roots, heads)
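
    # --- Editor's illustration (sketch, not part of revlog.py) -----------
    # On a linear log 0-1-2-3, nodesbetween([node(1)], [node(2)]) returns
    # ([node(1), node(2)], [node(1)], [node(2)]): both endpoints count as
    # their own ancestor/descendant, so they land in 'nodes' as well as in
    # 'outroots' and 'outheads'.
    # ----------------------------------------------------------------------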

    def headrevs(self):
        try:
            return self.index.headrevs()
        except AttributeError:
            return self._headrevs()

    def _headrevs(self):
        count = len(self)
        if not count:
            return [nullrev]
        # we won't iter over filtered revs, so nobody is a head at start
        ishead = [0] * (count + 1)
        index = self.index
        for r in self:
            ishead[r] = 1  # I may be a head
            e = index[r]
            ishead[e[5]] = ishead[e[6]] = 0  # my parents are not
        return [r for r, val in enumerate(ishead) if val]

    def heads(self, start=None, stop=None):
        """return the list of all nodes that have no children

        if start is specified, only heads that are descendants of
        start will be returned
        if stop is specified, it will consider all the revs from stop
        as if they had no children
        """
        if start is None and stop is None:
            if not len(self):
                return [nullid]
            return [self.node(r) for r in self.headrevs()]

        if start is None:
            start = nullid
        if stop is None:
            stop = []
        stoprevs = set([self.rev(n) for n in stop])
        startrev = self.rev(start)
        reachable = set((startrev,))
        heads = set((startrev,))

        parentrevs = self.parentrevs
        for r in self.revs(start=startrev + 1):
            for p in parentrevs(r):
                if p in reachable:
                    if r not in stoprevs:
                        reachable.add(r)
                        heads.add(r)
                    if p in heads and p not in stoprevs:
                        heads.remove(p)

        return [self.node(r) for r in heads]

    def children(self, node):
        """find the children of a given node"""
        c = []
        p = self.rev(node)
        for r in self.revs(start=p + 1):
            prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
            if prevs:
                for pr in prevs:
                    if pr == p:
                        c.append(self.node(r))
            elif p == nullrev:
                c.append(self.node(r))
        return c

    def descendant(self, start, end):
        if start == nullrev:
            return True
        for i in self.descendants([start]):
            if i == end:
                return True
            elif i > end:
                break
        return False

    def commonancestorsheads(self, a, b):
        """calculate all the heads of the common ancestors of nodes a and b"""
        a, b = self.rev(a), self.rev(b)
        try:
            ancs = self.index.commonancestorsheads(a, b)
        except (AttributeError, OverflowError): # C implementation failed
            ancs = ancestor.commonancestorsheads(self.parentrevs, a, b)
        return map(self.node, ancs)

    def isancestor(self, a, b):
        """return True if node a is an ancestor of node b

        The implementation of this is trivial but the use of
        commonancestorsheads is not."""
        return a in self.commonancestorsheads(a, b)

    def ancestor(self, a, b):
        """calculate the "best" common ancestor of nodes a and b"""

        a, b = self.rev(a), self.rev(b)
        try:
            ancs = self.index.ancestors(a, b)
        except (AttributeError, OverflowError):
            ancs = ancestor.ancestors(self.parentrevs, a, b)
        if ancs:
            # choose a consistent winner when there's a tie
            return min(map(self.node, ancs))
        return nullid

    def _match(self, id):
        if isinstance(id, int):
            # rev
            return self.node(id)
        if len(id) == 20:
            # possibly a binary node
            # odds of a binary node being all hex in ASCII are 1 in 10**25
            try:
                node = id
                self.rev(node) # quick search the index
                return node
            except LookupError:
                pass # may be partial hex id
        try:
            # str(rev)
            rev = int(id)
            if str(rev) != id:
                raise ValueError
            if rev < 0:
                rev = len(self) + rev
            if rev < 0 or rev >= len(self):
                raise ValueError
            return self.node(rev)
        except (ValueError, OverflowError):
            pass
        if len(id) == 40:
            try:
                # a full hex nodeid?
                node = bin(id)
                self.rev(node)
                return node
            except (TypeError, LookupError):
                pass

    def _partialmatch(self, id):
        try:
            n = self.index.partialmatch(id)
            if n and self.hasnode(n):
                return n
            return None
        except RevlogError:
            # parsers.c radix tree lookup gave multiple matches
            # fall through to slow path that filters hidden revisions
            pass
        except (AttributeError, ValueError):
            # we are pure python, or key was too short to search radix tree
            pass

        if id in self._pcache:
            return self._pcache[id]

        if len(id) < 40:
            try:
                # hex(node)[:...]
                l = len(id) // 2  # grab an even number of digits
                prefix = bin(id[:l * 2])
                nl = [e[7] for e in self.index if e[7].startswith(prefix)]
                nl = [n for n in nl if hex(n).startswith(id) and
                      self.hasnode(n)]
                if len(nl) > 0:
                    if len(nl) == 1:
                        self._pcache[id] = nl[0]
                        return nl[0]
                    raise LookupError(id, self.indexfile,
                                      _('ambiguous identifier'))
                return None
            except TypeError:
                pass

    def lookup(self, id):
        """locate a node based on:
            - revision number or str(revision number)
            - nodeid or subset of hex nodeid
        """
        n = self._match(id)
        if n is not None:
            return n
        n = self._partialmatch(id)
        if n:
            return n

        raise LookupError(id, self.indexfile, _('no match found'))

    def cmp(self, node, text):
        """compare text with a given file revision

        returns True if text is different than what is stored.
        """
        p1, p2 = self.parents(node)
        return hash(text, p1, p2) != node
912
913
913 def _addchunk(self, offset, data):
914 def _addchunk(self, offset, data):
914 o, d = self._chunkcache
915 o, d = self._chunkcache
915 # try to add to existing cache
916 # try to add to existing cache
916 if o + len(d) == offset and len(d) + len(data) < _chunksize:
917 if o + len(d) == offset and len(d) + len(data) < _chunksize:
917 self._chunkcache = o, d + data
918 self._chunkcache = o, d + data
918 else:
919 else:
919 self._chunkcache = offset, data
920 self._chunkcache = offset, data
920
921
    def _loadchunk(self, offset, length):
        if self._inline:
            df = self.opener(self.indexfile)
        else:
            df = self.opener(self.datafile)

        # Cache data both forward and backward around the requested
        # data, in a fixed size window. This helps speed up operations
        # involving reading the revlog backwards.
        cachesize = self._chunkcachesize
        realoffset = offset & ~(cachesize - 1)
        reallength = (((offset + length + cachesize) & ~(cachesize - 1))
                      - realoffset)
        df.seek(realoffset)
        d = df.read(reallength)
        df.close()
        self._addchunk(realoffset, d)
        if offset != realoffset or reallength != length:
            return util.buffer(d, offset - realoffset, length)
        return d

    def _getchunk(self, offset, length):
        o, d = self._chunkcache
        l = len(d)

        # is it in the cache?
        cachestart = offset - o
        cacheend = cachestart + length
        if cachestart >= 0 and cacheend <= l:
            if cachestart == 0 and cacheend == l:
                return d # avoid a copy
            return util.buffer(d, cachestart, cacheend - cachestart)

        return self._loadchunk(offset, length)

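    # In an inline revlog the index entries and data chunks are interleaved
    # in the .i file, so rev r's data is shifted by the (r + 1) index
    # entries that precede it; _chunkraw compensates with the
    # (rev + 1) * self._io.size adjustments below.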
    def _chunkraw(self, startrev, endrev):
        start = self.start(startrev)
        end = self.end(endrev)
        if self._inline:
            start += (startrev + 1) * self._io.size
            end += (endrev + 1) * self._io.size
        length = end - start
        return self._getchunk(start, length)

    def _chunk(self, rev):
        return decompress(self._chunkraw(rev, rev))

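    # _chunks amortizes I/O: one contiguous _chunkraw call warms the chunk
    # cache for the whole ascending run of revs, and each rev's compressed
    # chunk is then sliced out of the cached buffer rather than read
    # individually from disk.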
    def _chunks(self, revs):
        '''faster version of [self._chunk(rev) for rev in revs]

        Assumes that revs is in ascending order.'''
        if not revs:
            return []
        start = self.start
        length = self.length
        inline = self._inline
        iosize = self._io.size
        buffer = util.buffer

        l = []
        ladd = l.append

        # preload the cache
        try:
            while True:
                # ensure that the cache doesn't change out from under us
                _cache = self._chunkcache
                self._chunkraw(revs[0], revs[-1])
                if _cache == self._chunkcache:
                    break
            offset, data = _cache
        except OverflowError:
            # issue4215 - we can't cache a run of chunks greater than
            # 2G on Windows
            return [self._chunk(rev) for rev in revs]

        for rev in revs:
            chunkstart = start(rev)
            if inline:
                chunkstart += (rev + 1) * iosize
            chunklength = length(rev)
            ladd(decompress(buffer(data, chunkstart - offset, chunklength)))

        return l

    def _chunkclear(self):
        self._chunkcache = (0, '')

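    # Delta-parent examples, derived from the index layout used below: with
    # generaldelta the stored base (index entry field 3) is the delta
    # parent, so deltaparent(7) may be any earlier rev, e.g. 3 (illustrative
    # numbers); without generaldelta the delta parent is always rev - 1, so
    # deltaparent(7) == 6. A rev that is its own base is stored as a full
    # text, reported as nullrev.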
    def deltaparent(self, rev):
        """return deltaparent of the given revision"""
        base = self.index[rev][3]
        if base == rev:
            return nullrev
        elif self._generaldelta:
            return base
        else:
            return rev - 1

    def revdiff(self, rev1, rev2):
        """return or calculate a delta between two revisions"""
        if rev1 != nullrev and self.deltaparent(rev2) == rev1:
            return str(self._chunk(rev2))

        return mdiff.textdiff(self.revision(rev1),
                              self.revision(rev2))

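    # Reconstruction sketch for revision(): walk the delta chain from the
    # requested rev back to a full text (or to the cached rev), read every
    # chunk in one pass via _chunks(), then fold the deltas over the base
    # text with mdiff.patches(). With generaldelta the chain follows the
    # stored base pointers; otherwise it is simply rev, rev - 1, ..., base.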
    def revision(self, nodeorrev):
        """return an uncompressed revision of a given node or revision
        number.
        """
        if isinstance(nodeorrev, int):
            rev = nodeorrev
            node = self.node(rev)
        else:
            node = nodeorrev
            rev = None

        _cache = self._cache # grab local copy of cache to avoid thread race
        cachedrev = None
        if node == nullid:
            return ""
        if _cache:
            if _cache[0] == node:
                return _cache[2]
            cachedrev = _cache[1]

        # look up what we need to read
        text = None
        if rev is None:
            rev = self.rev(node)

        # check rev flags
        if self.flags(rev) & ~REVIDX_KNOWN_FLAGS:
            raise RevlogError(_('incompatible revision flag %x') %
                              (self.flags(rev) & ~REVIDX_KNOWN_FLAGS))

        # build delta chain
        chain = []
        index = self.index # for performance
        generaldelta = self._generaldelta
        iterrev = rev
        e = index[iterrev]
        while iterrev != e[3] and iterrev != cachedrev:
            chain.append(iterrev)
            if generaldelta:
                iterrev = e[3]
            else:
                iterrev -= 1
            e = index[iterrev]

        if iterrev == cachedrev:
            # cache hit
            text = _cache[2]
        else:
            chain.append(iterrev)
        chain.reverse()

        # drop cache to save memory
        self._cache = None

        bins = self._chunks(chain)
        if text is None:
            text = str(bins[0])
            bins = bins[1:]

        text = mdiff.patches(text, bins)

        text = self._checkhash(text, node, rev)

        self._cache = (node, rev, text)
        return text

    def hash(self, text, p1, p2):
        """Compute a node hash.

        Available as a function so that subclasses can replace the hash
        as needed.
        """
        return hash(text, p1, p2)

    def _checkhash(self, text, node, rev):
        p1, p2 = self.parents(node)
        self.checkhash(text, p1, p2, node, rev)
        return text

    def checkhash(self, text, p1, p2, node, rev=None):
        if node != self.hash(text, p1, p2):
            revornode = rev
            if revornode is None:
                revornode = templatefilters.short(hex(node))
            raise RevlogError(_("integrity check failed on %s:%s")
                              % (self.indexfile, revornode))

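    # checkinlinesize migrates an inline revlog (data interleaved with the
    # index in the .i file) to the split .i/.d layout once the stored data
    # grows past _maxinline (128 KiB); small revlogs stay inline to avoid
    # keeping a second file around.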
    def checkinlinesize(self, tr, fp=None):
        if not self._inline or (self.start(-2) + self.length(-2)) < _maxinline:
            return

        trinfo = tr.find(self.indexfile)
        if trinfo is None:
            raise RevlogError(_("%s not found in the transaction")
                              % self.indexfile)

        trindex = trinfo[2]
        dataoff = self.start(trindex)

        tr.add(self.datafile, dataoff)

        if fp:
            fp.flush()
            fp.close()

        df = self.opener(self.datafile, 'w')
        try:
            for r in self:
                df.write(self._chunkraw(r, r))
        finally:
            df.close()

        fp = self.opener(self.indexfile, 'w', atomictemp=True)
        self.version &= ~(REVLOGNGINLINEDATA)
        self._inline = False
        for i in self:
            e = self._io.packentry(self.index[i], self.node, self.version, i)
            fp.write(e)

        # if we don't call close, the temp file will never replace the
        # real index
        fp.close()

        tr.replace(self.indexfile, trindex * self._io.size)
        self._chunkclear()

    def addrevision(self, text, transaction, link, p1, p2, cachedelta=None,
                    node=None):
        """add a revision to the log

        text - the revision data to add
        transaction - the transaction object used for rollback
        link - the linkrev data to add
        p1, p2 - the parent nodeids of the revision
        cachedelta - an optional precomputed delta
        node - nodeid of revision; typically node is not specified, and it is
            computed by default as hash(text, p1, p2); however, subclasses
            might use a different hashing method (and override checkhash()
            in that case)
        """
        if link == nullrev:
            raise RevlogError(_("attempted to add linkrev -1 to %s")
                              % self.indexfile)
        node = node or self.hash(text, p1, p2)
        if node in self.nodemap:
            return node

        dfh = None
        if not self._inline:
            dfh = self.opener(self.datafile, "a")
        ifh = self.opener(self.indexfile, "a+")
        try:
            return self._addrevision(node, text, transaction, link, p1, p2,
                                     cachedelta, ifh, dfh)
        finally:
            if dfh:
                dfh.close()
            ifh.close()

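    # compress() encodes its result as a (header, data) pair: header '' means
    # zlib-compressed data (or a literal that already starts with '\0'),
    # while header 'u' marks an uncompressed literal. Texts shorter than
    # 44 bytes skip zlib entirely, since the compression overhead is not
    # expected to pay off at that size.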
    def compress(self, text):
        """ generate a possibly-compressed representation of text """
        if not text:
            return ("", text)
        l = len(text)
        bin = None
        if l < 44:
            pass
        elif l > 1000000:
            # zlib makes an internal copy, thus doubling memory usage for
            # large files, so let's do this in pieces
            z = zlib.compressobj()
            p = []
            pos = 0
            while pos < l:
                pos2 = pos + 2**20
                p.append(z.compress(text[pos:pos2]))
                pos = pos2
            p.append(z.flush())
            if sum(map(len, p)) < l:
                bin = "".join(p)
        else:
            bin = _compress(text)
        if bin is None or len(bin) > l:
            if text[0] == '\0':
                return ("", text)
            return ('u', text)
        return ("", bin)

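    # Worked example of the full-text-vs-delta decision in _addrevision,
    # using illustrative numbers: with textlen = 1000, a delta survives only
    # if dist <= 4000 (bounding the read I/O back to the chain base), the
    # compressed delta l <= 1000, the chain's compressed deltas sum to
    # <= 2000 (bounding the CPU spent patching), and chainlen stays within
    # _maxchainlen when that limit is configured; otherwise the full text
    # is stored and a new chain starts at this revision.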
    def _addrevision(self, node, text, transaction, link, p1, p2,
                     cachedelta, ifh, dfh):
        """internal function to add revisions to the log

        see addrevision for argument descriptions.
        invariants:
        - text is optional (can be None); if not set, cachedelta must be set.
          if both are set, they must correspond to each other.
        """
        btext = [text]
        def buildtext():
            if btext[0] is not None:
                return btext[0]
            # flush any pending writes here so we can read it in revision
            if dfh:
                dfh.flush()
            ifh.flush()
            basetext = self.revision(self.node(cachedelta[0]))
            btext[0] = mdiff.patch(basetext, cachedelta[1])
            try:
                self.checkhash(btext[0], p1, p2, node)
            except CensoredNodeError:
                pass # always import a censor tombstone.
            return btext[0]

        def builddelta(rev):
            # can we use the cached delta?
            if cachedelta and cachedelta[0] == rev:
                delta = cachedelta[1]
            else:
                t = buildtext()
                ptext = self.revision(self.node(rev))
                delta = mdiff.textdiff(ptext, t)
            data = self.compress(delta)
            l = len(data[1]) + len(data[0])
            if basecache[0] == rev:
                chainbase = basecache[1]
            else:
                chainbase = self.chainbase(rev)
            dist = l + offset - self.start(chainbase)
            if self._generaldelta:
                base = rev
            else:
                base = chainbase
            chainlen, compresseddeltalen = self._chaininfo(rev)
            chainlen += 1
            compresseddeltalen += l
            return dist, l, data, base, chainbase, chainlen, compresseddeltalen

        curr = len(self)
        prev = curr - 1
        base = chainbase = curr
        chainlen = None
        offset = self.end(prev)
        flags = 0
        d = None
        if self._basecache is None:
            self._basecache = (prev, self.chainbase(prev))
        basecache = self._basecache
        p1r, p2r = self.rev(p1), self.rev(p2)

        # should we try to build a delta?
        if prev != nullrev:
            if self._generaldelta:
                if p1r >= basecache[1]:
                    d = builddelta(p1r)
                elif p2r >= basecache[1]:
                    d = builddelta(p2r)
                else:
                    d = builddelta(prev)
            else:
                d = builddelta(prev)
            dist, l, data, base, chainbase, chainlen, compresseddeltalen = d

        # full versions are inserted when the needed deltas
        # become comparable to the uncompressed text
        if text is None:
            textlen = mdiff.patchedsize(self.rawsize(cachedelta[0]),
                                        cachedelta[1])
        else:
            textlen = len(text)

        # - 'dist' is the distance from the base revision -- bounding it limits
        #   the amount of I/O we need to do.
        # - 'compresseddeltalen' is the sum of the total size of deltas we need
        #   to apply -- bounding it limits the amount of CPU we consume.
        if (d is None or dist > textlen * 4 or l > textlen or
            compresseddeltalen > textlen * 2 or
            (self._maxchainlen and chainlen > self._maxchainlen)):
            text = buildtext()
            data = self.compress(text)
            l = len(data[1]) + len(data[0])
            base = chainbase = curr

        e = (offset_type(offset, flags), l, textlen,
             base, link, p1r, p2r, node)
        self.index.insert(-1, e)
        self.nodemap[node] = curr

        entry = self._io.packentry(e, self.node, self.version, curr)
        self._writeentry(transaction, ifh, dfh, entry, data, link, offset)

        if type(text) == str: # only accept immutable objects
            self._cache = (node, curr, text)
        self._basecache = (curr, chainbase)
        return node

    def _writeentry(self, transaction, ifh, dfh, entry, data, link, offset):
        curr = len(self) - 1
        if not self._inline:
            transaction.add(self.datafile, offset)
            transaction.add(self.indexfile, curr * len(entry))
            if data[0]:
                dfh.write(data[0])
            dfh.write(data[1])
            dfh.flush()
            ifh.write(entry)
        else:
            offset += curr * self._io.size
            transaction.add(self.indexfile, offset, curr)
            ifh.write(entry)
            ifh.write(data[0])
            ifh.write(data[1])
            self.checkinlinesize(transaction, ifh)

    def addgroup(self, bundle, linkmapper, transaction):
        """
        add a delta group

        given a set of deltas, add them to the revision log. the
        first delta is against its parent, which should be in our
        log, the rest are against the previous delta.
        """

        # track the base of the current delta log
        content = []
        node = None

        r = len(self)
        end = 0
        if r:
            end = self.end(r - 1)
        ifh = self.opener(self.indexfile, "a+")
        isize = r * self._io.size
        if self._inline:
            transaction.add(self.indexfile, end + isize, r)
            dfh = None
        else:
            transaction.add(self.indexfile, isize, r)
            transaction.add(self.datafile, end)
            dfh = self.opener(self.datafile, "a")

        try:
            # loop through our set of deltas
            chain = None
            while True:
                chunkdata = bundle.deltachunk(chain)
                if not chunkdata:
                    break
                node = chunkdata['node']
                p1 = chunkdata['p1']
                p2 = chunkdata['p2']
                cs = chunkdata['cs']
                deltabase = chunkdata['deltabase']
                delta = chunkdata['delta']

                content.append(node)

                link = linkmapper(cs)
                if node in self.nodemap:
                    # this can happen if two branches make the same change
                    chain = node
                    continue

                for p in (p1, p2):
                    if p not in self.nodemap:
                        raise LookupError(p, self.indexfile,
                                          _('unknown parent'))

                if deltabase not in self.nodemap:
                    raise LookupError(deltabase, self.indexfile,
                                      _('unknown delta base'))

                baserev = self.rev(deltabase)
                chain = self._addrevision(node, None, transaction, link,
                                          p1, p2, (baserev, delta), ifh, dfh)
                if not dfh and not self._inline:
                    # addrevision switched from inline to conventional
                    # reopen the index
                    ifh.close()
                    dfh = self.opener(self.datafile, "a")
                    ifh = self.opener(self.indexfile, "a")
        finally:
            if dfh:
                dfh.close()
            ifh.close()

        return content

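    # Worked example for getstrippoint, assuming a linear history of revs
    # 0..3 with linkrevs [0, 1, 5, 6] and minlink = 5: the walk starts at
    # head rev 3 (linkrev 6), steps backwards while any linkrev >= 5 is
    # still reachable, and returns (2, set()) -- truncate at rev 2, with
    # no surviving revision left holding a broken linkrev.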
    def getstrippoint(self, minlink):
        """find the minimum rev that must be stripped to strip the linkrev

        Returns a tuple containing the minimum rev and a set of all revs that
        have linkrevs that will be broken by this strip.
        """
        brokenrevs = set()
        strippoint = len(self)

        heads = {}
        futurelargelinkrevs = set()
        for head in self.headrevs():
            headlinkrev = self.linkrev(head)
            heads[head] = headlinkrev
            if headlinkrev >= minlink:
                futurelargelinkrevs.add(headlinkrev)

        # This algorithm involves walking down the rev graph, starting at the
        # heads. Since the revs are topologically sorted according to linkrev,
        # once all head linkrevs are below the minlink, we know there are
        # no more revs that could have a linkrev greater than minlink.
        # So we can stop walking.
        while futurelargelinkrevs:
            strippoint -= 1
            linkrev = heads.pop(strippoint)

            if linkrev < minlink:
                brokenrevs.add(strippoint)
            else:
                futurelargelinkrevs.remove(linkrev)

            for p in self.parentrevs(strippoint):
                if p != nullrev:
                    plinkrev = self.linkrev(p)
                    heads[p] = plinkrev
                    if plinkrev >= minlink:
                        futurelargelinkrevs.add(plinkrev)

        return strippoint, brokenrevs

    def strip(self, minlink, transaction):
        """truncate the revlog on the first revision with a linkrev >= minlink

        This function is called when we're stripping revision minlink and
        its descendants from the repository.

        We have to remove all revisions with linkrev >= minlink, because
        the equivalent changelog revisions will be renumbered after the
        strip.

        So we truncate the revlog on the first of these revisions, and
        trust that the caller has saved the revisions that shouldn't be
        removed and that it'll re-add them after this truncation.
        """
        if len(self) == 0:
            return

        rev, _ = self.getstrippoint(minlink)
        if rev == len(self):
            return

        # first truncate the files on disk
        end = self.start(rev)
        if not self._inline:
            transaction.add(self.datafile, end)
            end = rev * self._io.size
        else:
            end += rev * self._io.size

        transaction.add(self.indexfile, end)

        # then reset internal state in memory to forget those revisions
        self._cache = None
        self._chaininfocache = {}
        self._chunkclear()
        for x in xrange(rev, len(self)):
            del self.nodemap[self.node(x)]

        del self.index[rev:-1]

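    # checksize reports how far the files on disk diverge from the index:
    # dd is the surplus (or, if negative, missing) byte count in the data
    # file relative to what the last index entry implies, and di is the
    # stray byte count in the index file itself; (0, 0) means the revlog
    # looks consistent. Repository-wide integrity checking is the expected
    # caller.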
    def checksize(self):
        expected = 0
        if len(self):
            expected = max(0, self.end(len(self) - 1))

        try:
            f = self.opener(self.datafile)
            f.seek(0, 2)
            actual = f.tell()
            f.close()
            dd = actual - expected
        except IOError, inst:
            if inst.errno != errno.ENOENT:
                raise
            dd = 0

        try:
            f = self.opener(self.indexfile)
            f.seek(0, 2)
            actual = f.tell()
            f.close()
            s = self._io.size
            i = max(0, actual // s)
            di = actual - (i * s)
            if self._inline:
                databytes = 0
                for r in self:
                    databytes += max(0, self.length(r))
                dd = 0
                di = actual - len(self) * s - databytes
        except IOError, inst:
            if inst.errno != errno.ENOENT:
                raise
            di = 0

        return (dd, di)

    def files(self):
        res = [self.indexfile]
        if not self._inline:
            res.append(self.datafile)
        return res