revlog: define censored flag for revlogng index...
Mike Edgar
r23855:4f23081c default
@@ -1,1535 +1,1537 @@
# revlog.py - storage back-end for mercurial
#
# Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.

"""Storage back-end for Mercurial.

This provides efficient delta storage with O(1) retrieve and append
and O(changes) merge between branches.
"""

# import stuff from node for others to import from revlog
from node import bin, hex, nullid, nullrev
from i18n import _
import ancestor, mdiff, parsers, error, util, templatefilters
import struct, zlib, errno

_pack = struct.pack
_unpack = struct.unpack
_compress = zlib.compress
_decompress = zlib.decompress
_sha = util.sha1

# revlog header flags
REVLOGV0 = 0
REVLOGNG = 1
REVLOGNGINLINEDATA = (1 << 16)
REVLOGGENERALDELTA = (1 << 17)
REVLOG_DEFAULT_FLAGS = REVLOGNGINLINEDATA
REVLOG_DEFAULT_FORMAT = REVLOGNG
REVLOG_DEFAULT_VERSION = REVLOG_DEFAULT_FORMAT | REVLOG_DEFAULT_FLAGS
REVLOGNG_FLAGS = REVLOGNGINLINEDATA | REVLOGGENERALDELTA

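# Illustration (not in the upstream source): the version word stored in
# the first four bytes of the index combines the constants above; the
# low 16 bits select the format, the high 16 bits carry header flags:
#
#   REVLOG_DEFAULT_VERSION == REVLOGNG | REVLOGNGINLINEDATA == 0x10001
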
# revlog index flags
-REVIDX_KNOWN_FLAGS = 0
+REVIDX_ISCENSORED = (1 << 15) # revision has censor metadata, must be verified
+REVIDX_DEFAULT_FLAGS = 0
+REVIDX_KNOWN_FLAGS = REVIDX_ISCENSORED

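# Illustration (not in the upstream source): a censor-aware reader is
# expected to check the per-revision flags (see flags() below) before
# trusting revision data, along the lines of:
#
#   if rl.flags(rev) & REVIDX_ISCENSORED:   # 'rl' is any revlog instance
#       ...  # the stored text is censor metadata, not real file content
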
# max size of revlog with inline data
_maxinline = 131072
_chunksize = 1048576

RevlogError = error.RevlogError
LookupError = error.LookupError
CensoredNodeError = error.CensoredNodeError

def getoffset(q):
    return int(q >> 16)

def gettype(q):
    return int(q & 0xFFFF)

def offset_type(offset, type):
    return long(long(offset) << 16 | type)

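# Illustration (not in the upstream source): the helpers above pack the
# data offset and the 16 flag bits into the single first field of an
# index entry, and they round-trip:
#
#   q = offset_type(1024, REVIDX_ISCENSORED)
#   assert getoffset(q) == 1024
#   assert gettype(q) == REVIDX_ISCENSORED
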
_nullhash = _sha(nullid)

def hash(text, p1, p2):
    """generate a hash from the given text and its parent hashes

    This hash combines both the current file contents and its history
    in a manner that makes it easy to distinguish nodes with the same
    content in the revision graph.
    """
    # As of now, if one of the parent nodes is null, p2 is null
    if p2 == nullid:
        # deep copy of a hash is faster than creating one
        s = _nullhash.copy()
        s.update(p1)
    else:
        # none of the parent nodes are nullid
        l = [p1, p2]
        l.sort()
        s = _sha(l[0])
        s.update(l[1])
    s.update(text)
    return s.digest()
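
# Illustration (not in the upstream source): both branches above compute
# the same thing, since nullid (twenty zero bytes) sorts before any real
# node:
#
#   node == sha1(min(p1, p2) + max(p1, p2) + text).digest()
#
# so identical text with different parents still gets a distinct node id.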

def decompress(bin):
    """ decompress the given input """
    if not bin:
        return bin
    t = bin[0]
    if t == '\0':
        return bin
    if t == 'x':
        try:
            return _decompress(bin)
        except zlib.error, e:
            raise RevlogError(_("revlog decompress error: %s") % str(e))
    if t == 'u':
        return bin[1:]
    raise RevlogError(_("unknown compression type %r") % t)
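
# Illustration (not in the upstream source): the first byte of a stored
# chunk acts as a tag: 'x' is the usual zlib header byte, 'u' marks data
# kept uncompressed (the marker itself is stripped), and a leading '\0'
# means the chunk was stored literally.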

# index v0:
#  4 bytes: offset
#  4 bytes: compressed length
#  4 bytes: base rev
#  4 bytes: link rev
# 20 bytes: parent 1 nodeid
# 20 bytes: parent 2 nodeid
# 20 bytes: nodeid
indexformatv0 = ">4l20s20s20s"
v0shaoffset = 56
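
# Illustration (not in the upstream source):
#
#   struct.calcsize(">4l20s20s20s") == 76
#
# so each v0 entry is 76 bytes, and its own nodeid starts at byte
# 4 * 4 + 20 + 20 == 56, which is what v0shaoffset records.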

class revlogoldio(object):
    def __init__(self):
        self.size = struct.calcsize(indexformatv0)

    def parseindex(self, data, inline):
        s = self.size
        index = []
        nodemap = {nullid: nullrev}
        n = off = 0
        l = len(data)
        while off + s <= l:
            cur = data[off:off + s]
            off += s
            e = _unpack(indexformatv0, cur)
            # transform to revlogv1 format
            e2 = (offset_type(e[0], 0), e[1], -1, e[2], e[3],
                  nodemap.get(e[4], nullrev), nodemap.get(e[5], nullrev), e[6])
            index.append(e2)
            nodemap[e[6]] = n
            n += 1

        # add the magic null revision at -1
        index.append((0, 0, 0, -1, -1, -1, -1, nullid))

        return index, nodemap, None

    def packentry(self, entry, node, version, rev):
        if gettype(entry[0]):
            raise RevlogError(_("index entry flags need RevlogNG"))
        e2 = (getoffset(entry[0]), entry[1], entry[3], entry[4],
              node(entry[5]), node(entry[6]), entry[7])
        return _pack(indexformatv0, *e2)

# index ng:
#  6 bytes: offset
#  2 bytes: flags
#  4 bytes: compressed length
#  4 bytes: uncompressed length
#  4 bytes: base rev
#  4 bytes: link rev
#  4 bytes: parent 1 rev
#  4 bytes: parent 2 rev
# 32 bytes: nodeid
indexformatng = ">Qiiiiii20s12x"
ngshaoffset = 32
versionformat = ">I"
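
# Illustration (not in the upstream source): only 20 of the 32 nodeid
# bytes are meaningful; the 12x pads each entry to a fixed size:
#
#   struct.calcsize(">Qiiiiii20s12x") == 8 + 6 * 4 + 20 + 12 == 64
#
# A parsed entry is the 8-tuple:
#
#   (offset_and_flags, compressed_len, uncompressed_len, base_rev,
#    link_rev, parent1_rev, parent2_rev, nodeid)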

class revlogio(object):
    def __init__(self):
        self.size = struct.calcsize(indexformatng)

    def parseindex(self, data, inline):
        # call the C implementation to parse the index data
        index, cache = parsers.parse_index2(data, inline)
        return index, getattr(index, 'nodemap', None), cache

    def packentry(self, entry, node, version, rev):
        p = _pack(indexformatng, *entry)
        if rev == 0:
            p = _pack(versionformat, version) + p[4:]
        return p
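
# Illustration (not in the upstream source): revision 0 always starts at
# data offset 0, so the first four bytes of its index entry are zero and
# carry no information; packentry above reuses them to store the revlog
# version word, which is why the index file needs no separate header.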

class revlog(object):
    """
    the underlying revision storage object

    A revlog consists of two parts, an index and the revision data.

    The index is a file with a fixed record size containing
    information on each revision, including its nodeid (hash), the
    nodeids of its parents, the position and offset of its data within
    the data file, and the revision it's based on. Finally, each entry
    contains a linkrev entry that can serve as a pointer to external
    data.

    The revision data itself is a linear collection of data chunks.
    Each chunk represents a revision and is usually represented as a
    delta against the previous chunk. To bound lookup time, runs of
    deltas are limited to about 2 times the length of the original
    version data. This makes retrieval of a version proportional to
    its size, or O(1) relative to the number of revisions.

    Both pieces of the revlog are written to in an append-only
    fashion, which means we never need to rewrite a file to insert or
    remove data, and can use some simple techniques to avoid the need
    for locking while reading.
    """
    def __init__(self, opener, indexfile):
        """
        create a revlog object

        opener is a function that abstracts the file opening operation
        and can be used to implement COW semantics or the like.
        """
        self.indexfile = indexfile
        self.datafile = indexfile[:-2] + ".d"
        self.opener = opener
        self._cache = None
        self._basecache = None
        self._chunkcache = (0, '')
        self._chunkcachesize = 65536
        self._maxchainlen = None
        self.index = []
        self._pcache = {}
        self._nodecache = {nullid: nullrev}
        self._nodepos = None

        v = REVLOG_DEFAULT_VERSION
        opts = getattr(opener, 'options', None)
        if opts is not None:
            if 'revlogv1' in opts:
                if 'generaldelta' in opts:
                    v |= REVLOGGENERALDELTA
            else:
                v = 0
            if 'chunkcachesize' in opts:
                self._chunkcachesize = opts['chunkcachesize']
            if 'maxchainlen' in opts:
                self._maxchainlen = opts['maxchainlen']

        if self._chunkcachesize <= 0:
            raise RevlogError(_('revlog chunk cache size %r is not greater '
                                'than 0') % self._chunkcachesize)
        elif self._chunkcachesize & (self._chunkcachesize - 1):
            raise RevlogError(_('revlog chunk cache size %r is not a power '
                                'of 2') % self._chunkcachesize)

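        # Illustration (not in the upstream source): 'x & (x - 1)' clears
        # the lowest set bit, so it is zero exactly when x is a power of
        # two, e.g. 65536 & 65535 == 0 while 65537 & 65536 != 0.
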
        i = ''
        self._initempty = True
        try:
            f = self.opener(self.indexfile)
            i = f.read()
            f.close()
            if len(i) > 0:
                v = struct.unpack(versionformat, i[:4])[0]
                self._initempty = False
        except IOError, inst:
            if inst.errno != errno.ENOENT:
                raise

        self.version = v
        self._inline = v & REVLOGNGINLINEDATA
        self._generaldelta = v & REVLOGGENERALDELTA
        flags = v & ~0xFFFF
        fmt = v & 0xFFFF
        if fmt == REVLOGV0 and flags:
            raise RevlogError(_("index %s unknown flags %#04x for format v0")
                              % (self.indexfile, flags >> 16))
        elif fmt == REVLOGNG and flags & ~REVLOGNG_FLAGS:
            raise RevlogError(_("index %s unknown flags %#04x for revlogng")
                              % (self.indexfile, flags >> 16))
        elif fmt > REVLOGNG:
            raise RevlogError(_("index %s unknown format %d")
                              % (self.indexfile, fmt))

        self._io = revlogio()
        if self.version == REVLOGV0:
            self._io = revlogoldio()
        try:
            d = self._io.parseindex(i, self._inline)
        except (ValueError, IndexError):
            raise RevlogError(_("index %s is corrupted") % (self.indexfile))
        self.index, nodemap, self._chunkcache = d
        if nodemap is not None:
            self.nodemap = self._nodecache = nodemap
        if not self._chunkcache:
            self._chunkclear()
        # revnum -> (chain-length, sum-delta-length)
        self._chaininfocache = {}

    def tip(self):
        return self.node(len(self.index) - 2)
    def __len__(self):
        return len(self.index) - 1
    def __iter__(self):
        return iter(xrange(len(self)))
    def revs(self, start=0, stop=None):
        """iterate over all revs in this revlog (from start to stop)"""
        step = 1
        if stop is not None:
            if start > stop:
                step = -1
            stop += step
        else:
            stop = len(self)
        return xrange(start, stop, step)
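
    # Illustration (not in the upstream source): 'stop' is inclusive and
    # iteration may run backwards, e.g. revs(start=5, stop=2) yields
    # 5, 4, 3, 2. The arithmetic in tip() and __len__ accounts for the
    # extra nullid entry kept at the end of self.index.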

    @util.propertycache
    def nodemap(self):
        self.rev(self.node(0))
        return self._nodecache

    def hasnode(self, node):
        try:
            self.rev(node)
            return True
        except KeyError:
            return False

    def clearcaches(self):
        try:
            self._nodecache.clearcaches()
        except AttributeError:
            self._nodecache = {nullid: nullrev}
            self._nodepos = None

    def rev(self, node):
        try:
            return self._nodecache[node]
        except TypeError:
            raise
        except RevlogError:
            # parsers.c radix tree lookup failed
            raise LookupError(node, self.indexfile, _('no node'))
        except KeyError:
            # pure python cache lookup failed
            n = self._nodecache
            i = self.index
            p = self._nodepos
            if p is None:
                p = len(i) - 2
            for r in xrange(p, -1, -1):
                v = i[r][7]
                n[v] = r
                if v == node:
                    self._nodepos = r - 1
                    return r
            raise LookupError(node, self.indexfile, _('no node'))

    def node(self, rev):
        return self.index[rev][7]
    def linkrev(self, rev):
        return self.index[rev][4]
    def parents(self, node):
        i = self.index
        d = i[self.rev(node)]
        return i[d[5]][7], i[d[6]][7] # map revisions to nodes inline
    def parentrevs(self, rev):
        return self.index[rev][5:7]
    def start(self, rev):
        return int(self.index[rev][0] >> 16)
    def end(self, rev):
        return self.start(rev) + self.length(rev)
    def length(self, rev):
        return self.index[rev][1]
    def chainbase(self, rev):
        index = self.index
        base = index[rev][3]
        while base != rev:
            rev = base
            base = index[rev][3]
        return base
    def chainlen(self, rev):
        return self._chaininfo(rev)[0]

    def _chaininfo(self, rev):
        chaininfocache = self._chaininfocache
        if rev in chaininfocache:
            return chaininfocache[rev]
        index = self.index
        generaldelta = self._generaldelta
        iterrev = rev
        e = index[iterrev]
        clen = 0
        compresseddeltalen = 0
        while iterrev != e[3]:
            clen += 1
            compresseddeltalen += e[1]
            if generaldelta:
                iterrev = e[3]
            else:
                iterrev -= 1
            if iterrev in chaininfocache:
                t = chaininfocache[iterrev]
                clen += t[0]
                compresseddeltalen += t[1]
                break
            e = index[iterrev]
        else:
            # Add text length of base since decompressing that also takes
            # work. For cache hits the length is already included.
            compresseddeltalen += e[1]
        r = (clen, compresseddeltalen)
        chaininfocache[rev] = r
        return r
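
    # Illustration (not in the upstream source): with generaldelta each
    # entry names its delta base explicitly via e[3], so a chain may jump
    # to any earlier revision; without it, deltas always apply against
    # rev - 1. The 'while ... else' above uses Python's loop-else: the
    # else branch runs only when the walk reached the chain base without
    # hitting a cached prefix.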

    def flags(self, rev):
        return self.index[rev][0] & 0xFFFF
    def rawsize(self, rev):
        """return the length of the uncompressed text for a given revision"""
        l = self.index[rev][2]
        if l >= 0:
            return l

        t = self.revision(self.node(rev))
        return len(t)
    size = rawsize

    def ancestors(self, revs, stoprev=0, inclusive=False):
        """Generate the ancestors of 'revs' in reverse topological order.
        Does not generate revs lower than stoprev.

        See the documentation for ancestor.lazyancestors for more details."""

        return ancestor.lazyancestors(self.parentrevs, revs, stoprev=stoprev,
                                      inclusive=inclusive)

    def descendants(self, revs):
        """Generate the descendants of 'revs' in revision order.

        Yield a sequence of revision numbers starting with a child of
        some rev in revs, i.e., each revision is *not* considered a
        descendant of itself. Results are ordered by revision number (a
        topological sort)."""
        first = min(revs)
        if first == nullrev:
            for i in self:
                yield i
            return

        seen = set(revs)
        for i in self.revs(start=first + 1):
            for x in self.parentrevs(i):
                if x != nullrev and x in seen:
                    seen.add(i)
                    yield i
                    break

    def findcommonmissing(self, common=None, heads=None):
        """Return a tuple of the ancestors of common and the ancestors of heads
        that are not ancestors of common. In revset terminology, we return the
        tuple:

          ::common, (::heads) - (::common)

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of node IDs. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullid."""
        if common is None:
            common = [nullid]
        if heads is None:
            heads = self.heads()

        common = [self.rev(n) for n in common]
        heads = [self.rev(n) for n in heads]

        # we want the ancestors, but inclusive
        class lazyset(object):
            def __init__(self, lazyvalues):
                self.addedvalues = set()
                self.lazyvalues = lazyvalues

            def __contains__(self, value):
                return value in self.addedvalues or value in self.lazyvalues

            def __iter__(self):
                added = self.addedvalues
                for r in added:
                    yield r
                for r in self.lazyvalues:
                    if not r in added:
                        yield r

            def add(self, value):
                self.addedvalues.add(value)

            def update(self, values):
                self.addedvalues.update(values)

        has = lazyset(self.ancestors(common))
        has.add(nullrev)
        has.update(common)

        # take all ancestors from heads that aren't in has
        missing = set()
        visit = util.deque(r for r in heads if r not in has)
        while visit:
            r = visit.popleft()
            if r in missing:
                continue
            else:
                missing.add(r)
                for p in self.parentrevs(r):
                    if p not in has:
                        visit.append(p)
        missing = list(missing)
        missing.sort()
        return has, [self.node(r) for r in missing]
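
    # Illustration (not in the upstream source): findcommonmissing walks
    # backwards from 'heads' and stops at anything reachable from 'common';
    # the lazyset lets the potentially large ::common set be consulted
    # lazily instead of being materialized up front.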

    def incrementalmissingrevs(self, common=None):
        """Return an object that can be used to incrementally compute the
        revision numbers of the ancestors of arbitrary sets that are not
        ancestors of common. This is an ancestor.incrementalmissingancestors
        object.

        'common' is a list of revision numbers. If common is not supplied, uses
        nullrev.
        """
        if common is None:
            common = [nullrev]

        return ancestor.incrementalmissingancestors(self.parentrevs, common)

    def findmissingrevs(self, common=None, heads=None):
        """Return the revision numbers of the ancestors of heads that
        are not ancestors of common.

        More specifically, return a list of revision numbers corresponding to
        nodes N such that every N satisfies the following constraints:

          1. N is an ancestor of some node in 'heads'
          2. N is not an ancestor of any node in 'common'

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of revision numbers. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullid."""
        if common is None:
            common = [nullrev]
        if heads is None:
            heads = self.headrevs()

        inc = self.incrementalmissingrevs(common=common)
        return inc.missingancestors(heads)

    def findmissing(self, common=None, heads=None):
        """Return the ancestors of heads that are not ancestors of common.

        More specifically, return a list of nodes N such that every N
        satisfies the following constraints:

          1. N is an ancestor of some node in 'heads'
          2. N is not an ancestor of any node in 'common'

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of node IDs. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullid."""
        if common is None:
            common = [nullid]
        if heads is None:
            heads = self.heads()

        common = [self.rev(n) for n in common]
        heads = [self.rev(n) for n in heads]

        inc = self.incrementalmissingrevs(common=common)
        return [self.node(r) for r in inc.missingancestors(heads)]

    def nodesbetween(self, roots=None, heads=None):
        """Return a topological path from 'roots' to 'heads'.

        Return a tuple (nodes, outroots, outheads) where 'nodes' is a
        topologically sorted list of all nodes N that satisfy both of
        these constraints:

          1. N is a descendant of some node in 'roots'
          2. N is an ancestor of some node in 'heads'

        Every node is considered to be both a descendant and an ancestor
        of itself, so every reachable node in 'roots' and 'heads' will be
        included in 'nodes'.

        'outroots' is the list of reachable nodes in 'roots', i.e., the
        subset of 'roots' that is returned in 'nodes'. Likewise,
        'outheads' is the subset of 'heads' that is also in 'nodes'.

        'roots' and 'heads' are both lists of node IDs. If 'roots' is
        unspecified, uses nullid as the only root. If 'heads' is
        unspecified, uses list of all of the revlog's heads."""
        nonodes = ([], [], [])
        if roots is not None:
            roots = list(roots)
            if not roots:
                return nonodes
            lowestrev = min([self.rev(n) for n in roots])
        else:
            roots = [nullid] # Everybody's a descendant of nullid
            lowestrev = nullrev
        if (lowestrev == nullrev) and (heads is None):
            # We want _all_ the nodes!
            return ([self.node(r) for r in self], [nullid], list(self.heads()))
        if heads is None:
            # All nodes are ancestors, so the latest ancestor is the last
            # node.
            highestrev = len(self) - 1
            # Set ancestors to None to signal that every node is an ancestor.
            ancestors = None
            # Set heads to an empty dictionary for later discovery of heads
            heads = {}
        else:
            heads = list(heads)
            if not heads:
                return nonodes
            ancestors = set()
            # Turn heads into a dictionary so we can remove 'fake' heads.
            # Also, later we will be using it to filter out the heads we can't
            # find from roots.
            heads = dict.fromkeys(heads, False)
        # Start at the top and keep marking parents until we're done.
        nodestotag = set(heads)
        # Remember where the top was so we can use it as a limit later.
        highestrev = max([self.rev(n) for n in nodestotag])
        while nodestotag:
            # grab a node to tag
            n = nodestotag.pop()
            # Never tag nullid
            if n == nullid:
                continue
            # A node's revision number represents its place in a
            # topologically sorted list of nodes.
            r = self.rev(n)
            if r >= lowestrev:
                if n not in ancestors:
                    # If we are possibly a descendant of one of the roots
                    # and we haven't already been marked as an ancestor
                    ancestors.add(n) # Mark as ancestor
                    # Add non-nullid parents to list of nodes to tag.
                    nodestotag.update([p for p in self.parents(n) if
                                       p != nullid])
                elif n in heads: # We've seen it before, is it a fake head?
                    # So it is, real heads should not be the ancestors of
                    # any other heads.
                    heads.pop(n)
        if not ancestors:
            return nonodes
        # Now that we have our set of ancestors, we want to remove any
        # roots that are not ancestors.

        # If one of the roots was nullid, everything is included anyway.
        if lowestrev > nullrev:
            # But, since we weren't, let's recompute the lowest rev to not
            # include roots that aren't ancestors.

            # Filter out roots that aren't ancestors of heads
            roots = [n for n in roots if n in ancestors]
            # Recompute the lowest revision
            if roots:
                lowestrev = min([self.rev(n) for n in roots])
            else:
                # No more roots? Return empty list
                return nonodes
        else:
            # We are descending from nullid, and don't need to care about
            # any other roots.
            lowestrev = nullrev
            roots = [nullid]
        # Transform our roots list into a set.
        descendants = set(roots)
        # Also, keep the original roots so we can filter out roots that aren't
        # 'real' roots (i.e. are descended from other roots).
        roots = descendants.copy()
        # Our topologically sorted list of output nodes.
        orderedout = []
        # Don't start at nullid since we don't want nullid in our output list,
        # and if nullid shows up in descendants, empty parents will look like
        # they're descendants.
        for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
            n = self.node(r)
            isdescendant = False
            if lowestrev == nullrev: # Everybody is a descendant of nullid
                isdescendant = True
            elif n in descendants:
                # n is already a descendant
                isdescendant = True
                # This check only needs to be done here because all the roots
                # will start being marked as descendants before the loop.
                if n in roots:
                    # If n was a root, check if it's a 'real' root.
                    p = tuple(self.parents(n))
                    # If any of its parents are descendants, it's not a root.
                    if (p[0] in descendants) or (p[1] in descendants):
                        roots.remove(n)
            else:
                p = tuple(self.parents(n))
                # A node is a descendant if either of its parents are
                # descendants. (We seeded the descendants set with the roots
                # up there, remember?)
                if (p[0] in descendants) or (p[1] in descendants):
                    descendants.add(n)
                    isdescendant = True
            if isdescendant and ((ancestors is None) or (n in ancestors)):
                # Only include nodes that are both descendants and ancestors.
                orderedout.append(n)
                if (ancestors is not None) and (n in heads):
                    # We're trying to figure out which heads are reachable
                    # from roots.
                    # Mark this head as having been reached
                    heads[n] = True
                elif ancestors is None:
                    # Otherwise, we're trying to discover the heads.
                    # Assume this is a head because if it isn't, the next step
                    # will eventually remove it.
                    heads[n] = True
                    # But, obviously its parents aren't.
                    for p in self.parents(n):
                        heads.pop(p, None)
        heads = [n for n, flag in heads.iteritems() if flag]
        roots = list(roots)
        assert orderedout
        assert roots
        assert heads
        return (orderedout, roots, heads)

    def headrevs(self):
        try:
            return self.index.headrevs()
        except AttributeError:
            return self._headrevs()

    def _headrevs(self):
        count = len(self)
        if not count:
            return [nullrev]
        # we won't iter over filtered rev so nobody is a head at start
        ishead = [0] * (count + 1)
        index = self.index
        for r in self:
            ishead[r] = 1 # I may be a head
            e = index[r]
            ishead[e[5]] = ishead[e[6]] = 0 # my parents are not
        return [r for r, val in enumerate(ishead) if val]
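
    # Illustration (not in the upstream source): every revision starts as
    # a candidate head and is disqualified once it appears as a parent, so
    # for a linear history 0 <- 1 <- 2 only ishead[2] survives and the
    # result is [2].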

    def heads(self, start=None, stop=None):
        """return the list of all nodes that have no children

        if start is specified, only heads that are descendants of
        start will be returned
        if stop is specified, it will consider all the revs from stop
        as if they had no children
        """
        if start is None and stop is None:
            if not len(self):
                return [nullid]
            return [self.node(r) for r in self.headrevs()]

        if start is None:
            start = nullid
        if stop is None:
            stop = []
        stoprevs = set([self.rev(n) for n in stop])
        startrev = self.rev(start)
        reachable = set((startrev,))
        heads = set((startrev,))

        parentrevs = self.parentrevs
        for r in self.revs(start=startrev + 1):
            for p in parentrevs(r):
                if p in reachable:
                    if r not in stoprevs:
                        reachable.add(r)
                        heads.add(r)
                    if p in heads and p not in stoprevs:
                        heads.remove(p)

        return [self.node(r) for r in heads]

    def children(self, node):
        """find the children of a given node"""
        c = []
        p = self.rev(node)
        for r in self.revs(start=p + 1):
            prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
            if prevs:
                for pr in prevs:
                    if pr == p:
                        c.append(self.node(r))
            elif p == nullrev:
                c.append(self.node(r))
        return c

    def descendant(self, start, end):
        if start == nullrev:
            return True
        for i in self.descendants([start]):
            if i == end:
                return True
            elif i > end:
                break
        return False

    def commonancestorsheads(self, a, b):
        """calculate all the heads of the common ancestors of nodes a and b"""
        a, b = self.rev(a), self.rev(b)
        try:
            ancs = self.index.commonancestorsheads(a, b)
        except (AttributeError, OverflowError): # C implementation failed
            ancs = ancestor.commonancestorsheads(self.parentrevs, a, b)
        return map(self.node, ancs)

    def isancestor(self, a, b):
        """return True if node a is an ancestor of node b

        The implementation of this is trivial but the use of
        commonancestorsheads is not."""
        return a in self.commonancestorsheads(a, b)

    def ancestor(self, a, b):
        """calculate the "best" common ancestor of nodes a and b"""

        a, b = self.rev(a), self.rev(b)
        try:
            ancs = self.index.ancestors(a, b)
        except (AttributeError, OverflowError):
            ancs = ancestor.ancestors(self.parentrevs, a, b)
        if ancs:
            # choose a consistent winner when there's a tie
            return min(map(self.node, ancs))
        return nullid

    def _match(self, id):
        if isinstance(id, int):
            # rev
            return self.node(id)
        if len(id) == 20:
            # possibly a binary node
            # odds of a binary node being all hex in ASCII are 1 in 10**25
            try:
                node = id
                self.rev(node) # quick search the index
                return node
            except LookupError:
                pass # may be partial hex id
        try:
            # str(rev)
            rev = int(id)
            if str(rev) != id:
                raise ValueError
            if rev < 0:
                rev = len(self) + rev
            if rev < 0 or rev >= len(self):
                raise ValueError
            return self.node(rev)
        except (ValueError, OverflowError):
            pass
        if len(id) == 40:
            try:
                # a full hex nodeid?
                node = bin(id)
                self.rev(node)
                return node
            except (TypeError, LookupError):
                pass

    def _partialmatch(self, id):
        try:
            n = self.index.partialmatch(id)
            if n and self.hasnode(n):
                return n
            return None
        except RevlogError:
            # parsers.c radix tree lookup gave multiple matches
            # fall through to slow path that filters hidden revisions
            pass
        except (AttributeError, ValueError):
            # we are pure python, or key was too short to search radix tree
            pass

        if id in self._pcache:
            return self._pcache[id]

        if len(id) < 40:
            try:
                # hex(node)[:...]
                l = len(id) // 2 # grab an even number of digits
                prefix = bin(id[:l * 2])
                nl = [e[7] for e in self.index if e[7].startswith(prefix)]
                nl = [n for n in nl if hex(n).startswith(id) and
                      self.hasnode(n)]
                if len(nl) > 0:
                    if len(nl) == 1:
                        self._pcache[id] = nl[0]
                        return nl[0]
                    raise LookupError(id, self.indexfile,
                                      _('ambiguous identifier'))
                return None
            except TypeError:
                pass

    def lookup(self, id):
        """locate a node based on:
            - revision number or str(revision number)
            - nodeid or subset of hex nodeid
        """
        n = self._match(id)
        if n is not None:
            return n
        n = self._partialmatch(id)
        if n:
            return n

        raise LookupError(id, self.indexfile, _('no match found'))

    def cmp(self, node, text):
        """compare text with a given file revision

        returns True if text is different than what is stored.
        """
        p1, p2 = self.parents(node)
        return hash(text, p1, p2) != node

    def _addchunk(self, offset, data):
        o, d = self._chunkcache
        # try to add to existing cache
        if o + len(d) == offset and len(d) + len(data) < _chunksize:
            self._chunkcache = o, d + data
        else:
            self._chunkcache = offset, data

    def _loadchunk(self, offset, length):
        if self._inline:
            df = self.opener(self.indexfile)
        else:
            df = self.opener(self.datafile)

        # Cache data both forward and backward around the requested
        # data, in a fixed size window. This helps speed up operations
        # involving reading the revlog backwards.
        cachesize = self._chunkcachesize
        realoffset = offset & ~(cachesize - 1)
        reallength = (((offset + length + cachesize) & ~(cachesize - 1))
                      - realoffset)
        df.seek(realoffset)
        d = df.read(reallength)
        df.close()
        self._addchunk(realoffset, d)
        if offset != realoffset or reallength != length:
            return util.buffer(d, offset - realoffset, length)
        return d
942
944
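    # A worked example of the window alignment above (illustrative numbers,
    # assuming a 64K chunk cache size): a request for offset=70000,
    # length=100 gives realoffset = 70000 & ~65535 = 65536 and
    # reallength = ((70000 + 100 + 65536) & ~65535) - 65536 = 65536, so one
    # aligned 64K window is read and cached around the requested range.
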
    def _getchunk(self, offset, length):
        o, d = self._chunkcache
        l = len(d)

        # is it in the cache?
        cachestart = offset - o
        cacheend = cachestart + length
        if cachestart >= 0 and cacheend <= l:
            if cachestart == 0 and cacheend == l:
                return d # avoid a copy
            return util.buffer(d, cachestart, cacheend - cachestart)

        return self._loadchunk(offset, length)

    def _chunkraw(self, startrev, endrev):
        start = self.start(startrev)
        end = self.end(endrev)
        if self._inline:
            start += (startrev + 1) * self._io.size
            end += (endrev + 1) * self._io.size
        length = end - start
        return self._getchunk(start, length)

    def _chunk(self, rev):
        return decompress(self._chunkraw(rev, rev))

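    # In an inline revlog the data chunk for revision r is preceded by the
    # r + 1 index entries for revisions 0..r, hence the (rev + 1) * iosize
    # adjustments above. Illustrative example, assuming a 64-byte index
    # entry: revision 2's chunk begins at self.start(2) + 3 * 64 bytes into
    # the index file.
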
    def _chunks(self, revs):
        '''faster version of [self._chunk(rev) for rev in revs]

        Assumes that revs is in ascending order.'''
        if not revs:
            return []
        start = self.start
        length = self.length
        inline = self._inline
        iosize = self._io.size
        buffer = util.buffer

        l = []
        ladd = l.append

        # preload the cache
        try:
            while True:
                # ensure that the cache doesn't change out from under us
                _cache = self._chunkcache
                self._chunkraw(revs[0], revs[-1])
                if _cache == self._chunkcache:
                    break
            offset, data = _cache
        except OverflowError:
            # issue4215 - we can't cache a run of chunks greater than
            # 2G on Windows
            return [self._chunk(rev) for rev in revs]

        for rev in revs:
            chunkstart = start(rev)
            if inline:
                chunkstart += (rev + 1) * iosize
            chunklength = length(rev)
            ladd(decompress(buffer(data, chunkstart - offset, chunklength)))

        return l

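    # The retry loop above re-reads until the cache snapshot taken before
    # _chunkraw() is still the live cache afterwards; only then is it safe
    # to slice every requested chunk out of that single cached buffer
    # instead of paying one read per revision.
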
    def _chunkclear(self):
        self._chunkcache = (0, '')

    def deltaparent(self, rev):
        """return deltaparent of the given revision"""
        base = self.index[rev][3]
        if base == rev:
            return nullrev
        elif self._generaldelta:
            return base
        else:
            return rev - 1

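    # Illustrative contrast of the two delta schemes: in a classic revlog a
    # delta is always against the previous revision, so deltaparent(5) is 4
    # whenever revision 5 is not a full snapshot; with generaldelta the
    # index records the actual base, which may be any earlier revision
    # (typically one of the revision's parents).
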
    def revdiff(self, rev1, rev2):
        """return or calculate a delta between two revisions"""
        if rev1 != nullrev and self.deltaparent(rev2) == rev1:
            return str(self._chunk(rev2))

        return mdiff.textdiff(self.revision(rev1),
                              self.revision(rev2))

    def revision(self, nodeorrev):
        """return an uncompressed revision of a given node or revision
        number.
        """
        if isinstance(nodeorrev, int):
            rev = nodeorrev
            node = self.node(rev)
        else:
            node = nodeorrev
            rev = None

        _cache = self._cache # grab local copy of cache to avoid thread race
        cachedrev = None
        if node == nullid:
            return ""
        if _cache:
            if _cache[0] == node:
                return _cache[2]
            cachedrev = _cache[1]

        # look up what we need to read
        text = None
        if rev is None:
            rev = self.rev(node)

        # check rev flags
        if self.flags(rev) & ~REVIDX_KNOWN_FLAGS:
            raise RevlogError(_('incompatible revision flag %x') %
                              (self.flags(rev) & ~REVIDX_KNOWN_FLAGS))

        # build delta chain
        chain = []
        index = self.index # for performance
        generaldelta = self._generaldelta
        iterrev = rev
        e = index[iterrev]
        while iterrev != e[3] and iterrev != cachedrev:
            chain.append(iterrev)
            if generaldelta:
                iterrev = e[3]
            else:
                iterrev -= 1
            e = index[iterrev]

        if iterrev == cachedrev:
            # cache hit
            text = _cache[2]
        else:
            chain.append(iterrev)
        chain.reverse()

        # drop cache to save memory
        self._cache = None

        bins = self._chunks(chain)
        if text is None:
            text = str(bins[0])
            bins = bins[1:]

        text = mdiff.patches(text, bins)

        text = self._checkhash(text, node, rev)

        self._cache = (node, rev, text)
        return text

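    # Sketch of the chain resolution above (illustrative): if revision 7 is
    # stored as a delta against 5, revision 5 against 3, and revision 3 is
    # a full snapshot, then chain == [3, 5, 7] after the reverse, and the
    # text is recovered as mdiff.patches(str(bins[0]), bins[1:]) -- the
    # snapshot with the two deltas applied in order.
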
    def hash(self, text, p1, p2):
        """Compute a node hash.

        Available as a function so that subclasses can replace the hash
        as needed.
        """
        return hash(text, p1, p2)

    def _checkhash(self, text, node, rev):
        p1, p2 = self.parents(node)
        self.checkhash(text, p1, p2, node, rev)
        return text

    def checkhash(self, text, p1, p2, node, rev=None):
        if node != self.hash(text, p1, p2):
            revornode = rev
            if revornode is None:
                revornode = templatefilters.short(hex(node))
            raise RevlogError(_("integrity check failed on %s:%s")
                              % (self.indexfile, revornode))

    def checkinlinesize(self, tr, fp=None):
        if not self._inline or (self.start(-2) + self.length(-2)) < _maxinline:
            return

        trinfo = tr.find(self.indexfile)
        if trinfo is None:
            raise RevlogError(_("%s not found in the transaction")
                              % self.indexfile)

        trindex = trinfo[2]
        dataoff = self.start(trindex)

        tr.add(self.datafile, dataoff)

        if fp:
            fp.flush()
            fp.close()

        df = self.opener(self.datafile, 'w')
        try:
            for r in self:
                df.write(self._chunkraw(r, r))
        finally:
            df.close()

        fp = self.opener(self.indexfile, 'w', atomictemp=True)
        self.version &= ~(REVLOGNGINLINEDATA)
        self._inline = False
        for i in self:
            e = self._io.packentry(self.index[i], self.node, self.version, i)
            fp.write(e)

        # if we don't call close, the temp file will never replace the
        # real index
        fp.close()

        tr.replace(self.indexfile, trindex * self._io.size)
        self._chunkclear()

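    # Once inline data outgrows _maxinline, the migration above rewrites
    # the revlog as a separate index/data pair: every chunk is copied into
    # the data file, the inline flag is cleared from self.version, and the
    # index is rewritten via an atomic temp file so an interrupted
    # conversion cannot leave a half-migrated revlog behind.
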
    def addrevision(self, text, transaction, link, p1, p2, cachedelta=None,
                    node=None):
        """add a revision to the log

        text - the revision data to add
        transaction - the transaction object used for rollback
        link - the linkrev data to add
        p1, p2 - the parent nodeids of the revision
        cachedelta - an optional precomputed delta
        node - nodeid of revision; typically node is not specified, and it is
            computed by default as hash(text, p1, p2); however, subclasses
            might use a different hashing method (and override checkhash()
            in that case)
        """
        if link == nullrev:
            raise RevlogError(_("attempted to add linkrev -1 to %s")
                              % self.indexfile)
        node = node or self.hash(text, p1, p2)
        if node in self.nodemap:
            return node

        dfh = None
        if not self._inline:
            dfh = self.opener(self.datafile, "a")
        ifh = self.opener(self.indexfile, "a+")
        try:
            return self._addrevision(node, text, transaction, link, p1, p2,
                                     cachedelta, ifh, dfh)
        finally:
            if dfh:
                dfh.close()
            ifh.close()

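    # Hedged usage sketch (the variable names below are hypothetical):
    #
    #   node = rlog.addrevision(text, tr, linkrev, p1node, p2node)
    #
    # where tr is an open transaction and p1node/p2node are nodeids already
    # known to the revlog (or nullid). The returned node is the hash of
    # (text, p1, p2) unless a subclass substitutes its own hash().
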
    def compress(self, text):
        """ generate a possibly-compressed representation of text """
        if not text:
            return ("", text)
        l = len(text)
        bin = None
        if l < 44:
            pass
        elif l > 1000000:
            # zlib makes an internal copy, thus doubling memory usage for
            # large files, so let's do this in pieces
            z = zlib.compressobj()
            p = []
            pos = 0
            while pos < l:
                pos2 = pos + 2**20
                p.append(z.compress(text[pos:pos2]))
                pos = pos2
            p.append(z.flush())
            if sum(map(len, p)) < l:
                bin = "".join(p)
        else:
            bin = _compress(text)
        if bin is None or len(bin) > l:
            if text[0] == '\0':
                return ("", text)
            return ('u', text)
        return ("", bin)

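    # The three possible return shapes above (illustrative): ("", text) for
    # empty or NUL-prefixed text stored verbatim (the leading '\0' already
    # marks it as literal), ('u', text) for incompressible text stored with
    # an explicit 'u' marker byte, and ("", bin) for zlib output, which is
    # self-identifying by its 'x' header byte.
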
    def _addrevision(self, node, text, transaction, link, p1, p2,
                     cachedelta, ifh, dfh):
        """internal function to add revisions to the log

        see addrevision for argument descriptions.
        invariants:
        - text is optional (can be None); if not set, cachedelta must be set.
          if both are set, they must correspond to each other.
        """
        btext = [text]
        def buildtext():
            if btext[0] is not None:
                return btext[0]
            # flush any pending writes here so we can read it in revision
            if dfh:
                dfh.flush()
            ifh.flush()
            basetext = self.revision(self.node(cachedelta[0]))
            btext[0] = mdiff.patch(basetext, cachedelta[1])
            try:
                self.checkhash(btext[0], p1, p2, node)
            except CensoredNodeError:
                pass # always import a censor tombstone.
            return btext[0]

        def builddelta(rev):
            # can we use the cached delta?
            if cachedelta and cachedelta[0] == rev:
                delta = cachedelta[1]
            else:
                t = buildtext()
                ptext = self.revision(self.node(rev))
                delta = mdiff.textdiff(ptext, t)
            data = self.compress(delta)
            l = len(data[1]) + len(data[0])
            if basecache[0] == rev:
                chainbase = basecache[1]
            else:
                chainbase = self.chainbase(rev)
            dist = l + offset - self.start(chainbase)
            if self._generaldelta:
                base = rev
            else:
                base = chainbase
            chainlen, compresseddeltalen = self._chaininfo(rev)
            chainlen += 1
            compresseddeltalen += l
            return dist, l, data, base, chainbase, chainlen, compresseddeltalen

        curr = len(self)
        prev = curr - 1
        base = chainbase = curr
        chainlen = None
        offset = self.end(prev)
        flags = 0
        d = None
        if self._basecache is None:
            self._basecache = (prev, self.chainbase(prev))
        basecache = self._basecache
        p1r, p2r = self.rev(p1), self.rev(p2)

        # should we try to build a delta?
        if prev != nullrev:
            if self._generaldelta:
                if p1r >= basecache[1]:
                    d = builddelta(p1r)
                elif p2r >= basecache[1]:
                    d = builddelta(p2r)
                else:
                    d = builddelta(prev)
            else:
                d = builddelta(prev)
            dist, l, data, base, chainbase, chainlen, compresseddeltalen = d

        # full versions are inserted when the needed deltas
        # become comparable to the uncompressed text
        if text is None:
            textlen = mdiff.patchedsize(self.rawsize(cachedelta[0]),
                                        cachedelta[1])
        else:
            textlen = len(text)

        # - 'dist' is the distance from the base revision -- bounding it limits
        #   the amount of I/O we need to do.
        # - 'compresseddeltalen' is the sum of the total size of deltas we need
        #   to apply -- bounding it limits the amount of CPU we consume.
        if (d is None or dist > textlen * 4 or l > textlen or
            compresseddeltalen > textlen * 2 or
            (self._maxchainlen and chainlen > self._maxchainlen)):
            text = buildtext()
            data = self.compress(text)
            l = len(data[1]) + len(data[0])
            base = chainbase = curr

        e = (offset_type(offset, flags), l, textlen,
             base, link, p1r, p2r, node)
        self.index.insert(-1, e)
        self.nodemap[node] = curr

        entry = self._io.packentry(e, self.node, self.version, curr)
        self._writeentry(transaction, ifh, dfh, entry, data, link, offset)

        if type(text) == str: # only accept immutable objects
            self._cache = (node, curr, text)
        self._basecache = (curr, chainbase)
        return node

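    # Putting numbers on the snapshot heuristic above (illustrative): for a
    # 1000-byte text, the delta is rejected and a full version stored when
    # the read span from the chain base exceeds 4000 bytes, the delta alone
    # exceeds 1000 bytes, the summed compressed deltas exceed 2000 bytes,
    # or the chain would grow past the configured _maxchainlen.
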
    def _writeentry(self, transaction, ifh, dfh, entry, data, link, offset):
        curr = len(self) - 1
        if not self._inline:
            transaction.add(self.datafile, offset)
            transaction.add(self.indexfile, curr * len(entry))
            if data[0]:
                dfh.write(data[0])
            dfh.write(data[1])
            dfh.flush()
            ifh.write(entry)
        else:
            offset += curr * self._io.size
            transaction.add(self.indexfile, offset, curr)
            ifh.write(entry)
            ifh.write(data[0])
            ifh.write(data[1])
            self.checkinlinesize(transaction, ifh)

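    # In the non-inline branch the packed entry and its data go to separate
    # files; in the inline branch both are appended to the index file, and
    # checkinlinesize() then gets a chance to migrate the revlog once it
    # outgrows _maxinline.
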
    def addgroup(self, bundle, linkmapper, transaction):
        """
        add a delta group

        given a set of deltas, add them to the revision log. the
        first delta is against its parent, which should be in our
        log, the rest are against the previous delta.
        """

        # track the base of the current delta log
        content = []
        node = None

        r = len(self)
        end = 0
        if r:
            end = self.end(r - 1)
        ifh = self.opener(self.indexfile, "a+")
        isize = r * self._io.size
        if self._inline:
            transaction.add(self.indexfile, end + isize, r)
            dfh = None
        else:
            transaction.add(self.indexfile, isize, r)
            transaction.add(self.datafile, end)
            dfh = self.opener(self.datafile, "a")

        try:
            # loop through our set of deltas
            chain = None
            while True:
                chunkdata = bundle.deltachunk(chain)
                if not chunkdata:
                    break
                node = chunkdata['node']
                p1 = chunkdata['p1']
                p2 = chunkdata['p2']
                cs = chunkdata['cs']
                deltabase = chunkdata['deltabase']
                delta = chunkdata['delta']

                content.append(node)

                link = linkmapper(cs)
                if node in self.nodemap:
                    # this can happen if two branches make the same change
                    chain = node
                    continue

                for p in (p1, p2):
                    if p not in self.nodemap:
                        raise LookupError(p, self.indexfile,
                                          _('unknown parent'))

                if deltabase not in self.nodemap:
                    raise LookupError(deltabase, self.indexfile,
                                      _('unknown delta base'))

                baserev = self.rev(deltabase)
                chain = self._addrevision(node, None, transaction, link,
                                          p1, p2, (baserev, delta), ifh, dfh)
                if not dfh and not self._inline:
                    # addrevision switched from inline to conventional
                    # reopen the index
                    ifh.close()
                    dfh = self.opener(self.datafile, "a")
                    ifh = self.opener(self.indexfile, "a")
        finally:
            if dfh:
                dfh.close()
            ifh.close()

        return content

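    # Each chunkdata mapping produced by bundle.deltachunk() carries the
    # fields consumed above: 'node', 'p1', 'p2', 'cs' (the changeset used
    # for the linkrev mapping), 'deltabase' and 'delta'. chain is threaded
    # back into deltachunk() because later deltas in a group may be based
    # on the node added just before them.
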
    def getstrippoint(self, minlink):
        """find the minimum rev that must be stripped to strip the linkrev

        Returns a tuple containing the minimum rev and a set of all revs that
        have linkrevs that will be broken by this strip.
        """
        brokenrevs = set()
        strippoint = len(self)

        heads = {}
        futurelargelinkrevs = set()
        for head in self.headrevs():
            headlinkrev = self.linkrev(head)
            heads[head] = headlinkrev
            if headlinkrev >= minlink:
                futurelargelinkrevs.add(headlinkrev)

        # This algorithm involves walking down the rev graph, starting at the
        # heads. Since the revs are topologically sorted according to linkrev,
        # once all head linkrevs are below the minlink, we know there are
        # no more revs that could have a linkrev greater than minlink.
        # So we can stop walking.
        while futurelargelinkrevs:
            strippoint -= 1
            linkrev = heads.pop(strippoint)

            if linkrev < minlink:
                brokenrevs.add(strippoint)
            else:
                futurelargelinkrevs.remove(linkrev)

            for p in self.parentrevs(strippoint):
                if p != nullrev:
                    plinkrev = self.linkrev(p)
                    heads[p] = plinkrev
                    if plinkrev >= minlink:
                        futurelargelinkrevs.add(plinkrev)

        return strippoint, brokenrevs

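    # Worked example (illustrative): with linkrevs [5, 7, 10, 11] on a
    # linear revlog and minlink=10, the walk pops revs 3 (linkrev 11) and
    # 2 (linkrev 10) and returns (2, set()). A rev above the strip point
    # whose own linkrev is below minlink (possible in branchy graphs) would
    # instead land in brokenrevs: the truncation removes it even though its
    # linkrev alone would not require it.
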
    def strip(self, minlink, transaction):
        """truncate the revlog on the first revision with a linkrev >= minlink

        This function is called when we're stripping revision minlink and
        its descendants from the repository.

        We have to remove all revisions with linkrev >= minlink, because
        the equivalent changelog revisions will be renumbered after the
        strip.

        So we truncate the revlog on the first of these revisions, and
        trust that the caller has saved the revisions that shouldn't be
        removed and that it'll re-add them after this truncation.
        """
        if len(self) == 0:
            return

        rev, _ = self.getstrippoint(minlink)
        if rev == len(self):
            return

        # first truncate the files on disk
        end = self.start(rev)
        if not self._inline:
            transaction.add(self.datafile, end)
            end = rev * self._io.size
        else:
            end += rev * self._io.size

        transaction.add(self.indexfile, end)

        # then reset internal state in memory to forget those revisions
        self._cache = None
        self._chaininfocache = {}
        self._chunkclear()
        for x in xrange(rev, len(self)):
            del self.nodemap[self.node(x)]

        del self.index[rev:-1]

    def checksize(self):
        expected = 0
        if len(self):
            expected = max(0, self.end(len(self) - 1))

        try:
            f = self.opener(self.datafile)
            f.seek(0, 2)
            actual = f.tell()
            f.close()
            dd = actual - expected
        except IOError, inst:
            if inst.errno != errno.ENOENT:
                raise
            dd = 0

        try:
            f = self.opener(self.indexfile)
            f.seek(0, 2)
            actual = f.tell()
            f.close()
            s = self._io.size
            i = max(0, actual // s)
            di = actual - (i * s)
            if self._inline:
                databytes = 0
                for r in self:
                    databytes += max(0, self.length(r))
                dd = 0
                di = actual - len(self) * s - databytes
        except IOError, inst:
            if inst.errno != errno.ENOENT:
                raise
            di = 0

        return (dd, di)

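    # checksize() returns (dd, di): the number of unaccounted-for bytes in
    # the data file and in the index file respectively, so (0, 0) means the
    # on-disk sizes match what the index entries describe. Nonzero values
    # point at truncation or trailing garbage.
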
    def files(self):
        res = [self.indexfile]
        if not self._inline:
            res.append(self.datafile)
        return res