revlog: support importing censored file revision tombstones...
Mike Edgar
r22934:8a096d4d default
@@ -1,1465 +1,1469 b''
1 # revlog.py - storage back-end for mercurial
1 # revlog.py - storage back-end for mercurial
2 #
2 #
3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 #
4 #
5 # This software may be used and distributed according to the terms of the
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
6 # GNU General Public License version 2 or any later version.
7
7
8 """Storage back-end for Mercurial.
8 """Storage back-end for Mercurial.
9
9
10 This provides efficient delta storage with O(1) retrieve and append
10 This provides efficient delta storage with O(1) retrieve and append
11 and O(changes) merge between branches.
11 and O(changes) merge between branches.
12 """
12 """
13
13
14 # import stuff from node for others to import from revlog
14 # import stuff from node for others to import from revlog
15 from node import bin, hex, nullid, nullrev
15 from node import bin, hex, nullid, nullrev
16 from i18n import _
16 from i18n import _
17 import ancestor, mdiff, parsers, error, util, templatefilters
17 import ancestor, mdiff, parsers, error, util, templatefilters
18 import struct, zlib, errno
18 import struct, zlib, errno
19
19
20 _pack = struct.pack
20 _pack = struct.pack
21 _unpack = struct.unpack
21 _unpack = struct.unpack
22 _compress = zlib.compress
22 _compress = zlib.compress
23 _decompress = zlib.decompress
23 _decompress = zlib.decompress
24 _sha = util.sha1
24 _sha = util.sha1
25
25
26 # revlog header flags
26 # revlog header flags
27 REVLOGV0 = 0
27 REVLOGV0 = 0
28 REVLOGNG = 1
28 REVLOGNG = 1
29 REVLOGNGINLINEDATA = (1 << 16)
29 REVLOGNGINLINEDATA = (1 << 16)
30 REVLOGGENERALDELTA = (1 << 17)
30 REVLOGGENERALDELTA = (1 << 17)
31 REVLOG_DEFAULT_FLAGS = REVLOGNGINLINEDATA
31 REVLOG_DEFAULT_FLAGS = REVLOGNGINLINEDATA
32 REVLOG_DEFAULT_FORMAT = REVLOGNG
32 REVLOG_DEFAULT_FORMAT = REVLOGNG
33 REVLOG_DEFAULT_VERSION = REVLOG_DEFAULT_FORMAT | REVLOG_DEFAULT_FLAGS
33 REVLOG_DEFAULT_VERSION = REVLOG_DEFAULT_FORMAT | REVLOG_DEFAULT_FLAGS
34 REVLOGNG_FLAGS = REVLOGNGINLINEDATA | REVLOGGENERALDELTA
34 REVLOGNG_FLAGS = REVLOGNGINLINEDATA | REVLOGGENERALDELTA
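# Example (sketch derived from the constants above): a RevlogNG index with
# inline data records version REVLOGNG | REVLOGNGINLINEDATA == 0x10001; the
# low 16 bits carry the format (0x10001 & 0xFFFF == 1) and the high bits
# carry feature flags (0x10001 & ~0xFFFF == 0x10000), which is exactly how
# revlog.__init__ splits the value into fmt and flags below.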
35
35
36 # revlog index flags
36 # revlog index flags
37 REVIDX_KNOWN_FLAGS = 0
37 REVIDX_KNOWN_FLAGS = 0
38
38
39 # max size of revlog with inline data
39 # max size of revlog with inline data
40 _maxinline = 131072
40 _maxinline = 131072
41 _chunksize = 1048576
41 _chunksize = 1048576
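# Sketch of what these limits mean in practice: a revlog keeps its data
# inline in the .i file only while it stays under 128 KiB (_maxinline),
# and _addchunk stops appending to the cached read window once it would
# exceed 1 MiB (_chunksize).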
42
42
43 RevlogError = error.RevlogError
43 RevlogError = error.RevlogError
44 LookupError = error.LookupError
44 LookupError = error.LookupError
45 CensoredNodeError = error.CensoredNodeError
45
46
46 def getoffset(q):
47 def getoffset(q):
47 return int(q >> 16)
48 return int(q >> 16)
48
49
49 def gettype(q):
50 def gettype(q):
50 return int(q & 0xFFFF)
51 return int(q & 0xFFFF)
51
52
52 def offset_type(offset, type):
53 def offset_type(offset, type):
53 return long(long(offset) << 16 | type)
54 return long(long(offset) << 16 | type)
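# Example (sketch): the first field of an index entry packs the data offset
# and per-revision flags into a single integer, so
#   offset_type(1024, 7) == (1024 << 16) | 7
#   getoffset(offset_type(1024, 7)) == 1024
#   gettype(offset_type(1024, 7)) == 7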
54
55
55 _nullhash = _sha(nullid)
56 _nullhash = _sha(nullid)
56
57
57 def hash(text, p1, p2):
58 def hash(text, p1, p2):
58 """generate a hash from the given text and its parent hashes
59 """generate a hash from the given text and its parent hashes
59
60
60 This hash combines both the current file contents and its history
61 This hash combines both the current file contents and its history
61 in a manner that makes it easy to distinguish nodes with the same
62 in a manner that makes it easy to distinguish nodes with the same
62 content in the revision graph.
63 content in the revision graph.
63 """
64 """
64 # As of now, if one of the parent node is null, p2 is null
65 # As of now, if one of the parent node is null, p2 is null
65 if p2 == nullid:
66 if p2 == nullid:
66 # deep copy of a hash is faster than creating one
67 # deep copy of a hash is faster than creating one
67 s = _nullhash.copy()
68 s = _nullhash.copy()
68 s.update(p1)
69 s.update(p1)
69 else:
70 else:
70 # none of the parent nodes are nullid
71 # none of the parent nodes are nullid
71 l = [p1, p2]
72 l = [p1, p2]
72 l.sort()
73 l.sort()
73 s = _sha(l[0])
74 s = _sha(l[0])
74 s.update(l[1])
75 s.update(l[1])
75 s.update(text)
76 s.update(text)
76 return s.digest()
77 return s.digest()
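# Example (minimal sketch, assuming util.sha1 is hashlib.sha1): for a root
# revision (p2 == nullid) the node is sha1(nullid + p1 + text).digest();
# for a merge the two parents are hashed in sorted order before the text,
# i.e. sha1(min(p1, p2) + max(p1, p2) + text).digest().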
77
78
78 def decompress(bin):
79 def decompress(bin):
79 """ decompress the given input """
80 """ decompress the given input """
80 if not bin:
81 if not bin:
81 return bin
82 return bin
82 t = bin[0]
83 t = bin[0]
83 if t == '\0':
84 if t == '\0':
84 return bin
85 return bin
85 if t == 'x':
86 if t == 'x':
86 try:
87 try:
87 return _decompress(bin)
88 return _decompress(bin)
88 except zlib.error, e:
89 except zlib.error, e:
89 raise RevlogError(_("revlog decompress error: %s") % str(e))
90 raise RevlogError(_("revlog decompress error: %s") % str(e))
90 if t == 'u':
91 if t == 'u':
91 return bin[1:]
92 return bin[1:]
92 raise RevlogError(_("unknown compression type %r") % t)
93 raise RevlogError(_("unknown compression type %r") % t)
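# Example (sketch): stored chunks are tagged by their first byte -- '\0'
# means the (empty or literal) chunk is returned unchanged, 'u' marks
# uncompressed data with the marker stripped, and 'x' is the usual zlib
# header byte, so decompress('u' + text) == text and
# decompress(zlib.compress(text)) == text.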
93
94
94 # index v0:
95 # index v0:
95 # 4 bytes: offset
96 # 4 bytes: offset
96 # 4 bytes: compressed length
97 # 4 bytes: compressed length
97 # 4 bytes: base rev
98 # 4 bytes: base rev
98 # 4 bytes: link rev
99 # 4 bytes: link rev
99 # 20 bytes: parent 1 nodeid
100 # 20 bytes: parent 1 nodeid
100 # 20 bytes: parent 2 nodeid
101 # 20 bytes: parent 2 nodeid
101 # 20 bytes: nodeid
102 # 20 bytes: nodeid
102 indexformatv0 = ">4l20s20s20s"
103 indexformatv0 = ">4l20s20s20s"
103 v0shaoffset = 56
104 v0shaoffset = 56
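# Example (sketch): struct.calcsize(indexformatv0) == 76 bytes per entry
# (four 4-byte ints plus three 20-byte nodeids), and v0shaoffset == 56 is
# the offset of the entry's own nodeid (16 bytes of ints + two parents).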
104
105
105 class revlogoldio(object):
106 class revlogoldio(object):
106 def __init__(self):
107 def __init__(self):
107 self.size = struct.calcsize(indexformatv0)
108 self.size = struct.calcsize(indexformatv0)
108
109
109 def parseindex(self, data, inline):
110 def parseindex(self, data, inline):
110 s = self.size
111 s = self.size
111 index = []
112 index = []
112 nodemap = {nullid: nullrev}
113 nodemap = {nullid: nullrev}
113 n = off = 0
114 n = off = 0
114 l = len(data)
115 l = len(data)
115 while off + s <= l:
116 while off + s <= l:
116 cur = data[off:off + s]
117 cur = data[off:off + s]
117 off += s
118 off += s
118 e = _unpack(indexformatv0, cur)
119 e = _unpack(indexformatv0, cur)
119 # transform to revlogv1 format
120 # transform to revlogv1 format
120 e2 = (offset_type(e[0], 0), e[1], -1, e[2], e[3],
121 e2 = (offset_type(e[0], 0), e[1], -1, e[2], e[3],
121 nodemap.get(e[4], nullrev), nodemap.get(e[5], nullrev), e[6])
122 nodemap.get(e[4], nullrev), nodemap.get(e[5], nullrev), e[6])
122 index.append(e2)
123 index.append(e2)
123 nodemap[e[6]] = n
124 nodemap[e[6]] = n
124 n += 1
125 n += 1
125
126
126 # add the magic null revision at -1
127 # add the magic null revision at -1
127 index.append((0, 0, 0, -1, -1, -1, -1, nullid))
128 index.append((0, 0, 0, -1, -1, -1, -1, nullid))
128
129
129 return index, nodemap, None
130 return index, nodemap, None
130
131
131 def packentry(self, entry, node, version, rev):
132 def packentry(self, entry, node, version, rev):
132 if gettype(entry[0]):
133 if gettype(entry[0]):
133 raise RevlogError(_("index entry flags need RevlogNG"))
134 raise RevlogError(_("index entry flags need RevlogNG"))
134 e2 = (getoffset(entry[0]), entry[1], entry[3], entry[4],
135 e2 = (getoffset(entry[0]), entry[1], entry[3], entry[4],
135 node(entry[5]), node(entry[6]), entry[7])
136 node(entry[5]), node(entry[6]), entry[7])
136 return _pack(indexformatv0, *e2)
137 return _pack(indexformatv0, *e2)
137
138
138 # index ng:
139 # index ng:
139 # 6 bytes: offset
140 # 6 bytes: offset
140 # 2 bytes: flags
141 # 2 bytes: flags
141 # 4 bytes: compressed length
142 # 4 bytes: compressed length
142 # 4 bytes: uncompressed length
143 # 4 bytes: uncompressed length
143 # 4 bytes: base rev
144 # 4 bytes: base rev
144 # 4 bytes: link rev
145 # 4 bytes: link rev
145 # 4 bytes: parent 1 rev
146 # 4 bytes: parent 1 rev
146 # 4 bytes: parent 2 rev
147 # 4 bytes: parent 2 rev
147 # 32 bytes: nodeid
148 # 32 bytes: nodeid
148 indexformatng = ">Qiiiiii20s12x"
149 indexformatng = ">Qiiiiii20s12x"
149 ngshaoffset = 32
150 ngshaoffset = 32
150 versionformat = ">I"
151 versionformat = ">I"
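# Example (sketch): struct.calcsize(indexformatng) == 64 bytes per entry
# (an 8-byte offset/flags word, six 4-byte ints, a 20-byte nodeid and 12
# bytes of padding); ngshaoffset == 32 is where the nodeid starts (8 + 24).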
151
152
152 class revlogio(object):
153 class revlogio(object):
153 def __init__(self):
154 def __init__(self):
154 self.size = struct.calcsize(indexformatng)
155 self.size = struct.calcsize(indexformatng)
155
156
156 def parseindex(self, data, inline):
157 def parseindex(self, data, inline):
157 # call the C implementation to parse the index data
158 # call the C implementation to parse the index data
158 index, cache = parsers.parse_index2(data, inline)
159 index, cache = parsers.parse_index2(data, inline)
159 return index, getattr(index, 'nodemap', None), cache
160 return index, getattr(index, 'nodemap', None), cache
160
161
161 def packentry(self, entry, node, version, rev):
162 def packentry(self, entry, node, version, rev):
162 p = _pack(indexformatng, *entry)
163 p = _pack(indexformatng, *entry)
163 if rev == 0:
164 if rev == 0:
164 p = _pack(versionformat, version) + p[4:]
165 p = _pack(versionformat, version) + p[4:]
165 return p
166 return p
166
167
167 class revlog(object):
168 class revlog(object):
168 """
169 """
169 the underlying revision storage object
170 the underlying revision storage object
170
171
171 A revlog consists of two parts, an index and the revision data.
172 A revlog consists of two parts, an index and the revision data.
172
173
173 The index is a file with a fixed record size containing
174 The index is a file with a fixed record size containing
174 information on each revision, including its nodeid (hash), the
175 information on each revision, including its nodeid (hash), the
175 nodeids of its parents, the position and offset of its data within
176 nodeids of its parents, the position and offset of its data within
176 the data file, and the revision it's based on. Finally, each entry
177 the data file, and the revision it's based on. Finally, each entry
177 contains a linkrev entry that can serve as a pointer to external
178 contains a linkrev entry that can serve as a pointer to external
178 data.
179 data.
179
180
180 The revision data itself is a linear collection of data chunks.
181 The revision data itself is a linear collection of data chunks.
181 Each chunk represents a revision and is usually represented as a
182 Each chunk represents a revision and is usually represented as a
182 delta against the previous chunk. To bound lookup time, runs of
183 delta against the previous chunk. To bound lookup time, runs of
183 deltas are limited to about 2 times the length of the original
184 deltas are limited to about 2 times the length of the original
184 version data. This makes retrieval of a version proportional to
185 version data. This makes retrieval of a version proportional to
185 its size, or O(1) relative to the number of revisions.
186 its size, or O(1) relative to the number of revisions.
186
187
187 Both pieces of the revlog are written to in an append-only
188 Both pieces of the revlog are written to in an append-only
188 fashion, which means we never need to rewrite a file to insert or
189 fashion, which means we never need to rewrite a file to insert or
189 remove data, and can use some simple techniques to avoid the need
190 remove data, and can use some simple techniques to avoid the need
190 for locking while reading.
191 for locking while reading.
191 """
192 """
192 def __init__(self, opener, indexfile):
193 def __init__(self, opener, indexfile):
193 """
194 """
194 create a revlog object
195 create a revlog object
195
196
196 opener is a function that abstracts the file opening operation
197 opener is a function that abstracts the file opening operation
197 and can be used to implement COW semantics or the like.
198 and can be used to implement COW semantics or the like.
198 """
199 """
199 self.indexfile = indexfile
200 self.indexfile = indexfile
200 self.datafile = indexfile[:-2] + ".d"
201 self.datafile = indexfile[:-2] + ".d"
201 self.opener = opener
202 self.opener = opener
202 self._cache = None
203 self._cache = None
203 self._basecache = None
204 self._basecache = None
204 self._chunkcache = (0, '')
205 self._chunkcache = (0, '')
205 self._chunkcachesize = 65536
206 self._chunkcachesize = 65536
206 self.index = []
207 self.index = []
207 self._pcache = {}
208 self._pcache = {}
208 self._nodecache = {nullid: nullrev}
209 self._nodecache = {nullid: nullrev}
209 self._nodepos = None
210 self._nodepos = None
210
211
211 v = REVLOG_DEFAULT_VERSION
212 v = REVLOG_DEFAULT_VERSION
212 opts = getattr(opener, 'options', None)
213 opts = getattr(opener, 'options', None)
213 if opts is not None:
214 if opts is not None:
214 if 'revlogv1' in opts:
215 if 'revlogv1' in opts:
215 if 'generaldelta' in opts:
216 if 'generaldelta' in opts:
216 v |= REVLOGGENERALDELTA
217 v |= REVLOGGENERALDELTA
217 else:
218 else:
218 v = 0
219 v = 0
219 if 'chunkcachesize' in opts:
220 if 'chunkcachesize' in opts:
220 self._chunkcachesize = opts['chunkcachesize']
221 self._chunkcachesize = opts['chunkcachesize']
221
222
222 if self._chunkcachesize <= 0:
223 if self._chunkcachesize <= 0:
223 raise RevlogError(_('revlog chunk cache size %r is not greater '
224 raise RevlogError(_('revlog chunk cache size %r is not greater '
224 'than 0') % self._chunkcachesize)
225 'than 0') % self._chunkcachesize)
225 elif self._chunkcachesize & (self._chunkcachesize - 1):
226 elif self._chunkcachesize & (self._chunkcachesize - 1):
226 raise RevlogError(_('revlog chunk cache size %r is not a power '
227 raise RevlogError(_('revlog chunk cache size %r is not a power '
227 'of 2') % self._chunkcachesize)
228 'of 2') % self._chunkcachesize)
228
229
229 i = ''
230 i = ''
230 self._initempty = True
231 self._initempty = True
231 try:
232 try:
232 f = self.opener(self.indexfile)
233 f = self.opener(self.indexfile)
233 i = f.read()
234 i = f.read()
234 f.close()
235 f.close()
235 if len(i) > 0:
236 if len(i) > 0:
236 v = struct.unpack(versionformat, i[:4])[0]
237 v = struct.unpack(versionformat, i[:4])[0]
237 self._initempty = False
238 self._initempty = False
238 except IOError, inst:
239 except IOError, inst:
239 if inst.errno != errno.ENOENT:
240 if inst.errno != errno.ENOENT:
240 raise
241 raise
241
242
242 self.version = v
243 self.version = v
243 self._inline = v & REVLOGNGINLINEDATA
244 self._inline = v & REVLOGNGINLINEDATA
244 self._generaldelta = v & REVLOGGENERALDELTA
245 self._generaldelta = v & REVLOGGENERALDELTA
245 flags = v & ~0xFFFF
246 flags = v & ~0xFFFF
246 fmt = v & 0xFFFF
247 fmt = v & 0xFFFF
247 if fmt == REVLOGV0 and flags:
248 if fmt == REVLOGV0 and flags:
248 raise RevlogError(_("index %s unknown flags %#04x for format v0")
249 raise RevlogError(_("index %s unknown flags %#04x for format v0")
249 % (self.indexfile, flags >> 16))
250 % (self.indexfile, flags >> 16))
250 elif fmt == REVLOGNG and flags & ~REVLOGNG_FLAGS:
251 elif fmt == REVLOGNG and flags & ~REVLOGNG_FLAGS:
251 raise RevlogError(_("index %s unknown flags %#04x for revlogng")
252 raise RevlogError(_("index %s unknown flags %#04x for revlogng")
252 % (self.indexfile, flags >> 16))
253 % (self.indexfile, flags >> 16))
253 elif fmt > REVLOGNG:
254 elif fmt > REVLOGNG:
254 raise RevlogError(_("index %s unknown format %d")
255 raise RevlogError(_("index %s unknown format %d")
255 % (self.indexfile, fmt))
256 % (self.indexfile, fmt))
256
257
257 self._io = revlogio()
258 self._io = revlogio()
258 if self.version == REVLOGV0:
259 if self.version == REVLOGV0:
259 self._io = revlogoldio()
260 self._io = revlogoldio()
260 try:
261 try:
261 d = self._io.parseindex(i, self._inline)
262 d = self._io.parseindex(i, self._inline)
262 except (ValueError, IndexError):
263 except (ValueError, IndexError):
263 raise RevlogError(_("index %s is corrupted") % (self.indexfile))
264 raise RevlogError(_("index %s is corrupted") % (self.indexfile))
264 self.index, nodemap, self._chunkcache = d
265 self.index, nodemap, self._chunkcache = d
265 if nodemap is not None:
266 if nodemap is not None:
266 self.nodemap = self._nodecache = nodemap
267 self.nodemap = self._nodecache = nodemap
267 if not self._chunkcache:
268 if not self._chunkcache:
268 self._chunkclear()
269 self._chunkclear()
269
270
270 def tip(self):
271 def tip(self):
271 return self.node(len(self.index) - 2)
272 return self.node(len(self.index) - 2)
272 def __len__(self):
273 def __len__(self):
273 return len(self.index) - 1
274 return len(self.index) - 1
274 def __iter__(self):
275 def __iter__(self):
275 return iter(xrange(len(self)))
276 return iter(xrange(len(self)))
276 def revs(self, start=0, stop=None):
277 def revs(self, start=0, stop=None):
277 """iterate over all rev in this revlog (from start to stop)"""
278 """iterate over all rev in this revlog (from start to stop)"""
278 step = 1
279 step = 1
279 if stop is not None:
280 if stop is not None:
280 if start > stop:
281 if start > stop:
281 step = -1
282 step = -1
282 stop += step
283 stop += step
283 else:
284 else:
284 stop = len(self)
285 stop = len(self)
285 return xrange(start, stop, step)
286 return xrange(start, stop, step)
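# Example (sketch): revs() walks forward by default, but with stop < start
# it steps backwards inclusively, e.g. revs(5, 2) yields 5, 4, 3, 2.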
286
287
287 @util.propertycache
288 @util.propertycache
288 def nodemap(self):
289 def nodemap(self):
289 self.rev(self.node(0))
290 self.rev(self.node(0))
290 return self._nodecache
291 return self._nodecache
291
292
292 def hasnode(self, node):
293 def hasnode(self, node):
293 try:
294 try:
294 self.rev(node)
295 self.rev(node)
295 return True
296 return True
296 except KeyError:
297 except KeyError:
297 return False
298 return False
298
299
299 def clearcaches(self):
300 def clearcaches(self):
300 try:
301 try:
301 self._nodecache.clearcaches()
302 self._nodecache.clearcaches()
302 except AttributeError:
303 except AttributeError:
303 self._nodecache = {nullid: nullrev}
304 self._nodecache = {nullid: nullrev}
304 self._nodepos = None
305 self._nodepos = None
305
306
306 def rev(self, node):
307 def rev(self, node):
307 try:
308 try:
308 return self._nodecache[node]
309 return self._nodecache[node]
309 except TypeError:
310 except TypeError:
310 raise
311 raise
311 except RevlogError:
312 except RevlogError:
312 # parsers.c radix tree lookup failed
313 # parsers.c radix tree lookup failed
313 raise LookupError(node, self.indexfile, _('no node'))
314 raise LookupError(node, self.indexfile, _('no node'))
314 except KeyError:
315 except KeyError:
315 # pure python cache lookup failed
316 # pure python cache lookup failed
316 n = self._nodecache
317 n = self._nodecache
317 i = self.index
318 i = self.index
318 p = self._nodepos
319 p = self._nodepos
319 if p is None:
320 if p is None:
320 p = len(i) - 2
321 p = len(i) - 2
321 for r in xrange(p, -1, -1):
322 for r in xrange(p, -1, -1):
322 v = i[r][7]
323 v = i[r][7]
323 n[v] = r
324 n[v] = r
324 if v == node:
325 if v == node:
325 self._nodepos = r - 1
326 self._nodepos = r - 1
326 return r
327 return r
327 raise LookupError(node, self.indexfile, _('no node'))
328 raise LookupError(node, self.indexfile, _('no node'))
328
329
329 def node(self, rev):
330 def node(self, rev):
330 return self.index[rev][7]
331 return self.index[rev][7]
331 def linkrev(self, rev):
332 def linkrev(self, rev):
332 return self.index[rev][4]
333 return self.index[rev][4]
333 def parents(self, node):
334 def parents(self, node):
334 i = self.index
335 i = self.index
335 d = i[self.rev(node)]
336 d = i[self.rev(node)]
336 return i[d[5]][7], i[d[6]][7] # map revisions to nodes inline
337 return i[d[5]][7], i[d[6]][7] # map revisions to nodes inline
337 def parentrevs(self, rev):
338 def parentrevs(self, rev):
338 return self.index[rev][5:7]
339 return self.index[rev][5:7]
339 def start(self, rev):
340 def start(self, rev):
340 return int(self.index[rev][0] >> 16)
341 return int(self.index[rev][0] >> 16)
341 def end(self, rev):
342 def end(self, rev):
342 return self.start(rev) + self.length(rev)
343 return self.start(rev) + self.length(rev)
343 def length(self, rev):
344 def length(self, rev):
344 return self.index[rev][1]
345 return self.index[rev][1]
345 def chainbase(self, rev):
346 def chainbase(self, rev):
346 index = self.index
347 index = self.index
347 base = index[rev][3]
348 base = index[rev][3]
348 while base != rev:
349 while base != rev:
349 rev = base
350 rev = base
350 base = index[rev][3]
351 base = index[rev][3]
351 return base
352 return base
352 def flags(self, rev):
353 def flags(self, rev):
353 return self.index[rev][0] & 0xFFFF
354 return self.index[rev][0] & 0xFFFF
354 def rawsize(self, rev):
355 def rawsize(self, rev):
355 """return the length of the uncompressed text for a given revision"""
356 """return the length of the uncompressed text for a given revision"""
356 l = self.index[rev][2]
357 l = self.index[rev][2]
357 if l >= 0:
358 if l >= 0:
358 return l
359 return l
359
360
360 t = self.revision(self.node(rev))
361 t = self.revision(self.node(rev))
361 return len(t)
362 return len(t)
362 size = rawsize
363 size = rawsize
363
364
364 def ancestors(self, revs, stoprev=0, inclusive=False):
365 def ancestors(self, revs, stoprev=0, inclusive=False):
365 """Generate the ancestors of 'revs' in reverse topological order.
366 """Generate the ancestors of 'revs' in reverse topological order.
366 Does not generate revs lower than stoprev.
367 Does not generate revs lower than stoprev.
367
368
368 See the documentation for ancestor.lazyancestors for more details."""
369 See the documentation for ancestor.lazyancestors for more details."""
369
370
370 return ancestor.lazyancestors(self, revs, stoprev=stoprev,
371 return ancestor.lazyancestors(self, revs, stoprev=stoprev,
371 inclusive=inclusive)
372 inclusive=inclusive)
372
373
373 def descendants(self, revs):
374 def descendants(self, revs):
374 """Generate the descendants of 'revs' in revision order.
375 """Generate the descendants of 'revs' in revision order.
375
376
376 Yield a sequence of revision numbers starting with a child of
377 Yield a sequence of revision numbers starting with a child of
377 some rev in revs, i.e., each revision is *not* considered a
378 some rev in revs, i.e., each revision is *not* considered a
378 descendant of itself. Results are ordered by revision number (a
379 descendant of itself. Results are ordered by revision number (a
379 topological sort)."""
380 topological sort)."""
380 first = min(revs)
381 first = min(revs)
381 if first == nullrev:
382 if first == nullrev:
382 for i in self:
383 for i in self:
383 yield i
384 yield i
384 return
385 return
385
386
386 seen = set(revs)
387 seen = set(revs)
387 for i in self.revs(start=first + 1):
388 for i in self.revs(start=first + 1):
388 for x in self.parentrevs(i):
389 for x in self.parentrevs(i):
389 if x != nullrev and x in seen:
390 if x != nullrev and x in seen:
390 seen.add(i)
391 seen.add(i)
391 yield i
392 yield i
392 break
393 break
393
394
394 def findcommonmissing(self, common=None, heads=None):
395 def findcommonmissing(self, common=None, heads=None):
395 """Return a tuple of the ancestors of common and the ancestors of heads
396 """Return a tuple of the ancestors of common and the ancestors of heads
396 that are not ancestors of common. In revset terminology, we return the
397 that are not ancestors of common. In revset terminology, we return the
397 tuple:
398 tuple:
398
399
399 ::common, (::heads) - (::common)
400 ::common, (::heads) - (::common)
400
401
401 The list is sorted by revision number, meaning it is
402 The list is sorted by revision number, meaning it is
402 topologically sorted.
403 topologically sorted.
403
404
404 'heads' and 'common' are both lists of node IDs. If heads is
405 'heads' and 'common' are both lists of node IDs. If heads is
405 not supplied, uses all of the revlog's heads. If common is not
406 not supplied, uses all of the revlog's heads. If common is not
406 supplied, uses nullid."""
407 supplied, uses nullid."""
407 if common is None:
408 if common is None:
408 common = [nullid]
409 common = [nullid]
409 if heads is None:
410 if heads is None:
410 heads = self.heads()
411 heads = self.heads()
411
412
412 common = [self.rev(n) for n in common]
413 common = [self.rev(n) for n in common]
413 heads = [self.rev(n) for n in heads]
414 heads = [self.rev(n) for n in heads]
414
415
415 # we want the ancestors, but inclusive
416 # we want the ancestors, but inclusive
416 class lazyset(object):
417 class lazyset(object):
417 def __init__(self, lazyvalues):
418 def __init__(self, lazyvalues):
418 self.addedvalues = set()
419 self.addedvalues = set()
419 self.lazyvalues = lazyvalues
420 self.lazyvalues = lazyvalues
420
421
421 def __contains__(self, value):
422 def __contains__(self, value):
422 return value in self.addedvalues or value in self.lazyvalues
423 return value in self.addedvalues or value in self.lazyvalues
423
424
424 def __iter__(self):
425 def __iter__(self):
425 added = self.addedvalues
426 added = self.addedvalues
426 for r in added:
427 for r in added:
427 yield r
428 yield r
428 for r in self.lazyvalues:
429 for r in self.lazyvalues:
429 if not r in added:
430 if not r in added:
430 yield r
431 yield r
431
432
432 def add(self, value):
433 def add(self, value):
433 self.addedvalues.add(value)
434 self.addedvalues.add(value)
434
435
435 def update(self, values):
436 def update(self, values):
436 self.addedvalues.update(values)
437 self.addedvalues.update(values)
437
438
438 has = lazyset(self.ancestors(common))
439 has = lazyset(self.ancestors(common))
439 has.add(nullrev)
440 has.add(nullrev)
440 has.update(common)
441 has.update(common)
441
442
442 # take all ancestors from heads that aren't in has
443 # take all ancestors from heads that aren't in has
443 missing = set()
444 missing = set()
444 visit = util.deque(r for r in heads if r not in has)
445 visit = util.deque(r for r in heads if r not in has)
445 while visit:
446 while visit:
446 r = visit.popleft()
447 r = visit.popleft()
447 if r in missing:
448 if r in missing:
448 continue
449 continue
449 else:
450 else:
450 missing.add(r)
451 missing.add(r)
451 for p in self.parentrevs(r):
452 for p in self.parentrevs(r):
452 if p not in has:
453 if p not in has:
453 visit.append(p)
454 visit.append(p)
454 missing = list(missing)
455 missing = list(missing)
455 missing.sort()
456 missing.sort()
456 return has, [self.node(r) for r in missing]
457 return has, [self.node(r) for r in missing]
457
458
458 def findmissingrevs(self, common=None, heads=None):
459 def findmissingrevs(self, common=None, heads=None):
459 """Return the revision numbers of the ancestors of heads that
460 """Return the revision numbers of the ancestors of heads that
460 are not ancestors of common.
461 are not ancestors of common.
461
462
462 More specifically, return a list of revision numbers corresponding to
463 More specifically, return a list of revision numbers corresponding to
463 nodes N such that every N satisfies the following constraints:
464 nodes N such that every N satisfies the following constraints:
464
465
465 1. N is an ancestor of some node in 'heads'
466 1. N is an ancestor of some node in 'heads'
466 2. N is not an ancestor of any node in 'common'
467 2. N is not an ancestor of any node in 'common'
467
468
468 The list is sorted by revision number, meaning it is
469 The list is sorted by revision number, meaning it is
469 topologically sorted.
470 topologically sorted.
470
471
471 'heads' and 'common' are both lists of revision numbers. If heads is
472 'heads' and 'common' are both lists of revision numbers. If heads is
472 not supplied, uses all of the revlog's heads. If common is not
473 not supplied, uses all of the revlog's heads. If common is not
473 supplied, uses nullid."""
474 supplied, uses nullid."""
474 if common is None:
475 if common is None:
475 common = [nullrev]
476 common = [nullrev]
476 if heads is None:
477 if heads is None:
477 heads = self.headrevs()
478 heads = self.headrevs()
478
479
479 return ancestor.missingancestors(heads, common, self.parentrevs)
480 return ancestor.missingancestors(heads, common, self.parentrevs)
480
481
481 def findmissing(self, common=None, heads=None):
482 def findmissing(self, common=None, heads=None):
482 """Return the ancestors of heads that are not ancestors of common.
483 """Return the ancestors of heads that are not ancestors of common.
483
484
484 More specifically, return a list of nodes N such that every N
485 More specifically, return a list of nodes N such that every N
485 satisfies the following constraints:
486 satisfies the following constraints:
486
487
487 1. N is an ancestor of some node in 'heads'
488 1. N is an ancestor of some node in 'heads'
488 2. N is not an ancestor of any node in 'common'
489 2. N is not an ancestor of any node in 'common'
489
490
490 The list is sorted by revision number, meaning it is
491 The list is sorted by revision number, meaning it is
491 topologically sorted.
492 topologically sorted.
492
493
493 'heads' and 'common' are both lists of node IDs. If heads is
494 'heads' and 'common' are both lists of node IDs. If heads is
494 not supplied, uses all of the revlog's heads. If common is not
495 not supplied, uses all of the revlog's heads. If common is not
495 supplied, uses nullid."""
496 supplied, uses nullid."""
496 if common is None:
497 if common is None:
497 common = [nullid]
498 common = [nullid]
498 if heads is None:
499 if heads is None:
499 heads = self.heads()
500 heads = self.heads()
500
501
501 common = [self.rev(n) for n in common]
502 common = [self.rev(n) for n in common]
502 heads = [self.rev(n) for n in heads]
503 heads = [self.rev(n) for n in heads]
503
504
504 return [self.node(r) for r in
505 return [self.node(r) for r in
505 ancestor.missingancestors(heads, common, self.parentrevs)]
506 ancestor.missingancestors(heads, common, self.parentrevs)]
506
507
507 def nodesbetween(self, roots=None, heads=None):
508 def nodesbetween(self, roots=None, heads=None):
508 """Return a topological path from 'roots' to 'heads'.
509 """Return a topological path from 'roots' to 'heads'.
509
510
510 Return a tuple (nodes, outroots, outheads) where 'nodes' is a
511 Return a tuple (nodes, outroots, outheads) where 'nodes' is a
511 topologically sorted list of all nodes N that satisfy both of
512 topologically sorted list of all nodes N that satisfy both of
512 these constraints:
513 these constraints:
513
514
514 1. N is a descendant of some node in 'roots'
515 1. N is a descendant of some node in 'roots'
515 2. N is an ancestor of some node in 'heads'
516 2. N is an ancestor of some node in 'heads'
516
517
517 Every node is considered to be both a descendant and an ancestor
518 Every node is considered to be both a descendant and an ancestor
518 of itself, so every reachable node in 'roots' and 'heads' will be
519 of itself, so every reachable node in 'roots' and 'heads' will be
519 included in 'nodes'.
520 included in 'nodes'.
520
521
521 'outroots' is the list of reachable nodes in 'roots', i.e., the
522 'outroots' is the list of reachable nodes in 'roots', i.e., the
522 subset of 'roots' that is returned in 'nodes'. Likewise,
523 subset of 'roots' that is returned in 'nodes'. Likewise,
523 'outheads' is the subset of 'heads' that is also in 'nodes'.
524 'outheads' is the subset of 'heads' that is also in 'nodes'.
524
525
525 'roots' and 'heads' are both lists of node IDs. If 'roots' is
526 'roots' and 'heads' are both lists of node IDs. If 'roots' is
526 unspecified, uses nullid as the only root. If 'heads' is
527 unspecified, uses nullid as the only root. If 'heads' is
527 unspecified, uses list of all of the revlog's heads."""
528 unspecified, uses list of all of the revlog's heads."""
528 nonodes = ([], [], [])
529 nonodes = ([], [], [])
529 if roots is not None:
530 if roots is not None:
530 roots = list(roots)
531 roots = list(roots)
531 if not roots:
532 if not roots:
532 return nonodes
533 return nonodes
533 lowestrev = min([self.rev(n) for n in roots])
534 lowestrev = min([self.rev(n) for n in roots])
534 else:
535 else:
535 roots = [nullid] # Everybody's a descendant of nullid
536 roots = [nullid] # Everybody's a descendant of nullid
536 lowestrev = nullrev
537 lowestrev = nullrev
537 if (lowestrev == nullrev) and (heads is None):
538 if (lowestrev == nullrev) and (heads is None):
538 # We want _all_ the nodes!
539 # We want _all_ the nodes!
539 return ([self.node(r) for r in self], [nullid], list(self.heads()))
540 return ([self.node(r) for r in self], [nullid], list(self.heads()))
540 if heads is None:
541 if heads is None:
541 # All nodes are ancestors, so the latest ancestor is the last
542 # All nodes are ancestors, so the latest ancestor is the last
542 # node.
543 # node.
543 highestrev = len(self) - 1
544 highestrev = len(self) - 1
544 # Set ancestors to None to signal that every node is an ancestor.
545 # Set ancestors to None to signal that every node is an ancestor.
545 ancestors = None
546 ancestors = None
546 # Set heads to an empty dictionary for later discovery of heads
547 # Set heads to an empty dictionary for later discovery of heads
547 heads = {}
548 heads = {}
548 else:
549 else:
549 heads = list(heads)
550 heads = list(heads)
550 if not heads:
551 if not heads:
551 return nonodes
552 return nonodes
552 ancestors = set()
553 ancestors = set()
553 # Turn heads into a dictionary so we can remove 'fake' heads.
554 # Turn heads into a dictionary so we can remove 'fake' heads.
554 # Also, later we will be using it to filter out the heads we can't
555 # Also, later we will be using it to filter out the heads we can't
555 # find from roots.
556 # find from roots.
556 heads = dict.fromkeys(heads, False)
557 heads = dict.fromkeys(heads, False)
557 # Start at the top and keep marking parents until we're done.
558 # Start at the top and keep marking parents until we're done.
558 nodestotag = set(heads)
559 nodestotag = set(heads)
559 # Remember where the top was so we can use it as a limit later.
560 # Remember where the top was so we can use it as a limit later.
560 highestrev = max([self.rev(n) for n in nodestotag])
561 highestrev = max([self.rev(n) for n in nodestotag])
561 while nodestotag:
562 while nodestotag:
562 # grab a node to tag
563 # grab a node to tag
563 n = nodestotag.pop()
564 n = nodestotag.pop()
564 # Never tag nullid
565 # Never tag nullid
565 if n == nullid:
566 if n == nullid:
566 continue
567 continue
567 # A node's revision number represents its place in a
568 # A node's revision number represents its place in a
568 # topologically sorted list of nodes.
569 # topologically sorted list of nodes.
569 r = self.rev(n)
570 r = self.rev(n)
570 if r >= lowestrev:
571 if r >= lowestrev:
571 if n not in ancestors:
572 if n not in ancestors:
572 # If we are possibly a descendant of one of the roots
573 # If we are possibly a descendant of one of the roots
573 # and we haven't already been marked as an ancestor
574 # and we haven't already been marked as an ancestor
574 ancestors.add(n) # Mark as ancestor
575 ancestors.add(n) # Mark as ancestor
575 # Add non-nullid parents to list of nodes to tag.
576 # Add non-nullid parents to list of nodes to tag.
576 nodestotag.update([p for p in self.parents(n) if
577 nodestotag.update([p for p in self.parents(n) if
577 p != nullid])
578 p != nullid])
578 elif n in heads: # We've seen it before, is it a fake head?
579 elif n in heads: # We've seen it before, is it a fake head?
579 # So it is, real heads should not be the ancestors of
580 # So it is, real heads should not be the ancestors of
580 # any other heads.
581 # any other heads.
581 heads.pop(n)
582 heads.pop(n)
582 if not ancestors:
583 if not ancestors:
583 return nonodes
584 return nonodes
584 # Now that we have our set of ancestors, we want to remove any
585 # Now that we have our set of ancestors, we want to remove any
585 # roots that are not ancestors.
586 # roots that are not ancestors.
586
587
587 # If one of the roots was nullid, everything is included anyway.
588 # If one of the roots was nullid, everything is included anyway.
588 if lowestrev > nullrev:
589 if lowestrev > nullrev:
589 # But, since we weren't, let's recompute the lowest rev to not
590 # But, since we weren't, let's recompute the lowest rev to not
590 # include roots that aren't ancestors.
591 # include roots that aren't ancestors.
591
592
592 # Filter out roots that aren't ancestors of heads
593 # Filter out roots that aren't ancestors of heads
593 roots = [n for n in roots if n in ancestors]
594 roots = [n for n in roots if n in ancestors]
594 # Recompute the lowest revision
595 # Recompute the lowest revision
595 if roots:
596 if roots:
596 lowestrev = min([self.rev(n) for n in roots])
597 lowestrev = min([self.rev(n) for n in roots])
597 else:
598 else:
598 # No more roots? Return empty list
599 # No more roots? Return empty list
599 return nonodes
600 return nonodes
600 else:
601 else:
601 # We are descending from nullid, and don't need to care about
602 # We are descending from nullid, and don't need to care about
602 # any other roots.
603 # any other roots.
603 lowestrev = nullrev
604 lowestrev = nullrev
604 roots = [nullid]
605 roots = [nullid]
605 # Transform our roots list into a set.
606 # Transform our roots list into a set.
606 descendants = set(roots)
607 descendants = set(roots)
607 # Also, keep the original roots so we can filter out roots that aren't
608 # Also, keep the original roots so we can filter out roots that aren't
608 # 'real' roots (i.e. are descended from other roots).
609 # 'real' roots (i.e. are descended from other roots).
609 roots = descendants.copy()
610 roots = descendants.copy()
610 # Our topologically sorted list of output nodes.
611 # Our topologically sorted list of output nodes.
611 orderedout = []
612 orderedout = []
612 # Don't start at nullid since we don't want nullid in our output list,
613 # Don't start at nullid since we don't want nullid in our output list,
613 # and if nullid shows up in descendants, empty parents will look like
614 # and if nullid shows up in descendants, empty parents will look like
614 # they're descendants.
615 # they're descendants.
615 for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
616 for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
616 n = self.node(r)
617 n = self.node(r)
617 isdescendant = False
618 isdescendant = False
618 if lowestrev == nullrev: # Everybody is a descendant of nullid
619 if lowestrev == nullrev: # Everybody is a descendant of nullid
619 isdescendant = True
620 isdescendant = True
620 elif n in descendants:
621 elif n in descendants:
621 # n is already a descendant
622 # n is already a descendant
622 isdescendant = True
623 isdescendant = True
623 # This check only needs to be done here because all the roots
624 # This check only needs to be done here because all the roots
624 # will start being marked as descendants before the loop.
625 # will start being marked as descendants before the loop.
625 if n in roots:
626 if n in roots:
626 # If n was a root, check if it's a 'real' root.
627 # If n was a root, check if it's a 'real' root.
627 p = tuple(self.parents(n))
628 p = tuple(self.parents(n))
628 # If any of its parents are descendants, it's not a root.
629 # If any of its parents are descendants, it's not a root.
629 if (p[0] in descendants) or (p[1] in descendants):
630 if (p[0] in descendants) or (p[1] in descendants):
630 roots.remove(n)
631 roots.remove(n)
631 else:
632 else:
632 p = tuple(self.parents(n))
633 p = tuple(self.parents(n))
633 # A node is a descendant if either of its parents are
634 # A node is a descendant if either of its parents are
634 # descendants. (We seeded the dependents list with the roots
635 # descendants. (We seeded the dependents list with the roots
635 # up there, remember?)
636 # up there, remember?)
636 if (p[0] in descendants) or (p[1] in descendants):
637 if (p[0] in descendants) or (p[1] in descendants):
637 descendants.add(n)
638 descendants.add(n)
638 isdescendant = True
639 isdescendant = True
639 if isdescendant and ((ancestors is None) or (n in ancestors)):
640 if isdescendant and ((ancestors is None) or (n in ancestors)):
640 # Only include nodes that are both descendants and ancestors.
641 # Only include nodes that are both descendants and ancestors.
641 orderedout.append(n)
642 orderedout.append(n)
642 if (ancestors is not None) and (n in heads):
643 if (ancestors is not None) and (n in heads):
643 # We're trying to figure out which heads are reachable
644 # We're trying to figure out which heads are reachable
644 # from roots.
645 # from roots.
645 # Mark this head as having been reached
646 # Mark this head as having been reached
646 heads[n] = True
647 heads[n] = True
647 elif ancestors is None:
648 elif ancestors is None:
648 # Otherwise, we're trying to discover the heads.
649 # Otherwise, we're trying to discover the heads.
649 # Assume this is a head because if it isn't, the next step
650 # Assume this is a head because if it isn't, the next step
650 # will eventually remove it.
651 # will eventually remove it.
651 heads[n] = True
652 heads[n] = True
652 # But, obviously its parents aren't.
653 # But, obviously its parents aren't.
653 for p in self.parents(n):
654 for p in self.parents(n):
654 heads.pop(p, None)
655 heads.pop(p, None)
655 heads = [n for n, flag in heads.iteritems() if flag]
656 heads = [n for n, flag in heads.iteritems() if flag]
656 roots = list(roots)
657 roots = list(roots)
657 assert orderedout
658 assert orderedout
658 assert roots
659 assert roots
659 assert heads
660 assert heads
660 return (orderedout, roots, heads)
661 return (orderedout, roots, heads)
661
662
662 def headrevs(self):
663 def headrevs(self):
663 try:
664 try:
664 return self.index.headrevs()
665 return self.index.headrevs()
665 except AttributeError:
666 except AttributeError:
666 return self._headrevs()
667 return self._headrevs()
667
668
668 def _headrevs(self):
669 def _headrevs(self):
669 count = len(self)
670 count = len(self)
670 if not count:
671 if not count:
671 return [nullrev]
672 return [nullrev]
672 # we won't iter over filtered rev so nobody is a head at start
673 # we won't iter over filtered rev so nobody is a head at start
673 ishead = [0] * (count + 1)
674 ishead = [0] * (count + 1)
674 index = self.index
675 index = self.index
675 for r in self:
676 for r in self:
676 ishead[r] = 1 # I may be a head
677 ishead[r] = 1 # I may be a head
677 e = index[r]
678 e = index[r]
678 ishead[e[5]] = ishead[e[6]] = 0 # my parents are not
679 ishead[e[5]] = ishead[e[6]] = 0 # my parents are not
679 return [r for r, val in enumerate(ishead) if val]
680 return [r for r, val in enumerate(ishead) if val]
680
681
681 def heads(self, start=None, stop=None):
682 def heads(self, start=None, stop=None):
682 """return the list of all nodes that have no children
683 """return the list of all nodes that have no children
683
684
684 if start is specified, only heads that are descendants of
685 if start is specified, only heads that are descendants of
685 start will be returned
686 start will be returned
686 if stop is specified, it will consider all the revs from stop
687 if stop is specified, it will consider all the revs from stop
687 as if they had no children
688 as if they had no children
688 """
689 """
689 if start is None and stop is None:
690 if start is None and stop is None:
690 if not len(self):
691 if not len(self):
691 return [nullid]
692 return [nullid]
692 return [self.node(r) for r in self.headrevs()]
693 return [self.node(r) for r in self.headrevs()]
693
694
694 if start is None:
695 if start is None:
695 start = nullid
696 start = nullid
696 if stop is None:
697 if stop is None:
697 stop = []
698 stop = []
698 stoprevs = set([self.rev(n) for n in stop])
699 stoprevs = set([self.rev(n) for n in stop])
699 startrev = self.rev(start)
700 startrev = self.rev(start)
700 reachable = set((startrev,))
701 reachable = set((startrev,))
701 heads = set((startrev,))
702 heads = set((startrev,))
702
703
703 parentrevs = self.parentrevs
704 parentrevs = self.parentrevs
704 for r in self.revs(start=startrev + 1):
705 for r in self.revs(start=startrev + 1):
705 for p in parentrevs(r):
706 for p in parentrevs(r):
706 if p in reachable:
707 if p in reachable:
707 if r not in stoprevs:
708 if r not in stoprevs:
708 reachable.add(r)
709 reachable.add(r)
709 heads.add(r)
710 heads.add(r)
710 if p in heads and p not in stoprevs:
711 if p in heads and p not in stoprevs:
711 heads.remove(p)
712 heads.remove(p)
712
713
713 return [self.node(r) for r in heads]
714 return [self.node(r) for r in heads]
714
715
715 def children(self, node):
716 def children(self, node):
716 """find the children of a given node"""
717 """find the children of a given node"""
717 c = []
718 c = []
718 p = self.rev(node)
719 p = self.rev(node)
719 for r in self.revs(start=p + 1):
720 for r in self.revs(start=p + 1):
720 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
721 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
721 if prevs:
722 if prevs:
722 for pr in prevs:
723 for pr in prevs:
723 if pr == p:
724 if pr == p:
724 c.append(self.node(r))
725 c.append(self.node(r))
725 elif p == nullrev:
726 elif p == nullrev:
726 c.append(self.node(r))
727 c.append(self.node(r))
727 return c
728 return c
728
729
729 def descendant(self, start, end):
730 def descendant(self, start, end):
730 if start == nullrev:
731 if start == nullrev:
731 return True
732 return True
732 for i in self.descendants([start]):
733 for i in self.descendants([start]):
733 if i == end:
734 if i == end:
734 return True
735 return True
735 elif i > end:
736 elif i > end:
736 break
737 break
737 return False
738 return False
738
739
739 def commonancestorsheads(self, a, b):
740 def commonancestorsheads(self, a, b):
740 """calculate all the heads of the common ancestors of nodes a and b"""
741 """calculate all the heads of the common ancestors of nodes a and b"""
741 a, b = self.rev(a), self.rev(b)
742 a, b = self.rev(a), self.rev(b)
742 try:
743 try:
743 ancs = self.index.commonancestorsheads(a, b)
744 ancs = self.index.commonancestorsheads(a, b)
744 except (AttributeError, OverflowError): # C implementation failed
745 except (AttributeError, OverflowError): # C implementation failed
745 ancs = ancestor.commonancestorsheads(self.parentrevs, a, b)
746 ancs = ancestor.commonancestorsheads(self.parentrevs, a, b)
746 return map(self.node, ancs)
747 return map(self.node, ancs)
747
748
748 def isancestor(self, a, b):
749 def isancestor(self, a, b):
749 """return True if node a is an ancestor of node b
750 """return True if node a is an ancestor of node b
750
751
751 The implementation of this is trivial but the use of
752 The implementation of this is trivial but the use of
752 commonancestorsheads is not."""
753 commonancestorsheads is not."""
753 return a in self.commonancestorsheads(a, b)
754 return a in self.commonancestorsheads(a, b)
754
755
755 def ancestor(self, a, b):
756 def ancestor(self, a, b):
756 """calculate the "best" common ancestor of nodes a and b"""
757 """calculate the "best" common ancestor of nodes a and b"""
757
758
758 a, b = self.rev(a), self.rev(b)
759 a, b = self.rev(a), self.rev(b)
759 try:
760 try:
760 ancs = self.index.ancestors(a, b)
761 ancs = self.index.ancestors(a, b)
761 except (AttributeError, OverflowError):
762 except (AttributeError, OverflowError):
762 ancs = ancestor.ancestors(self.parentrevs, a, b)
763 ancs = ancestor.ancestors(self.parentrevs, a, b)
763 if ancs:
764 if ancs:
764 # choose a consistent winner when there's a tie
765 # choose a consistent winner when there's a tie
765 return min(map(self.node, ancs))
766 return min(map(self.node, ancs))
766 return nullid
767 return nullid
767
768
768 def _match(self, id):
769 def _match(self, id):
769 if isinstance(id, int):
770 if isinstance(id, int):
770 # rev
771 # rev
771 return self.node(id)
772 return self.node(id)
772 if len(id) == 20:
773 if len(id) == 20:
773 # possibly a binary node
774 # possibly a binary node
774 # odds of a binary node being all hex in ASCII are 1 in 10**25
775 # odds of a binary node being all hex in ASCII are 1 in 10**25
775 try:
776 try:
776 node = id
777 node = id
777 self.rev(node) # quick search the index
778 self.rev(node) # quick search the index
778 return node
779 return node
779 except LookupError:
780 except LookupError:
780 pass # may be partial hex id
781 pass # may be partial hex id
781 try:
782 try:
782 # str(rev)
783 # str(rev)
783 rev = int(id)
784 rev = int(id)
784 if str(rev) != id:
785 if str(rev) != id:
785 raise ValueError
786 raise ValueError
786 if rev < 0:
787 if rev < 0:
787 rev = len(self) + rev
788 rev = len(self) + rev
788 if rev < 0 or rev >= len(self):
789 if rev < 0 or rev >= len(self):
789 raise ValueError
790 raise ValueError
790 return self.node(rev)
791 return self.node(rev)
791 except (ValueError, OverflowError):
792 except (ValueError, OverflowError):
792 pass
793 pass
793 if len(id) == 40:
794 if len(id) == 40:
794 try:
795 try:
795 # a full hex nodeid?
796 # a full hex nodeid?
796 node = bin(id)
797 node = bin(id)
797 self.rev(node)
798 self.rev(node)
798 return node
799 return node
799 except (TypeError, LookupError):
800 except (TypeError, LookupError):
800 pass
801 pass
801
802
802 def _partialmatch(self, id):
803 def _partialmatch(self, id):
803 try:
804 try:
804 n = self.index.partialmatch(id)
805 n = self.index.partialmatch(id)
805 if n and self.hasnode(n):
806 if n and self.hasnode(n):
806 return n
807 return n
807 return None
808 return None
808 except RevlogError:
809 except RevlogError:
809 # parsers.c radix tree lookup gave multiple matches
810 # parsers.c radix tree lookup gave multiple matches
810 # fall through to slow path that filters hidden revisions
811 # fall through to slow path that filters hidden revisions
811 pass
812 pass
812 except (AttributeError, ValueError):
813 except (AttributeError, ValueError):
813 # we are pure python, or key was too short to search radix tree
814 # we are pure python, or key was too short to search radix tree
814 pass
815 pass
815
816
816 if id in self._pcache:
817 if id in self._pcache:
817 return self._pcache[id]
818 return self._pcache[id]
818
819
819 if len(id) < 40:
820 if len(id) < 40:
820 try:
821 try:
821 # hex(node)[:...]
822 # hex(node)[:...]
822 l = len(id) // 2 # grab an even number of digits
823 l = len(id) // 2 # grab an even number of digits
823 prefix = bin(id[:l * 2])
824 prefix = bin(id[:l * 2])
824 nl = [e[7] for e in self.index if e[7].startswith(prefix)]
825 nl = [e[7] for e in self.index if e[7].startswith(prefix)]
825 nl = [n for n in nl if hex(n).startswith(id) and
826 nl = [n for n in nl if hex(n).startswith(id) and
826 self.hasnode(n)]
827 self.hasnode(n)]
827 if len(nl) > 0:
828 if len(nl) > 0:
828 if len(nl) == 1:
829 if len(nl) == 1:
829 self._pcache[id] = nl[0]
830 self._pcache[id] = nl[0]
830 return nl[0]
831 return nl[0]
831 raise LookupError(id, self.indexfile,
832 raise LookupError(id, self.indexfile,
832 _('ambiguous identifier'))
833 _('ambiguous identifier'))
833 return None
834 return None
834 except TypeError:
835 except TypeError:
835 pass
836 pass
836
837
837 def lookup(self, id):
838 def lookup(self, id):
838 """locate a node based on:
839 """locate a node based on:
839 - revision number or str(revision number)
840 - revision number or str(revision number)
840 - nodeid or subset of hex nodeid
841 - nodeid or subset of hex nodeid
841 """
842 """
842 n = self._match(id)
843 n = self._match(id)
843 if n is not None:
844 if n is not None:
844 return n
845 return n
845 n = self._partialmatch(id)
846 n = self._partialmatch(id)
846 if n:
847 if n:
847 return n
848 return n
848
849
849 raise LookupError(id, self.indexfile, _('no match found'))
850 raise LookupError(id, self.indexfile, _('no match found'))
850
851
851 def cmp(self, node, text):
852 def cmp(self, node, text):
852 """compare text with a given file revision
853 """compare text with a given file revision
853
854
854 returns True if text is different than what is stored.
855 returns True if text is different than what is stored.
855 """
856 """
856 p1, p2 = self.parents(node)
857 p1, p2 = self.parents(node)
857 return hash(text, p1, p2) != node
858 return hash(text, p1, p2) != node
858
859
859 def _addchunk(self, offset, data):
860 def _addchunk(self, offset, data):
860 o, d = self._chunkcache
861 o, d = self._chunkcache
861 # try to add to existing cache
862 # try to add to existing cache
862 if o + len(d) == offset and len(d) + len(data) < _chunksize:
863 if o + len(d) == offset and len(d) + len(data) < _chunksize:
863 self._chunkcache = o, d + data
864 self._chunkcache = o, d + data
864 else:
865 else:
865 self._chunkcache = offset, data
866 self._chunkcache = offset, data
866
867
867 def _loadchunk(self, offset, length):
868 def _loadchunk(self, offset, length):
868 if self._inline:
869 if self._inline:
869 df = self.opener(self.indexfile)
870 df = self.opener(self.indexfile)
870 else:
871 else:
871 df = self.opener(self.datafile)
872 df = self.opener(self.datafile)
872
873
873 # Cache data both forward and backward around the requested
874 # Cache data both forward and backward around the requested
874 # data, in a fixed size window. This helps speed up operations
875 # data, in a fixed size window. This helps speed up operations
875 # involving reading the revlog backwards.
876 # involving reading the revlog backwards.
876 cachesize = self._chunkcachesize
877 cachesize = self._chunkcachesize
877 realoffset = offset & ~(cachesize - 1)
878 realoffset = offset & ~(cachesize - 1)
878 reallength = (((offset + length + cachesize) & ~(cachesize - 1))
879 reallength = (((offset + length + cachesize) & ~(cachesize - 1))
879 - realoffset)
880 - realoffset)
880 df.seek(realoffset)
881 df.seek(realoffset)
881 d = df.read(reallength)
882 d = df.read(reallength)
882 df.close()
883 df.close()
883 self._addchunk(realoffset, d)
884 self._addchunk(realoffset, d)
884 if offset != realoffset or reallength != length:
885 if offset != realoffset or reallength != length:
885 return util.buffer(d, offset - realoffset, length)
886 return util.buffer(d, offset - realoffset, length)
886 return d
887 return d
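# Example (sketch): with the default 64 KiB cache size, asking for 100 bytes
# at offset 70000 rounds out to realoffset == 65536 and reallength == 65536,
# so one aligned 64 KiB window is read and cached, and a util.buffer over
# bytes 4464..4564 of that window is returned.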
887
888
888 def _getchunk(self, offset, length):
889 def _getchunk(self, offset, length):
889 o, d = self._chunkcache
890 o, d = self._chunkcache
890 l = len(d)
891 l = len(d)
891
892
892 # is it in the cache?
893 # is it in the cache?
893 cachestart = offset - o
894 cachestart = offset - o
894 cacheend = cachestart + length
895 cacheend = cachestart + length
895 if cachestart >= 0 and cacheend <= l:
896 if cachestart >= 0 and cacheend <= l:
896 if cachestart == 0 and cacheend == l:
897 if cachestart == 0 and cacheend == l:
897 return d # avoid a copy
898 return d # avoid a copy
898 return util.buffer(d, cachestart, cacheend - cachestart)
899 return util.buffer(d, cachestart, cacheend - cachestart)
899
900
900 return self._loadchunk(offset, length)
901 return self._loadchunk(offset, length)
901
902
902 def _chunkraw(self, startrev, endrev):
903 def _chunkraw(self, startrev, endrev):
903 start = self.start(startrev)
904 start = self.start(startrev)
904 end = self.end(endrev)
905 end = self.end(endrev)
905 if self._inline:
906 if self._inline:
906 start += (startrev + 1) * self._io.size
907 start += (startrev + 1) * self._io.size
907 end += (endrev + 1) * self._io.size
908 end += (endrev + 1) * self._io.size
908 length = end - start
909 length = end - start
909 return self._getchunk(start, length)
910 return self._getchunk(start, length)
910
911
911 def _chunk(self, rev):
912 def _chunk(self, rev):
912 return decompress(self._chunkraw(rev, rev))
913 return decompress(self._chunkraw(rev, rev))
913
914
914 def _chunks(self, revs):
915 def _chunks(self, revs):
915 '''faster version of [self._chunk(rev) for rev in revs]
916 '''faster version of [self._chunk(rev) for rev in revs]
916
917
917 Assumes that revs is in ascending order.'''
918 Assumes that revs is in ascending order.'''
918 if not revs:
919 if not revs:
919 return []
920 return []
920 start = self.start
921 start = self.start
921 length = self.length
922 length = self.length
922 inline = self._inline
923 inline = self._inline
923 iosize = self._io.size
924 iosize = self._io.size
924 buffer = util.buffer
925 buffer = util.buffer
925
926
926 l = []
927 l = []
927 ladd = l.append
928 ladd = l.append
928
929
929 # preload the cache
930 # preload the cache
930 try:
931 try:
931 while True:
932 while True:
932 # ensure that the cache doesn't change out from under us
933 # ensure that the cache doesn't change out from under us
933 _cache = self._chunkcache
934 _cache = self._chunkcache
934 self._chunkraw(revs[0], revs[-1])
935 self._chunkraw(revs[0], revs[-1])
935 if _cache == self._chunkcache:
936 if _cache == self._chunkcache:
936 break
937 break
937 offset, data = _cache
938 offset, data = _cache
938 except OverflowError:
939 except OverflowError:
939 # issue4215 - we can't cache a run of chunks greater than
940 # issue4215 - we can't cache a run of chunks greater than
940 # 2G on Windows
941 # 2G on Windows
941 return [self._chunk(rev) for rev in revs]
942 return [self._chunk(rev) for rev in revs]
942
943
943 for rev in revs:
944 for rev in revs:
944 chunkstart = start(rev)
945 chunkstart = start(rev)
945 if inline:
946 if inline:
946 chunkstart += (rev + 1) * iosize
947 chunkstart += (rev + 1) * iosize
947 chunklength = length(rev)
948 chunklength = length(rev)
948 ladd(decompress(buffer(data, chunkstart - offset, chunklength)))
949 ladd(decompress(buffer(data, chunkstart - offset, chunklength)))
949
950
950 return l
951 return l
951
952
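# --- Illustrative sketch, not part of revlog.py: the fast path taken by
# _chunks() above.  Once one _chunkraw() call has pulled the whole run of
# revisions into the chunk cache, each revision's compressed chunk is a
# zero-copy slice of that buffer.  memoryview stands in for util.buffer;
# the offsets and lengths below are made-up data.
def slice_chunks(data, cacheoffset, starts, lengths):
    view = memoryview(data)
    out = []
    for start, length in zip(starts, lengths):
        rel = start - cacheoffset      # position relative to the cached buffer
        out.append(bytes(view[rel:rel + length]))
    return out

cache = b"xxAAABBBBCC"   # pretend this cached window starts at file offset 100
assert slice_chunks(cache, 100, [102, 105, 109], [3, 4, 2]) == \
    [b"AAA", b"BBBB", b"CC"]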
952 def _chunkclear(self):
953 def _chunkclear(self):
953 self._chunkcache = (0, '')
954 self._chunkcache = (0, '')
954
955
955 def deltaparent(self, rev):
956 def deltaparent(self, rev):
956 """return deltaparent of the given revision"""
957 """return deltaparent of the given revision"""
957 base = self.index[rev][3]
958 base = self.index[rev][3]
958 if base == rev:
959 if base == rev:
959 return nullrev
960 return nullrev
960 elif self._generaldelta:
961 elif self._generaldelta:
961 return base
962 return base
962 else:
963 else:
963 return rev - 1
964 return rev - 1
964
965
965 def revdiff(self, rev1, rev2):
966 def revdiff(self, rev1, rev2):
966 """return or calculate a delta between two revisions"""
967 """return or calculate a delta between two revisions"""
967 if rev1 != nullrev and self.deltaparent(rev2) == rev1:
968 if rev1 != nullrev and self.deltaparent(rev2) == rev1:
968 return str(self._chunk(rev2))
969 return str(self._chunk(rev2))
969
970
970 return mdiff.textdiff(self.revision(rev1),
971 return mdiff.textdiff(self.revision(rev1),
971 self.revision(rev2))
972 self.revision(rev2))
972
973
973 def revision(self, nodeorrev):
974 def revision(self, nodeorrev):
974 """return an uncompressed revision of a given node or revision
975 """return an uncompressed revision of a given node or revision
975 number.
976 number.
976 """
977 """
977 if isinstance(nodeorrev, int):
978 if isinstance(nodeorrev, int):
978 rev = nodeorrev
979 rev = nodeorrev
979 node = self.node(rev)
980 node = self.node(rev)
980 else:
981 else:
981 node = nodeorrev
982 node = nodeorrev
982 rev = None
983 rev = None
983
984
984 _cache = self._cache # grab local copy of cache to avoid thread race
985 _cache = self._cache # grab local copy of cache to avoid thread race
985 cachedrev = None
986 cachedrev = None
986 if node == nullid:
987 if node == nullid:
987 return ""
988 return ""
988 if _cache:
989 if _cache:
989 if _cache[0] == node:
990 if _cache[0] == node:
990 return _cache[2]
991 return _cache[2]
991 cachedrev = _cache[1]
992 cachedrev = _cache[1]
992
993
993 # look up what we need to read
994 # look up what we need to read
994 text = None
995 text = None
995 if rev is None:
996 if rev is None:
996 rev = self.rev(node)
997 rev = self.rev(node)
997
998
998 # check rev flags
999 # check rev flags
999 if self.flags(rev) & ~REVIDX_KNOWN_FLAGS:
1000 if self.flags(rev) & ~REVIDX_KNOWN_FLAGS:
1000 raise RevlogError(_('incompatible revision flag %x') %
1001 raise RevlogError(_('incompatible revision flag %x') %
1001 (self.flags(rev) & ~REVIDX_KNOWN_FLAGS))
1002 (self.flags(rev) & ~REVIDX_KNOWN_FLAGS))
1002
1003
1003 # build delta chain
1004 # build delta chain
1004 chain = []
1005 chain = []
1005 index = self.index # for performance
1006 index = self.index # for performance
1006 generaldelta = self._generaldelta
1007 generaldelta = self._generaldelta
1007 iterrev = rev
1008 iterrev = rev
1008 e = index[iterrev]
1009 e = index[iterrev]
1009 while iterrev != e[3] and iterrev != cachedrev:
1010 while iterrev != e[3] and iterrev != cachedrev:
1010 chain.append(iterrev)
1011 chain.append(iterrev)
1011 if generaldelta:
1012 if generaldelta:
1012 iterrev = e[3]
1013 iterrev = e[3]
1013 else:
1014 else:
1014 iterrev -= 1
1015 iterrev -= 1
1015 e = index[iterrev]
1016 e = index[iterrev]
1016
1017
1017 if iterrev == cachedrev:
1018 if iterrev == cachedrev:
1018 # cache hit
1019 # cache hit
1019 text = _cache[2]
1020 text = _cache[2]
1020 else:
1021 else:
1021 chain.append(iterrev)
1022 chain.append(iterrev)
1022 chain.reverse()
1023 chain.reverse()
1023
1024
1024 # drop cache to save memory
1025 # drop cache to save memory
1025 self._cache = None
1026 self._cache = None
1026
1027
1027 bins = self._chunks(chain)
1028 bins = self._chunks(chain)
1028 if text is None:
1029 if text is None:
1029 text = str(bins[0])
1030 text = str(bins[0])
1030 bins = bins[1:]
1031 bins = bins[1:]
1031
1032
1032 text = mdiff.patches(text, bins)
1033 text = mdiff.patches(text, bins)
1033
1034
1034 text = self._checkhash(text, node, rev)
1035 text = self._checkhash(text, node, rev)
1035
1036
1036 self._cache = (node, rev, text)
1037 self._cache = (node, rev, text)
1037 return text
1038 return text
1038
1039
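# --- Illustrative sketch, not part of revlog.py: how revision() above turns
# a delta chain back into a full text.  The walk mirrors the general-delta
# case (base_of[rev] plays the role of index[rev][3]); the delta format here
# is a simplified (start, end, replacement) list rather than the binary
# format mdiff.patches() consumes.
def patch(base, delta):
    out, pos = [], 0
    for start, end, repl in delta:     # non-overlapping edits, left to right
        out.append(base[pos:start])
        out.append(repl)
        pos = end
    out.append(base[pos:])
    return "".join(out)

def rebuild(rev, base_of, fulltext, deltas):
    # walk back through delta parents until a revision stored as a full
    # snapshot (its base is itself)...
    chain = []
    while base_of[rev] != rev:
        chain.append(rev)
        rev = base_of[rev]
    # ...then apply the collected deltas oldest-first on top of the snapshot
    text = fulltext[rev]
    for r in reversed(chain):
        text = patch(text, deltas[r])
    return text

# rev 0 is a snapshot; revs 1 and 2 are deltas against their predecessors
base_of  = {0: 0, 1: 0, 2: 1}
fulltext = {0: "one\ntwo\n"}
deltas   = {1: [(4, 8, "2\n")], 2: [(0, 4, "1\n")]}
assert rebuild(2, base_of, fulltext, deltas) == "1\n2\n"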
1039 def hash(self, text, p1, p2):
1040 def hash(self, text, p1, p2):
1040 """Compute a node hash.
1041 """Compute a node hash.
1041
1042
1042 Available as a function so that subclasses can replace the hash
1043 Available as a function so that subclasses can replace the hash
1043 as needed.
1044 as needed.
1044 """
1045 """
1045 return hash(text, p1, p2)
1046 return hash(text, p1, p2)
1046
1047
1047 def _checkhash(self, text, node, rev):
1048 def _checkhash(self, text, node, rev):
1048 p1, p2 = self.parents(node)
1049 p1, p2 = self.parents(node)
1049 self.checkhash(text, p1, p2, node, rev)
1050 self.checkhash(text, p1, p2, node, rev)
1050 return text
1051 return text
1051
1052
1052 def checkhash(self, text, p1, p2, node, rev=None):
1053 def checkhash(self, text, p1, p2, node, rev=None):
1053 if node != self.hash(text, p1, p2):
1054 if node != self.hash(text, p1, p2):
1054 revornode = rev
1055 revornode = rev
1055 if revornode is None:
1056 if revornode is None:
1056 revornode = templatefilters.short(hex(node))
1057 revornode = templatefilters.short(hex(node))
1057 raise RevlogError(_("integrity check failed on %s:%s")
1058 raise RevlogError(_("integrity check failed on %s:%s")
1058 % (self.indexfile, revornode))
1059 % (self.indexfile, revornode))
1059
1060
1060 def checkinlinesize(self, tr, fp=None):
1061 def checkinlinesize(self, tr, fp=None):
1061 if not self._inline or (self.start(-2) + self.length(-2)) < _maxinline:
1062 if not self._inline or (self.start(-2) + self.length(-2)) < _maxinline:
1062 return
1063 return
1063
1064
1064 trinfo = tr.find(self.indexfile)
1065 trinfo = tr.find(self.indexfile)
1065 if trinfo is None:
1066 if trinfo is None:
1066 raise RevlogError(_("%s not found in the transaction")
1067 raise RevlogError(_("%s not found in the transaction")
1067 % self.indexfile)
1068 % self.indexfile)
1068
1069
1069 trindex = trinfo[2]
1070 trindex = trinfo[2]
1070 dataoff = self.start(trindex)
1071 dataoff = self.start(trindex)
1071
1072
1072 tr.add(self.datafile, dataoff)
1073 tr.add(self.datafile, dataoff)
1073
1074
1074 if fp:
1075 if fp:
1075 fp.flush()
1076 fp.flush()
1076 fp.close()
1077 fp.close()
1077
1078
1078 df = self.opener(self.datafile, 'w')
1079 df = self.opener(self.datafile, 'w')
1079 try:
1080 try:
1080 for r in self:
1081 for r in self:
1081 df.write(self._chunkraw(r, r))
1082 df.write(self._chunkraw(r, r))
1082 finally:
1083 finally:
1083 df.close()
1084 df.close()
1084
1085
1085 fp = self.opener(self.indexfile, 'w', atomictemp=True)
1086 fp = self.opener(self.indexfile, 'w', atomictemp=True)
1086 self.version &= ~(REVLOGNGINLINEDATA)
1087 self.version &= ~(REVLOGNGINLINEDATA)
1087 self._inline = False
1088 self._inline = False
1088 for i in self:
1089 for i in self:
1089 e = self._io.packentry(self.index[i], self.node, self.version, i)
1090 e = self._io.packentry(self.index[i], self.node, self.version, i)
1090 fp.write(e)
1091 fp.write(e)
1091
1092
1092 # if we don't call close, the temp file will never replace the
1093 # if we don't call close, the temp file will never replace the
1093 # real index
1094 # real index
1094 fp.close()
1095 fp.close()
1095
1096
1096 tr.replace(self.indexfile, trindex * self._io.size)
1097 tr.replace(self.indexfile, trindex * self._io.size)
1097 self._chunkclear()
1098 self._chunkclear()
1098
1099
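# --- Illustrative sketch, not part of revlog.py: the migration performed by
# checkinlinesize() above.  An inline ".i" file interleaves each index entry
# with its data chunk; once the file outgrows _maxinline, the data chunks are
# rewritten into a separate ".d" file and a plain index is written without
# the inline flag.  The byte strings below are toy stand-ins for packed
# entries and compressed chunks.
def split_inline(interleaved):
    index, data = [], []
    for entry, chunk in interleaved:
        index.append(entry)
        data.append(chunk)
    return b"".join(index), b"".join(data)

assert split_inline([(b"<e0>", b"rev0"), (b"<e1>", b"rev1")]) == \
    (b"<e0><e1>", b"rev0rev1")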
1099 def addrevision(self, text, transaction, link, p1, p2, cachedelta=None,
1100 def addrevision(self, text, transaction, link, p1, p2, cachedelta=None,
1100 node=None):
1101 node=None):
1101 """add a revision to the log
1102 """add a revision to the log
1102
1103
1103 text - the revision data to add
1104 text - the revision data to add
1104 transaction - the transaction object used for rollback
1105 transaction - the transaction object used for rollback
1105 link - the linkrev data to add
1106 link - the linkrev data to add
1106 p1, p2 - the parent nodeids of the revision
1107 p1, p2 - the parent nodeids of the revision
1107 cachedelta - an optional precomputed delta
1108 cachedelta - an optional precomputed delta
1108 node - nodeid of revision; typically node is not specified, and it is
1109 node - nodeid of revision; typically node is not specified, and it is
1109 computed by default as hash(text, p1, p2); however, subclasses might
1110 computed by default as hash(text, p1, p2); however, subclasses might
1110 use a different hashing method (and override checkhash() in that case)
1111 use a different hashing method (and override checkhash() in that case)
1111 """
1112 """
1112 if link == nullrev:
1113 if link == nullrev:
1113 raise RevlogError(_("attempted to add linkrev -1 to %s")
1114 raise RevlogError(_("attempted to add linkrev -1 to %s")
1114 % self.indexfile)
1115 % self.indexfile)
1115 node = node or self.hash(text, p1, p2)
1116 node = node or self.hash(text, p1, p2)
1116 if node in self.nodemap:
1117 if node in self.nodemap:
1117 return node
1118 return node
1118
1119
1119 dfh = None
1120 dfh = None
1120 if not self._inline:
1121 if not self._inline:
1121 dfh = self.opener(self.datafile, "a")
1122 dfh = self.opener(self.datafile, "a")
1122 ifh = self.opener(self.indexfile, "a+")
1123 ifh = self.opener(self.indexfile, "a+")
1123 try:
1124 try:
1124 return self._addrevision(node, text, transaction, link, p1, p2,
1125 return self._addrevision(node, text, transaction, link, p1, p2,
1125 cachedelta, ifh, dfh)
1126 cachedelta, ifh, dfh)
1126 finally:
1127 finally:
1127 if dfh:
1128 if dfh:
1128 dfh.close()
1129 dfh.close()
1129 ifh.close()
1130 ifh.close()
1130
1131
1131 def compress(self, text):
1132 def compress(self, text):
1132 """ generate a possibly-compressed representation of text """
1133 """ generate a possibly-compressed representation of text """
1133 if not text:
1134 if not text:
1134 return ("", text)
1135 return ("", text)
1135 l = len(text)
1136 l = len(text)
1136 bin = None
1137 bin = None
1137 if l < 44:
1138 if l < 44:
1138 pass
1139 pass
1139 elif l > 1000000:
1140 elif l > 1000000:
1140 # zlib makes an internal copy, thus doubling memory usage for
1141 # zlib makes an internal copy, thus doubling memory usage for
1141 # large files, so let's do this in pieces
1142 # large files, so let's do this in pieces
1142 z = zlib.compressobj()
1143 z = zlib.compressobj()
1143 p = []
1144 p = []
1144 pos = 0
1145 pos = 0
1145 while pos < l:
1146 while pos < l:
1146 pos2 = pos + 2**20
1147 pos2 = pos + 2**20
1147 p.append(z.compress(text[pos:pos2]))
1148 p.append(z.compress(text[pos:pos2]))
1148 pos = pos2
1149 pos = pos2
1149 p.append(z.flush())
1150 p.append(z.flush())
1150 if sum(map(len, p)) < l:
1151 if sum(map(len, p)) < l:
1151 bin = "".join(p)
1152 bin = "".join(p)
1152 else:
1153 else:
1153 bin = _compress(text)
1154 bin = _compress(text)
1154 if bin is None or len(bin) > l:
1155 if bin is None or len(bin) > l:
1155 if text[0] == '\0':
1156 if text[0] == '\0':
1156 return ("", text)
1157 return ("", text)
1157 return ('u', text)
1158 return ('u', text)
1158 return ("", bin)
1159 return ("", bin)
1159
1160
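# --- Illustrative sketch, not part of revlog.py: the storage decision made
# by compress() above.  Texts under 44 bytes and texts zlib cannot shrink
# are stored literally; a 'u' prefix marks an uncompressed entry unless the
# text already starts with NUL.  The chunked path used for texts over 1 MB
# is omitted here.
import zlib

def store(text):
    if not text:
        return ("", text)
    comp = None
    if len(text) >= 44:                 # tiny texts are never worth compressing
        comp = zlib.compress(text)
    if comp is None or len(comp) > len(text):
        if text[:1] == b"\0":           # a leading NUL already means "literal"
            return ("", text)
        return ("u", text)
    return ("", comp)

assert store(b"abc") == ("u", b"abc")   # too small to bother
assert store(b"x" * 1000)[0] == ""      # compresses well, stored as zlib data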
1160 def _addrevision(self, node, text, transaction, link, p1, p2,
1161 def _addrevision(self, node, text, transaction, link, p1, p2,
1161 cachedelta, ifh, dfh):
1162 cachedelta, ifh, dfh):
1162 """internal function to add revisions to the log
1163 """internal function to add revisions to the log
1163
1164
1164 see addrevision for argument descriptions.
1165 see addrevision for argument descriptions.
1165 invariants:
1166 invariants:
1166 - text is optional (can be None); if not set, cachedelta must be set.
1167 - text is optional (can be None); if not set, cachedelta must be set.
1167 if both are set, they must correspond to each other.
1168 if both are set, they must correspond to each other.
1168 """
1169 """
1169 btext = [text]
1170 btext = [text]
1170 def buildtext():
1171 def buildtext():
1171 if btext[0] is not None:
1172 if btext[0] is not None:
1172 return btext[0]
1173 return btext[0]
1173 # flush any pending writes here so we can read it in revision
1174 # flush any pending writes here so we can read it in revision
1174 if dfh:
1175 if dfh:
1175 dfh.flush()
1176 dfh.flush()
1176 ifh.flush()
1177 ifh.flush()
1177 basetext = self.revision(self.node(cachedelta[0]))
1178 basetext = self.revision(self.node(cachedelta[0]))
1178 btext[0] = mdiff.patch(basetext, cachedelta[1])
1179 btext[0] = mdiff.patch(basetext, cachedelta[1])
1180 try:
1179 self.checkhash(btext[0], p1, p2, node)
1181 self.checkhash(btext[0], p1, p2, node)
1182 except CensoredNodeError:
1183 pass # always import a censor tombstone.
1180 return btext[0]
1184 return btext[0]
1181
1185
1182 def builddelta(rev):
1186 def builddelta(rev):
1183 # can we use the cached delta?
1187 # can we use the cached delta?
1184 if cachedelta and cachedelta[0] == rev:
1188 if cachedelta and cachedelta[0] == rev:
1185 delta = cachedelta[1]
1189 delta = cachedelta[1]
1186 else:
1190 else:
1187 t = buildtext()
1191 t = buildtext()
1188 ptext = self.revision(self.node(rev))
1192 ptext = self.revision(self.node(rev))
1189 delta = mdiff.textdiff(ptext, t)
1193 delta = mdiff.textdiff(ptext, t)
1190 data = self.compress(delta)
1194 data = self.compress(delta)
1191 l = len(data[1]) + len(data[0])
1195 l = len(data[1]) + len(data[0])
1192 if basecache[0] == rev:
1196 if basecache[0] == rev:
1193 chainbase = basecache[1]
1197 chainbase = basecache[1]
1194 else:
1198 else:
1195 chainbase = self.chainbase(rev)
1199 chainbase = self.chainbase(rev)
1196 dist = l + offset - self.start(chainbase)
1200 dist = l + offset - self.start(chainbase)
1197 if self._generaldelta:
1201 if self._generaldelta:
1198 base = rev
1202 base = rev
1199 else:
1203 else:
1200 base = chainbase
1204 base = chainbase
1201 return dist, l, data, base, chainbase
1205 return dist, l, data, base, chainbase
1202
1206
1203 curr = len(self)
1207 curr = len(self)
1204 prev = curr - 1
1208 prev = curr - 1
1205 base = chainbase = curr
1209 base = chainbase = curr
1206 offset = self.end(prev)
1210 offset = self.end(prev)
1207 flags = 0
1211 flags = 0
1208 d = None
1212 d = None
1209 if self._basecache is None:
1213 if self._basecache is None:
1210 self._basecache = (prev, self.chainbase(prev))
1214 self._basecache = (prev, self.chainbase(prev))
1211 basecache = self._basecache
1215 basecache = self._basecache
1212 p1r, p2r = self.rev(p1), self.rev(p2)
1216 p1r, p2r = self.rev(p1), self.rev(p2)
1213
1217
1214 # should we try to build a delta?
1218 # should we try to build a delta?
1215 if prev != nullrev:
1219 if prev != nullrev:
1216 if self._generaldelta:
1220 if self._generaldelta:
1217 if p1r >= basecache[1]:
1221 if p1r >= basecache[1]:
1218 d = builddelta(p1r)
1222 d = builddelta(p1r)
1219 elif p2r >= basecache[1]:
1223 elif p2r >= basecache[1]:
1220 d = builddelta(p2r)
1224 d = builddelta(p2r)
1221 else:
1225 else:
1222 d = builddelta(prev)
1226 d = builddelta(prev)
1223 else:
1227 else:
1224 d = builddelta(prev)
1228 d = builddelta(prev)
1225 dist, l, data, base, chainbase = d
1229 dist, l, data, base, chainbase = d
1226
1230
1227 # full versions are inserted when the needed deltas
1231 # full versions are inserted when the needed deltas
1228 # become comparable to the uncompressed text
1232 # become comparable to the uncompressed text
1229 if text is None:
1233 if text is None:
1230 textlen = mdiff.patchedsize(self.rawsize(cachedelta[0]),
1234 textlen = mdiff.patchedsize(self.rawsize(cachedelta[0]),
1231 cachedelta[1])
1235 cachedelta[1])
1232 else:
1236 else:
1233 textlen = len(text)
1237 textlen = len(text)
1234 if d is None or dist > textlen * 2:
1238 if d is None or dist > textlen * 2:
1235 text = buildtext()
1239 text = buildtext()
1236 data = self.compress(text)
1240 data = self.compress(text)
1237 l = len(data[1]) + len(data[0])
1241 l = len(data[1]) + len(data[0])
1238 base = chainbase = curr
1242 base = chainbase = curr
1239
1243
1240 e = (offset_type(offset, flags), l, textlen,
1244 e = (offset_type(offset, flags), l, textlen,
1241 base, link, p1r, p2r, node)
1245 base, link, p1r, p2r, node)
1242 self.index.insert(-1, e)
1246 self.index.insert(-1, e)
1243 self.nodemap[node] = curr
1247 self.nodemap[node] = curr
1244
1248
1245 entry = self._io.packentry(e, self.node, self.version, curr)
1249 entry = self._io.packentry(e, self.node, self.version, curr)
1246 self._writeentry(transaction, ifh, dfh, entry, data, link, offset)
1250 self._writeentry(transaction, ifh, dfh, entry, data, link, offset)
1247
1251
1248 if type(text) == str: # only accept immutable objects
1252 if type(text) == str: # only accept immutable objects
1249 self._cache = (node, curr, text)
1253 self._cache = (node, curr, text)
1250 self._basecache = (curr, chainbase)
1254 self._basecache = (curr, chainbase)
1251 return node
1255 return node
1252
1256
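# --- Illustrative sketch, not part of revlog.py: why buildtext() above now
# tolerates CensoredNodeError.  A censored file revision is replaced by a
# tombstone whose content no longer hashes to the recorded node; a subclass's
# checkhash() is assumed to recognize that tombstone and raise
# CensoredNodeError instead of the generic integrity error, so the import
# proceeds anyway.  The detection callback here is a made-up stand-in.
class CensoredNodeError(Exception):
    pass

def checkhash(text, expected, compute_node, is_tombstone):
    if compute_node(text) != expected:
        if is_tombstone(text):
            raise CensoredNodeError(expected)
        raise ValueError("integrity check failed")

def buildtext(text, expected, compute_node, is_tombstone):
    try:
        checkhash(text, expected, compute_node, is_tombstone)
    except CensoredNodeError:
        pass                            # always import a censor tombstone
    return text

tomb = b"censored"
assert buildtext(tomb, b"deadbeef",
                 lambda t: b"some-other-hash",
                 lambda t: t == b"censored") == tomb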
1253 def _writeentry(self, transaction, ifh, dfh, entry, data, link, offset):
1257 def _writeentry(self, transaction, ifh, dfh, entry, data, link, offset):
1254 curr = len(self) - 1
1258 curr = len(self) - 1
1255 if not self._inline:
1259 if not self._inline:
1256 transaction.add(self.datafile, offset)
1260 transaction.add(self.datafile, offset)
1257 transaction.add(self.indexfile, curr * len(entry))
1261 transaction.add(self.indexfile, curr * len(entry))
1258 if data[0]:
1262 if data[0]:
1259 dfh.write(data[0])
1263 dfh.write(data[0])
1260 dfh.write(data[1])
1264 dfh.write(data[1])
1261 dfh.flush()
1265 dfh.flush()
1262 ifh.write(entry)
1266 ifh.write(entry)
1263 else:
1267 else:
1264 offset += curr * self._io.size
1268 offset += curr * self._io.size
1265 transaction.add(self.indexfile, offset, curr)
1269 transaction.add(self.indexfile, offset, curr)
1266 ifh.write(entry)
1270 ifh.write(entry)
1267 ifh.write(data[0])
1271 ifh.write(data[0])
1268 ifh.write(data[1])
1272 ifh.write(data[1])
1269 self.checkinlinesize(transaction, ifh)
1273 self.checkinlinesize(transaction, ifh)
1270
1274
1271 def addgroup(self, bundle, linkmapper, transaction):
1275 def addgroup(self, bundle, linkmapper, transaction):
1272 """
1276 """
1273 add a delta group
1277 add a delta group
1274
1278
1275 given a set of deltas, add them to the revision log. the
1279 given a set of deltas, add them to the revision log. the
1276 first delta is against its parent, which should be in our
1280 first delta is against its parent, which should be in our
1277 log, the rest are against the previous delta.
1281 log, the rest are against the previous delta.
1278 """
1282 """
1279
1283
1280 # track the base of the current delta log
1284 # track the base of the current delta log
1281 content = []
1285 content = []
1282 node = None
1286 node = None
1283
1287
1284 r = len(self)
1288 r = len(self)
1285 end = 0
1289 end = 0
1286 if r:
1290 if r:
1287 end = self.end(r - 1)
1291 end = self.end(r - 1)
1288 ifh = self.opener(self.indexfile, "a+")
1292 ifh = self.opener(self.indexfile, "a+")
1289 isize = r * self._io.size
1293 isize = r * self._io.size
1290 if self._inline:
1294 if self._inline:
1291 transaction.add(self.indexfile, end + isize, r)
1295 transaction.add(self.indexfile, end + isize, r)
1292 dfh = None
1296 dfh = None
1293 else:
1297 else:
1294 transaction.add(self.indexfile, isize, r)
1298 transaction.add(self.indexfile, isize, r)
1295 transaction.add(self.datafile, end)
1299 transaction.add(self.datafile, end)
1296 dfh = self.opener(self.datafile, "a")
1300 dfh = self.opener(self.datafile, "a")
1297
1301
1298 try:
1302 try:
1299 # loop through our set of deltas
1303 # loop through our set of deltas
1300 chain = None
1304 chain = None
1301 while True:
1305 while True:
1302 chunkdata = bundle.deltachunk(chain)
1306 chunkdata = bundle.deltachunk(chain)
1303 if not chunkdata:
1307 if not chunkdata:
1304 break
1308 break
1305 node = chunkdata['node']
1309 node = chunkdata['node']
1306 p1 = chunkdata['p1']
1310 p1 = chunkdata['p1']
1307 p2 = chunkdata['p2']
1311 p2 = chunkdata['p2']
1308 cs = chunkdata['cs']
1312 cs = chunkdata['cs']
1309 deltabase = chunkdata['deltabase']
1313 deltabase = chunkdata['deltabase']
1310 delta = chunkdata['delta']
1314 delta = chunkdata['delta']
1311
1315
1312 content.append(node)
1316 content.append(node)
1313
1317
1314 link = linkmapper(cs)
1318 link = linkmapper(cs)
1315 if node in self.nodemap:
1319 if node in self.nodemap:
1316 # this can happen if two branches make the same change
1320 # this can happen if two branches make the same change
1317 chain = node
1321 chain = node
1318 continue
1322 continue
1319
1323
1320 for p in (p1, p2):
1324 for p in (p1, p2):
1321 if p not in self.nodemap:
1325 if p not in self.nodemap:
1322 raise LookupError(p, self.indexfile,
1326 raise LookupError(p, self.indexfile,
1323 _('unknown parent'))
1327 _('unknown parent'))
1324
1328
1325 if deltabase not in self.nodemap:
1329 if deltabase not in self.nodemap:
1326 raise LookupError(deltabase, self.indexfile,
1330 raise LookupError(deltabase, self.indexfile,
1327 _('unknown delta base'))
1331 _('unknown delta base'))
1328
1332
1329 baserev = self.rev(deltabase)
1333 baserev = self.rev(deltabase)
1330 chain = self._addrevision(node, None, transaction, link,
1334 chain = self._addrevision(node, None, transaction, link,
1331 p1, p2, (baserev, delta), ifh, dfh)
1335 p1, p2, (baserev, delta), ifh, dfh)
1332 if not dfh and not self._inline:
1336 if not dfh and not self._inline:
1333 # addrevision switched from inline to conventional
1337 # addrevision switched from inline to conventional
1334 # reopen the index
1338 # reopen the index
1335 ifh.close()
1339 ifh.close()
1336 dfh = self.opener(self.datafile, "a")
1340 dfh = self.opener(self.datafile, "a")
1337 ifh = self.opener(self.indexfile, "a")
1341 ifh = self.opener(self.indexfile, "a")
1338 finally:
1342 finally:
1339 if dfh:
1343 if dfh:
1340 dfh.close()
1344 dfh.close()
1341 ifh.close()
1345 ifh.close()
1342
1346
1343 return content
1347 return content
1344
1348
1345 def getstrippoint(self, minlink):
1349 def getstrippoint(self, minlink):
1346 """find the minimum rev that must be stripped to strip the linkrev
1350 """find the minimum rev that must be stripped to strip the linkrev
1347
1351
1348 Returns a tuple containing the minimum rev and a set of all revs that
1352 Returns a tuple containing the minimum rev and a set of all revs that
1349 have linkrevs that will be broken by this strip.
1353 have linkrevs that will be broken by this strip.
1350 """
1354 """
1351 brokenrevs = set()
1355 brokenrevs = set()
1352 strippoint = len(self)
1356 strippoint = len(self)
1353
1357
1354 heads = {}
1358 heads = {}
1355 futurelargelinkrevs = set()
1359 futurelargelinkrevs = set()
1356 for head in self.headrevs():
1360 for head in self.headrevs():
1357 headlinkrev = self.linkrev(head)
1361 headlinkrev = self.linkrev(head)
1358 heads[head] = headlinkrev
1362 heads[head] = headlinkrev
1359 if headlinkrev >= minlink:
1363 if headlinkrev >= minlink:
1360 futurelargelinkrevs.add(headlinkrev)
1364 futurelargelinkrevs.add(headlinkrev)
1361
1365
1362 # This algorithm involves walking down the rev graph, starting at the
1366 # This algorithm involves walking down the rev graph, starting at the
1363 # heads. Since the revs are topologically sorted according to linkrev,
1367 # heads. Since the revs are topologically sorted according to linkrev,
1364 # once all head linkrevs are below the minlink, we know there are
1368 # once all head linkrevs are below the minlink, we know there are
1365 # no more revs that could have a linkrev greater than minlink.
1369 # no more revs that could have a linkrev greater than minlink.
1366 # So we can stop walking.
1370 # So we can stop walking.
1367 while futurelargelinkrevs:
1371 while futurelargelinkrevs:
1368 strippoint -= 1
1372 strippoint -= 1
1369 linkrev = heads.pop(strippoint)
1373 linkrev = heads.pop(strippoint)
1370
1374
1371 if linkrev < minlink:
1375 if linkrev < minlink:
1372 brokenrevs.add(strippoint)
1376 brokenrevs.add(strippoint)
1373 else:
1377 else:
1374 futurelargelinkrevs.remove(linkrev)
1378 futurelargelinkrevs.remove(linkrev)
1375
1379
1376 for p in self.parentrevs(strippoint):
1380 for p in self.parentrevs(strippoint):
1377 if p != nullrev:
1381 if p != nullrev:
1378 plinkrev = self.linkrev(p)
1382 plinkrev = self.linkrev(p)
1379 heads[p] = plinkrev
1383 heads[p] = plinkrev
1380 if plinkrev >= minlink:
1384 if plinkrev >= minlink:
1381 futurelargelinkrevs.add(plinkrev)
1385 futurelargelinkrevs.add(plinkrev)
1382
1386
1383 return strippoint, brokenrevs
1387 return strippoint, brokenrevs
1384
1388
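# --- Illustrative sketch, not part of revlog.py: the walk getstrippoint()
# above performs, on plain lists instead of a revlog.  parents[r] holds the
# parent revs of r (-1 for none), linkrevs[r] its linkrev, and heads are the
# revs no other rev names as a parent; the toy data at the end is made up.
def getstrippoint(parents, linkrevs, minlink):
    nullrev = -1
    named = {p for ps in parents for p in ps if p != nullrev}
    heads = {r: linkrevs[r] for r in range(len(parents)) if r not in named}
    futurelarge = {lr for lr in heads.values() if lr >= minlink}
    brokenrevs, strippoint = set(), len(parents)
    # walk down from the tip; once no pending head has a linkrev >= minlink,
    # nothing below can either, so the walk stops
    while futurelarge:
        strippoint -= 1
        linkrev = heads.pop(strippoint)
        if linkrev < minlink:
            brokenrevs.add(strippoint)
        else:
            futurelarge.discard(linkrev)
        for p in parents[strippoint]:
            if p != nullrev:
                heads[p] = linkrevs[p]
                if linkrevs[p] >= minlink:
                    futurelarge.add(linkrevs[p])
    return strippoint, brokenrevs

# three linear revs with linkrevs 0, 1, 2: stripping linkrev >= 1 truncates
# the revlog at rev 1 and breaks no remaining linkrevs
assert getstrippoint([(-1,), (0,), (1,)], [0, 1, 2], 1) == (1, set())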
1385 def strip(self, minlink, transaction):
1389 def strip(self, minlink, transaction):
1386 """truncate the revlog on the first revision with a linkrev >= minlink
1390 """truncate the revlog on the first revision with a linkrev >= minlink
1387
1391
1388 This function is called when we're stripping revision minlink and
1392 This function is called when we're stripping revision minlink and
1389 its descendants from the repository.
1393 its descendants from the repository.
1390
1394
1391 We have to remove all revisions with linkrev >= minlink, because
1395 We have to remove all revisions with linkrev >= minlink, because
1392 the equivalent changelog revisions will be renumbered after the
1396 the equivalent changelog revisions will be renumbered after the
1393 strip.
1397 strip.
1394
1398
1395 So we truncate the revlog on the first of these revisions, and
1399 So we truncate the revlog on the first of these revisions, and
1396 trust that the caller has saved the revisions that shouldn't be
1400 trust that the caller has saved the revisions that shouldn't be
1397 removed and that it'll re-add them after this truncation.
1401 removed and that it'll re-add them after this truncation.
1398 """
1402 """
1399 if len(self) == 0:
1403 if len(self) == 0:
1400 return
1404 return
1401
1405
1402 rev, _ = self.getstrippoint(minlink)
1406 rev, _ = self.getstrippoint(minlink)
1403 if rev == len(self):
1407 if rev == len(self):
1404 return
1408 return
1405
1409
1406 # first truncate the files on disk
1410 # first truncate the files on disk
1407 end = self.start(rev)
1411 end = self.start(rev)
1408 if not self._inline:
1412 if not self._inline:
1409 transaction.add(self.datafile, end)
1413 transaction.add(self.datafile, end)
1410 end = rev * self._io.size
1414 end = rev * self._io.size
1411 else:
1415 else:
1412 end += rev * self._io.size
1416 end += rev * self._io.size
1413
1417
1414 transaction.add(self.indexfile, end)
1418 transaction.add(self.indexfile, end)
1415
1419
1416 # then reset internal state in memory to forget those revisions
1420 # then reset internal state in memory to forget those revisions
1417 self._cache = None
1421 self._cache = None
1418 self._chunkclear()
1422 self._chunkclear()
1419 for x in xrange(rev, len(self)):
1423 for x in xrange(rev, len(self)):
1420 del self.nodemap[self.node(x)]
1424 del self.nodemap[self.node(x)]
1421
1425
1422 del self.index[rev:-1]
1426 del self.index[rev:-1]
1423
1427
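# --- Illustrative sketch, not part of revlog.py: the truncation offsets
# strip() above hands to the transaction.  iosize is the fixed index entry
# size and datastart is self.start(rev); the concrete numbers below are
# made up.
def truncation_points(rev, datastart, iosize, inline):
    if not inline:
        # separate .i/.d files: cut the data file at rev's first data byte
        # and the index after rev fixed-size entries
        return {"datafile": datastart, "indexfile": rev * iosize}
    # inline revlog: entries and data share one file, so both terms add up
    return {"indexfile": datastart + rev * iosize}

assert truncation_points(3, 1200, 64, inline=False) == \
    {"datafile": 1200, "indexfile": 192}
assert truncation_points(3, 1200, 64, inline=True) == {"indexfile": 1392}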
1424 def checksize(self):
1428 def checksize(self):
1425 expected = 0
1429 expected = 0
1426 if len(self):
1430 if len(self):
1427 expected = max(0, self.end(len(self) - 1))
1431 expected = max(0, self.end(len(self) - 1))
1428
1432
1429 try:
1433 try:
1430 f = self.opener(self.datafile)
1434 f = self.opener(self.datafile)
1431 f.seek(0, 2)
1435 f.seek(0, 2)
1432 actual = f.tell()
1436 actual = f.tell()
1433 f.close()
1437 f.close()
1434 dd = actual - expected
1438 dd = actual - expected
1435 except IOError, inst:
1439 except IOError, inst:
1436 if inst.errno != errno.ENOENT:
1440 if inst.errno != errno.ENOENT:
1437 raise
1441 raise
1438 dd = 0
1442 dd = 0
1439
1443
1440 try:
1444 try:
1441 f = self.opener(self.indexfile)
1445 f = self.opener(self.indexfile)
1442 f.seek(0, 2)
1446 f.seek(0, 2)
1443 actual = f.tell()
1447 actual = f.tell()
1444 f.close()
1448 f.close()
1445 s = self._io.size
1449 s = self._io.size
1446 i = max(0, actual // s)
1450 i = max(0, actual // s)
1447 di = actual - (i * s)
1451 di = actual - (i * s)
1448 if self._inline:
1452 if self._inline:
1449 databytes = 0
1453 databytes = 0
1450 for r in self:
1454 for r in self:
1451 databytes += max(0, self.length(r))
1455 databytes += max(0, self.length(r))
1452 dd = 0
1456 dd = 0
1453 di = actual - len(self) * s - databytes
1457 di = actual - len(self) * s - databytes
1454 except IOError, inst:
1458 except IOError, inst:
1455 if inst.errno != errno.ENOENT:
1459 if inst.errno != errno.ENOENT:
1456 raise
1460 raise
1457 di = 0
1461 di = 0
1458
1462
1459 return (dd, di)
1463 return (dd, di)
1460
1464
1461 def files(self):
1465 def files(self):
1462 res = [self.indexfile]
1466 res = [self.indexfile]
1463 if not self._inline:
1467 if not self._inline:
1464 res.append(self.datafile)
1468 res.append(self.datafile)
1465 return res
1469 return res