revlog: cache chain info after calculating it for a rev (issue4452)...
Siddharth Agarwal
r23306:f7a42f8e default
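The change memoizes per-revision delta-chain statistics: _chaininfo() now consults a new self._chaininfocache dict before walking a chain, stops early as soon as the walk reaches an already-cached ancestor, and stores its (chain length, summed compressed delta length) result on the way out, so repeated queries over long chains become cheap. Below is a minimal standalone sketch of the same memoization pattern, using hypothetical stand-in names (parentof, sizeof, chainstats) rather than the real revlog index accessors:

    # Sketch only: 'parentof' and 'sizeof' are hypothetical stand-ins for the
    # index lookups the real _chaininfo performs on revlog entries.
    class chainstats(object):
        def __init__(self, parentof, sizeof):
            self._parentof = parentof  # rev -> delta base rev (itself if full text)
            self._sizeof = sizeof      # rev -> compressed delta length
            self._cache = {}           # rev -> (chain length, total compressed size)

        def chaininfo(self, rev):
            if rev in self._cache:
                return self._cache[rev]
            clen = 0
            totalsize = 0
            iterrev = rev
            while iterrev != self._parentof(iterrev):
                clen += 1
                totalsize += self._sizeof(iterrev)
                iterrev = self._parentof(iterrev)
                if iterrev in self._cache:
                    # reuse the cached tail of the chain instead of rewalking it
                    t = self._cache[iterrev]
                    clen += t[0]
                    totalsize += t[1]
                    break
            else:
                # no cache hit: account for the full-text base of the chain
                # (cached tails already include it)
                totalsize += self._sizeof(iterrev)
            r = (clen, totalsize)
            self._cache[rev] = r
            return r

    # toy chain: rev 0 is a full text, revs 1-4 are successive deltas
    parents = {0: 0, 1: 0, 2: 1, 3: 2, 4: 3}
    sizes = {0: 100, 1: 10, 2: 12, 3: 9, 4: 7}
    cs = chainstats(parents.get, sizes.get)
    print cs.chaininfo(3)  # walks the whole chain: (3, 131)
    print cs.chaininfo(4)  # one step, then reuses rev 3's cached tail: (4, 138)

Note the while/else: the full-text base at the end of the chain is counted only when the walk finishes without a cache hit, because cached tails already include it. The actual change below uses the same shape.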
--- a/mercurial/revlog.py
+++ b/mercurial/revlog.py
@@ -1,1503 +1,1520 @@
 # revlog.py - storage back-end for mercurial
 #
 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
 #
 # This software may be used and distributed according to the terms of the
 # GNU General Public License version 2 or any later version.
 
 """Storage back-end for Mercurial.
 
 This provides efficient delta storage with O(1) retrieve and append
 and O(changes) merge between branches.
 """
 
 # import stuff from node for others to import from revlog
 from node import bin, hex, nullid, nullrev
 from i18n import _
 import ancestor, mdiff, parsers, error, util, templatefilters
 import struct, zlib, errno
 
 _pack = struct.pack
 _unpack = struct.unpack
 _compress = zlib.compress
 _decompress = zlib.decompress
 _sha = util.sha1
 
 # revlog header flags
 REVLOGV0 = 0
 REVLOGNG = 1
 REVLOGNGINLINEDATA = (1 << 16)
 REVLOGGENERALDELTA = (1 << 17)
 REVLOG_DEFAULT_FLAGS = REVLOGNGINLINEDATA
 REVLOG_DEFAULT_FORMAT = REVLOGNG
 REVLOG_DEFAULT_VERSION = REVLOG_DEFAULT_FORMAT | REVLOG_DEFAULT_FLAGS
 REVLOGNG_FLAGS = REVLOGNGINLINEDATA | REVLOGGENERALDELTA
 
 # revlog index flags
 REVIDX_KNOWN_FLAGS = 0
 
 # max size of revlog with inline data
 _maxinline = 131072
 _chunksize = 1048576
 
 RevlogError = error.RevlogError
 LookupError = error.LookupError
 CensoredNodeError = error.CensoredNodeError
 
 def getoffset(q):
     return int(q >> 16)
 
 def gettype(q):
     return int(q & 0xFFFF)
 
 def offset_type(offset, type):
     return long(long(offset) << 16 | type)
 
 _nullhash = _sha(nullid)
 
 def hash(text, p1, p2):
     """generate a hash from the given text and its parent hashes
 
     This hash combines both the current file contents and its history
     in a manner that makes it easy to distinguish nodes with the same
     content in the revision graph.
     """
     # As of now, if one of the parent node is null, p2 is null
     if p2 == nullid:
         # deep copy of a hash is faster than creating one
         s = _nullhash.copy()
         s.update(p1)
     else:
         # none of the parent nodes are nullid
         l = [p1, p2]
         l.sort()
         s = _sha(l[0])
         s.update(l[1])
     s.update(text)
     return s.digest()
 
 def decompress(bin):
     """ decompress the given input """
     if not bin:
         return bin
     t = bin[0]
     if t == '\0':
         return bin
     if t == 'x':
         try:
             return _decompress(bin)
         except zlib.error, e:
             raise RevlogError(_("revlog decompress error: %s") % str(e))
     if t == 'u':
         return bin[1:]
     raise RevlogError(_("unknown compression type %r") % t)
 
 # index v0:
 #  4 bytes: offset
 #  4 bytes: compressed length
 #  4 bytes: base rev
 #  4 bytes: link rev
 # 32 bytes: parent 1 nodeid
 # 32 bytes: parent 2 nodeid
 # 32 bytes: nodeid
 indexformatv0 = ">4l20s20s20s"
 v0shaoffset = 56
 
 class revlogoldio(object):
     def __init__(self):
         self.size = struct.calcsize(indexformatv0)
 
     def parseindex(self, data, inline):
         s = self.size
         index = []
         nodemap = {nullid: nullrev}
         n = off = 0
         l = len(data)
         while off + s <= l:
             cur = data[off:off + s]
             off += s
             e = _unpack(indexformatv0, cur)
             # transform to revlogv1 format
             e2 = (offset_type(e[0], 0), e[1], -1, e[2], e[3],
                   nodemap.get(e[4], nullrev), nodemap.get(e[5], nullrev), e[6])
             index.append(e2)
             nodemap[e[6]] = n
             n += 1
 
         # add the magic null revision at -1
         index.append((0, 0, 0, -1, -1, -1, -1, nullid))
 
         return index, nodemap, None
 
     def packentry(self, entry, node, version, rev):
         if gettype(entry[0]):
             raise RevlogError(_("index entry flags need RevlogNG"))
         e2 = (getoffset(entry[0]), entry[1], entry[3], entry[4],
               node(entry[5]), node(entry[6]), entry[7])
         return _pack(indexformatv0, *e2)
 
 # index ng:
 #  6 bytes: offset
 #  2 bytes: flags
 #  4 bytes: compressed length
 #  4 bytes: uncompressed length
 #  4 bytes: base rev
 #  4 bytes: link rev
 #  4 bytes: parent 1 rev
 #  4 bytes: parent 2 rev
 # 32 bytes: nodeid
 indexformatng = ">Qiiiiii20s12x"
 ngshaoffset = 32
 versionformat = ">I"
 
 class revlogio(object):
     def __init__(self):
         self.size = struct.calcsize(indexformatng)
 
     def parseindex(self, data, inline):
         # call the C implementation to parse the index data
         index, cache = parsers.parse_index2(data, inline)
         return index, getattr(index, 'nodemap', None), cache
 
     def packentry(self, entry, node, version, rev):
         p = _pack(indexformatng, *entry)
         if rev == 0:
             p = _pack(versionformat, version) + p[4:]
         return p
 
 class revlog(object):
     """
     the underlying revision storage object
 
     A revlog consists of two parts, an index and the revision data.
 
     The index is a file with a fixed record size containing
     information on each revision, including its nodeid (hash), the
     nodeids of its parents, the position and offset of its data within
     the data file, and the revision it's based on. Finally, each entry
     contains a linkrev entry that can serve as a pointer to external
     data.
 
     The revision data itself is a linear collection of data chunks.
     Each chunk represents a revision and is usually represented as a
     delta against the previous chunk. To bound lookup time, runs of
     deltas are limited to about 2 times the length of the original
     version data. This makes retrieval of a version proportional to
     its size, or O(1) relative to the number of revisions.
 
     Both pieces of the revlog are written to in an append-only
     fashion, which means we never need to rewrite a file to insert or
     remove data, and can use some simple techniques to avoid the need
     for locking while reading.
     """
     def __init__(self, opener, indexfile):
         """
         create a revlog object
 
         opener is a function that abstracts the file opening operation
         and can be used to implement COW semantics or the like.
         """
         self.indexfile = indexfile
         self.datafile = indexfile[:-2] + ".d"
         self.opener = opener
         self._cache = None
         self._basecache = None
         self._chunkcache = (0, '')
         self._chunkcachesize = 65536
         self._maxchainlen = None
         self.index = []
         self._pcache = {}
         self._nodecache = {nullid: nullrev}
         self._nodepos = None
 
         v = REVLOG_DEFAULT_VERSION
         opts = getattr(opener, 'options', None)
         if opts is not None:
             if 'revlogv1' in opts:
                 if 'generaldelta' in opts:
                     v |= REVLOGGENERALDELTA
             else:
                 v = 0
             if 'chunkcachesize' in opts:
                 self._chunkcachesize = opts['chunkcachesize']
             if 'maxchainlen' in opts:
                 self._maxchainlen = opts['maxchainlen']
 
         if self._chunkcachesize <= 0:
             raise RevlogError(_('revlog chunk cache size %r is not greater '
                                 'than 0') % self._chunkcachesize)
         elif self._chunkcachesize & (self._chunkcachesize - 1):
             raise RevlogError(_('revlog chunk cache size %r is not a power '
                                 'of 2') % self._chunkcachesize)
 
         i = ''
         self._initempty = True
         try:
             f = self.opener(self.indexfile)
             i = f.read()
             f.close()
             if len(i) > 0:
                 v = struct.unpack(versionformat, i[:4])[0]
                 self._initempty = False
         except IOError, inst:
             if inst.errno != errno.ENOENT:
                 raise
 
         self.version = v
         self._inline = v & REVLOGNGINLINEDATA
         self._generaldelta = v & REVLOGGENERALDELTA
         flags = v & ~0xFFFF
         fmt = v & 0xFFFF
         if fmt == REVLOGV0 and flags:
             raise RevlogError(_("index %s unknown flags %#04x for format v0")
                               % (self.indexfile, flags >> 16))
         elif fmt == REVLOGNG and flags & ~REVLOGNG_FLAGS:
             raise RevlogError(_("index %s unknown flags %#04x for revlogng")
                               % (self.indexfile, flags >> 16))
         elif fmt > REVLOGNG:
             raise RevlogError(_("index %s unknown format %d")
                               % (self.indexfile, fmt))
 
         self._io = revlogio()
         if self.version == REVLOGV0:
             self._io = revlogoldio()
         try:
             d = self._io.parseindex(i, self._inline)
         except (ValueError, IndexError):
             raise RevlogError(_("index %s is corrupted") % (self.indexfile))
         self.index, nodemap, self._chunkcache = d
         if nodemap is not None:
             self.nodemap = self._nodecache = nodemap
         if not self._chunkcache:
             self._chunkclear()
+        # revnum -> (chain-length, sum-delta-length)
+        self._chaininfocache = {}
 
     def tip(self):
         return self.node(len(self.index) - 2)
     def __len__(self):
         return len(self.index) - 1
     def __iter__(self):
         return iter(xrange(len(self)))
     def revs(self, start=0, stop=None):
         """iterate over all rev in this revlog (from start to stop)"""
         step = 1
         if stop is not None:
             if start > stop:
                 step = -1
             stop += step
         else:
             stop = len(self)
         return xrange(start, stop, step)
 
     @util.propertycache
     def nodemap(self):
         self.rev(self.node(0))
         return self._nodecache
 
     def hasnode(self, node):
         try:
             self.rev(node)
             return True
         except KeyError:
             return False
 
     def clearcaches(self):
         try:
             self._nodecache.clearcaches()
         except AttributeError:
             self._nodecache = {nullid: nullrev}
             self._nodepos = None
 
     def rev(self, node):
         try:
             return self._nodecache[node]
         except TypeError:
             raise
         except RevlogError:
             # parsers.c radix tree lookup failed
             raise LookupError(node, self.indexfile, _('no node'))
         except KeyError:
             # pure python cache lookup failed
             n = self._nodecache
             i = self.index
             p = self._nodepos
             if p is None:
                 p = len(i) - 2
             for r in xrange(p, -1, -1):
                 v = i[r][7]
                 n[v] = r
                 if v == node:
                     self._nodepos = r - 1
                     return r
             raise LookupError(node, self.indexfile, _('no node'))
 
     def node(self, rev):
         return self.index[rev][7]
     def linkrev(self, rev):
         return self.index[rev][4]
     def parents(self, node):
         i = self.index
         d = i[self.rev(node)]
         return i[d[5]][7], i[d[6]][7] # map revisions to nodes inline
     def parentrevs(self, rev):
         return self.index[rev][5:7]
     def start(self, rev):
         return int(self.index[rev][0] >> 16)
     def end(self, rev):
         return self.start(rev) + self.length(rev)
     def length(self, rev):
         return self.index[rev][1]
     def chainbase(self, rev):
         index = self.index
         base = index[rev][3]
         while base != rev:
             rev = base
             base = index[rev][3]
         return base
     def chainlen(self, rev):
         return self._chaininfo(rev)[0]
+
     def _chaininfo(self, rev):
+        chaininfocache = self._chaininfocache
+        if rev in chaininfocache:
+            return chaininfocache[rev]
         index = self.index
         generaldelta = self._generaldelta
         iterrev = rev
         e = index[iterrev]
         clen = 0
         compresseddeltalen = 0
         while iterrev != e[3]:
             clen += 1
             compresseddeltalen += e[1]
             if generaldelta:
                 iterrev = e[3]
             else:
                 iterrev -= 1
+            if iterrev in chaininfocache:
+                t = chaininfocache[iterrev]
+                clen += t[0]
+                compresseddeltalen += t[1]
+                break
             e = index[iterrev]
-        # add text length of base since decompressing that also takes work
-        compresseddeltalen += e[1]
-        return clen, compresseddeltalen
+        else:
+            # Add text length of base since decompressing that also takes
+            # work. For cache hits the length is already included.
+            compresseddeltalen += e[1]
+        r = (clen, compresseddeltalen)
+        chaininfocache[rev] = r
+        return r
+
376 def flags(self, rev):
392 def flags(self, rev):
377 return self.index[rev][0] & 0xFFFF
393 return self.index[rev][0] & 0xFFFF
378 def rawsize(self, rev):
394 def rawsize(self, rev):
379 """return the length of the uncompressed text for a given revision"""
395 """return the length of the uncompressed text for a given revision"""
380 l = self.index[rev][2]
396 l = self.index[rev][2]
381 if l >= 0:
397 if l >= 0:
382 return l
398 return l
383
399
384 t = self.revision(self.node(rev))
400 t = self.revision(self.node(rev))
385 return len(t)
401 return len(t)
386 size = rawsize
402 size = rawsize
387
403
388 def ancestors(self, revs, stoprev=0, inclusive=False):
404 def ancestors(self, revs, stoprev=0, inclusive=False):
389 """Generate the ancestors of 'revs' in reverse topological order.
405 """Generate the ancestors of 'revs' in reverse topological order.
390 Does not generate revs lower than stoprev.
406 Does not generate revs lower than stoprev.
391
407
392 See the documentation for ancestor.lazyancestors for more details."""
408 See the documentation for ancestor.lazyancestors for more details."""
393
409
394 return ancestor.lazyancestors(self, revs, stoprev=stoprev,
410 return ancestor.lazyancestors(self, revs, stoprev=stoprev,
395 inclusive=inclusive)
411 inclusive=inclusive)
396
412
397 def descendants(self, revs):
413 def descendants(self, revs):
398 """Generate the descendants of 'revs' in revision order.
414 """Generate the descendants of 'revs' in revision order.
399
415
400 Yield a sequence of revision numbers starting with a child of
416 Yield a sequence of revision numbers starting with a child of
401 some rev in revs, i.e., each revision is *not* considered a
417 some rev in revs, i.e., each revision is *not* considered a
402 descendant of itself. Results are ordered by revision number (a
418 descendant of itself. Results are ordered by revision number (a
403 topological sort)."""
419 topological sort)."""
404 first = min(revs)
420 first = min(revs)
405 if first == nullrev:
421 if first == nullrev:
406 for i in self:
422 for i in self:
407 yield i
423 yield i
408 return
424 return
409
425
410 seen = set(revs)
426 seen = set(revs)
411 for i in self.revs(start=first + 1):
427 for i in self.revs(start=first + 1):
412 for x in self.parentrevs(i):
428 for x in self.parentrevs(i):
413 if x != nullrev and x in seen:
429 if x != nullrev and x in seen:
414 seen.add(i)
430 seen.add(i)
415 yield i
431 yield i
416 break
432 break
417
433
418 def findcommonmissing(self, common=None, heads=None):
434 def findcommonmissing(self, common=None, heads=None):
419 """Return a tuple of the ancestors of common and the ancestors of heads
435 """Return a tuple of the ancestors of common and the ancestors of heads
420 that are not ancestors of common. In revset terminology, we return the
436 that are not ancestors of common. In revset terminology, we return the
421 tuple:
437 tuple:
422
438
423 ::common, (::heads) - (::common)
439 ::common, (::heads) - (::common)
424
440
425 The list is sorted by revision number, meaning it is
441 The list is sorted by revision number, meaning it is
426 topologically sorted.
442 topologically sorted.
427
443
428 'heads' and 'common' are both lists of node IDs. If heads is
444 'heads' and 'common' are both lists of node IDs. If heads is
429 not supplied, uses all of the revlog's heads. If common is not
445 not supplied, uses all of the revlog's heads. If common is not
430 supplied, uses nullid."""
446 supplied, uses nullid."""
431 if common is None:
447 if common is None:
432 common = [nullid]
448 common = [nullid]
433 if heads is None:
449 if heads is None:
434 heads = self.heads()
450 heads = self.heads()
435
451
436 common = [self.rev(n) for n in common]
452 common = [self.rev(n) for n in common]
437 heads = [self.rev(n) for n in heads]
453 heads = [self.rev(n) for n in heads]
438
454
439 # we want the ancestors, but inclusive
455 # we want the ancestors, but inclusive
440 class lazyset(object):
456 class lazyset(object):
441 def __init__(self, lazyvalues):
457 def __init__(self, lazyvalues):
442 self.addedvalues = set()
458 self.addedvalues = set()
443 self.lazyvalues = lazyvalues
459 self.lazyvalues = lazyvalues
444
460
445 def __contains__(self, value):
461 def __contains__(self, value):
446 return value in self.addedvalues or value in self.lazyvalues
462 return value in self.addedvalues or value in self.lazyvalues
447
463
448 def __iter__(self):
464 def __iter__(self):
449 added = self.addedvalues
465 added = self.addedvalues
450 for r in added:
466 for r in added:
451 yield r
467 yield r
452 for r in self.lazyvalues:
468 for r in self.lazyvalues:
453 if not r in added:
469 if not r in added:
454 yield r
470 yield r
455
471
456 def add(self, value):
472 def add(self, value):
457 self.addedvalues.add(value)
473 self.addedvalues.add(value)
458
474
459 def update(self, values):
475 def update(self, values):
460 self.addedvalues.update(values)
476 self.addedvalues.update(values)
461
477
462 has = lazyset(self.ancestors(common))
478 has = lazyset(self.ancestors(common))
463 has.add(nullrev)
479 has.add(nullrev)
464 has.update(common)
480 has.update(common)
465
481
466 # take all ancestors from heads that aren't in has
482 # take all ancestors from heads that aren't in has
467 missing = set()
483 missing = set()
468 visit = util.deque(r for r in heads if r not in has)
484 visit = util.deque(r for r in heads if r not in has)
469 while visit:
485 while visit:
470 r = visit.popleft()
486 r = visit.popleft()
471 if r in missing:
487 if r in missing:
472 continue
488 continue
473 else:
489 else:
474 missing.add(r)
490 missing.add(r)
475 for p in self.parentrevs(r):
491 for p in self.parentrevs(r):
476 if p not in has:
492 if p not in has:
477 visit.append(p)
493 visit.append(p)
478 missing = list(missing)
494 missing = list(missing)
479 missing.sort()
495 missing.sort()
480 return has, [self.node(r) for r in missing]
496 return has, [self.node(r) for r in missing]
481
497
482 def findmissingrevs(self, common=None, heads=None):
498 def findmissingrevs(self, common=None, heads=None):
483 """Return the revision numbers of the ancestors of heads that
499 """Return the revision numbers of the ancestors of heads that
484 are not ancestors of common.
500 are not ancestors of common.
485
501
486 More specifically, return a list of revision numbers corresponding to
502 More specifically, return a list of revision numbers corresponding to
487 nodes N such that every N satisfies the following constraints:
503 nodes N such that every N satisfies the following constraints:
488
504
489 1. N is an ancestor of some node in 'heads'
505 1. N is an ancestor of some node in 'heads'
490 2. N is not an ancestor of any node in 'common'
506 2. N is not an ancestor of any node in 'common'
491
507
492 The list is sorted by revision number, meaning it is
508 The list is sorted by revision number, meaning it is
493 topologically sorted.
509 topologically sorted.
494
510
495 'heads' and 'common' are both lists of revision numbers. If heads is
511 'heads' and 'common' are both lists of revision numbers. If heads is
496 not supplied, uses all of the revlog's heads. If common is not
512 not supplied, uses all of the revlog's heads. If common is not
497 supplied, uses nullid."""
513 supplied, uses nullid."""
498 if common is None:
514 if common is None:
499 common = [nullrev]
515 common = [nullrev]
500 if heads is None:
516 if heads is None:
501 heads = self.headrevs()
517 heads = self.headrevs()
502
518
503 return ancestor.missingancestors(heads, common, self.parentrevs)
519 return ancestor.missingancestors(heads, common, self.parentrevs)
504
520
505 def findmissing(self, common=None, heads=None):
521 def findmissing(self, common=None, heads=None):
506 """Return the ancestors of heads that are not ancestors of common.
522 """Return the ancestors of heads that are not ancestors of common.
507
523
508 More specifically, return a list of nodes N such that every N
524 More specifically, return a list of nodes N such that every N
509 satisfies the following constraints:
525 satisfies the following constraints:
510
526
511 1. N is an ancestor of some node in 'heads'
527 1. N is an ancestor of some node in 'heads'
512 2. N is not an ancestor of any node in 'common'
528 2. N is not an ancestor of any node in 'common'
513
529
514 The list is sorted by revision number, meaning it is
530 The list is sorted by revision number, meaning it is
515 topologically sorted.
531 topologically sorted.
516
532
517 'heads' and 'common' are both lists of node IDs. If heads is
533 'heads' and 'common' are both lists of node IDs. If heads is
518 not supplied, uses all of the revlog's heads. If common is not
534 not supplied, uses all of the revlog's heads. If common is not
519 supplied, uses nullid."""
535 supplied, uses nullid."""
520 if common is None:
536 if common is None:
521 common = [nullid]
537 common = [nullid]
522 if heads is None:
538 if heads is None:
523 heads = self.heads()
539 heads = self.heads()
524
540
525 common = [self.rev(n) for n in common]
541 common = [self.rev(n) for n in common]
526 heads = [self.rev(n) for n in heads]
542 heads = [self.rev(n) for n in heads]
527
543
528 return [self.node(r) for r in
544 return [self.node(r) for r in
529 ancestor.missingancestors(heads, common, self.parentrevs)]
545 ancestor.missingancestors(heads, common, self.parentrevs)]
530
546
531 def nodesbetween(self, roots=None, heads=None):
547 def nodesbetween(self, roots=None, heads=None):
532 """Return a topological path from 'roots' to 'heads'.
548 """Return a topological path from 'roots' to 'heads'.
533
549
534 Return a tuple (nodes, outroots, outheads) where 'nodes' is a
550 Return a tuple (nodes, outroots, outheads) where 'nodes' is a
535 topologically sorted list of all nodes N that satisfy both of
551 topologically sorted list of all nodes N that satisfy both of
536 these constraints:
552 these constraints:
537
553
538 1. N is a descendant of some node in 'roots'
554 1. N is a descendant of some node in 'roots'
539 2. N is an ancestor of some node in 'heads'
555 2. N is an ancestor of some node in 'heads'
540
556
541 Every node is considered to be both a descendant and an ancestor
557 Every node is considered to be both a descendant and an ancestor
542 of itself, so every reachable node in 'roots' and 'heads' will be
558 of itself, so every reachable node in 'roots' and 'heads' will be
543 included in 'nodes'.
559 included in 'nodes'.
544
560
545 'outroots' is the list of reachable nodes in 'roots', i.e., the
561 'outroots' is the list of reachable nodes in 'roots', i.e., the
546 subset of 'roots' that is returned in 'nodes'. Likewise,
562 subset of 'roots' that is returned in 'nodes'. Likewise,
547 'outheads' is the subset of 'heads' that is also in 'nodes'.
563 'outheads' is the subset of 'heads' that is also in 'nodes'.
548
564
549 'roots' and 'heads' are both lists of node IDs. If 'roots' is
565 'roots' and 'heads' are both lists of node IDs. If 'roots' is
550 unspecified, uses nullid as the only root. If 'heads' is
566 unspecified, uses nullid as the only root. If 'heads' is
551 unspecified, uses list of all of the revlog's heads."""
567 unspecified, uses list of all of the revlog's heads."""
552 nonodes = ([], [], [])
568 nonodes = ([], [], [])
553 if roots is not None:
569 if roots is not None:
554 roots = list(roots)
570 roots = list(roots)
555 if not roots:
571 if not roots:
556 return nonodes
572 return nonodes
557 lowestrev = min([self.rev(n) for n in roots])
573 lowestrev = min([self.rev(n) for n in roots])
558 else:
574 else:
559 roots = [nullid] # Everybody's a descendant of nullid
575 roots = [nullid] # Everybody's a descendant of nullid
560 lowestrev = nullrev
576 lowestrev = nullrev
561 if (lowestrev == nullrev) and (heads is None):
577 if (lowestrev == nullrev) and (heads is None):
562 # We want _all_ the nodes!
578 # We want _all_ the nodes!
563 return ([self.node(r) for r in self], [nullid], list(self.heads()))
579 return ([self.node(r) for r in self], [nullid], list(self.heads()))
564 if heads is None:
580 if heads is None:
565 # All nodes are ancestors, so the latest ancestor is the last
581 # All nodes are ancestors, so the latest ancestor is the last
566 # node.
582 # node.
567 highestrev = len(self) - 1
583 highestrev = len(self) - 1
568 # Set ancestors to None to signal that every node is an ancestor.
584 # Set ancestors to None to signal that every node is an ancestor.
569 ancestors = None
585 ancestors = None
570 # Set heads to an empty dictionary for later discovery of heads
586 # Set heads to an empty dictionary for later discovery of heads
571 heads = {}
587 heads = {}
572 else:
588 else:
573 heads = list(heads)
589 heads = list(heads)
574 if not heads:
590 if not heads:
575 return nonodes
591 return nonodes
576 ancestors = set()
592 ancestors = set()
577 # Turn heads into a dictionary so we can remove 'fake' heads.
593 # Turn heads into a dictionary so we can remove 'fake' heads.
578 # Also, later we will be using it to filter out the heads we can't
594 # Also, later we will be using it to filter out the heads we can't
579 # find from roots.
595 # find from roots.
580 heads = dict.fromkeys(heads, False)
596 heads = dict.fromkeys(heads, False)
581 # Start at the top and keep marking parents until we're done.
597 # Start at the top and keep marking parents until we're done.
582 nodestotag = set(heads)
598 nodestotag = set(heads)
583 # Remember where the top was so we can use it as a limit later.
599 # Remember where the top was so we can use it as a limit later.
584 highestrev = max([self.rev(n) for n in nodestotag])
600 highestrev = max([self.rev(n) for n in nodestotag])
585 while nodestotag:
601 while nodestotag:
586 # grab a node to tag
602 # grab a node to tag
587 n = nodestotag.pop()
603 n = nodestotag.pop()
588 # Never tag nullid
604 # Never tag nullid
589 if n == nullid:
605 if n == nullid:
590 continue
606 continue
591 # A node's revision number represents its place in a
607 # A node's revision number represents its place in a
592 # topologically sorted list of nodes.
608 # topologically sorted list of nodes.
593 r = self.rev(n)
609 r = self.rev(n)
594 if r >= lowestrev:
610 if r >= lowestrev:
595 if n not in ancestors:
611 if n not in ancestors:
596 # If we are possibly a descendant of one of the roots
612 # If we are possibly a descendant of one of the roots
597 # and we haven't already been marked as an ancestor
613 # and we haven't already been marked as an ancestor
598 ancestors.add(n) # Mark as ancestor
614 ancestors.add(n) # Mark as ancestor
599 # Add non-nullid parents to list of nodes to tag.
615 # Add non-nullid parents to list of nodes to tag.
600 nodestotag.update([p for p in self.parents(n) if
616 nodestotag.update([p for p in self.parents(n) if
601 p != nullid])
617 p != nullid])
602 elif n in heads: # We've seen it before, is it a fake head?
618 elif n in heads: # We've seen it before, is it a fake head?
603 # So it is, real heads should not be the ancestors of
619 # So it is, real heads should not be the ancestors of
604 # any other heads.
620 # any other heads.
605 heads.pop(n)
621 heads.pop(n)
606 if not ancestors:
622 if not ancestors:
607 return nonodes
623 return nonodes
608 # Now that we have our set of ancestors, we want to remove any
624 # Now that we have our set of ancestors, we want to remove any
609 # roots that are not ancestors.
625 # roots that are not ancestors.
610
626
611 # If one of the roots was nullid, everything is included anyway.
627 # If one of the roots was nullid, everything is included anyway.
612 if lowestrev > nullrev:
628 if lowestrev > nullrev:
613 # But, since we weren't, let's recompute the lowest rev to not
629 # But, since we weren't, let's recompute the lowest rev to not
614 # include roots that aren't ancestors.
630 # include roots that aren't ancestors.
615
631
616 # Filter out roots that aren't ancestors of heads
632 # Filter out roots that aren't ancestors of heads
617 roots = [n for n in roots if n in ancestors]
633 roots = [n for n in roots if n in ancestors]
618 # Recompute the lowest revision
634 # Recompute the lowest revision
619 if roots:
635 if roots:
620 lowestrev = min([self.rev(n) for n in roots])
636 lowestrev = min([self.rev(n) for n in roots])
621 else:
637 else:
622 # No more roots? Return empty list
638 # No more roots? Return empty list
623 return nonodes
639 return nonodes
624 else:
640 else:
625 # We are descending from nullid, and don't need to care about
641 # We are descending from nullid, and don't need to care about
626 # any other roots.
642 # any other roots.
627 lowestrev = nullrev
643 lowestrev = nullrev
628 roots = [nullid]
644 roots = [nullid]
629 # Transform our roots list into a set.
645 # Transform our roots list into a set.
630 descendants = set(roots)
646 descendants = set(roots)
631 # Also, keep the original roots so we can filter out roots that aren't
647 # Also, keep the original roots so we can filter out roots that aren't
632 # 'real' roots (i.e. are descended from other roots).
648 # 'real' roots (i.e. are descended from other roots).
633 roots = descendants.copy()
649 roots = descendants.copy()
634 # Our topologically sorted list of output nodes.
650 # Our topologically sorted list of output nodes.
635 orderedout = []
651 orderedout = []
636 # Don't start at nullid since we don't want nullid in our output list,
652 # Don't start at nullid since we don't want nullid in our output list,
637 # and if nullid shows up in descendants, empty parents will look like
653 # and if nullid shows up in descendants, empty parents will look like
638 # they're descendants.
654 # they're descendants.
639 for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
655 for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
640 n = self.node(r)
656 n = self.node(r)
641 isdescendant = False
657 isdescendant = False
642 if lowestrev == nullrev: # Everybody is a descendant of nullid
658 if lowestrev == nullrev: # Everybody is a descendant of nullid
643 isdescendant = True
659 isdescendant = True
644 elif n in descendants:
660 elif n in descendants:
645 # n is already a descendant
661 # n is already a descendant
646 isdescendant = True
662 isdescendant = True
647 # This check only needs to be done here because all the roots
663 # This check only needs to be done here because all the roots
648 # will start being marked is descendants before the loop.
664 # will start being marked is descendants before the loop.
649 if n in roots:
665 if n in roots:
650 # If n was a root, check if it's a 'real' root.
666 # If n was a root, check if it's a 'real' root.
651 p = tuple(self.parents(n))
667 p = tuple(self.parents(n))
652 # If any of its parents are descendants, it's not a root.
668 # If any of its parents are descendants, it's not a root.
653 if (p[0] in descendants) or (p[1] in descendants):
669 if (p[0] in descendants) or (p[1] in descendants):
654 roots.remove(n)
670 roots.remove(n)
655 else:
671 else:
656 p = tuple(self.parents(n))
672 p = tuple(self.parents(n))
657 # A node is a descendant if either of its parents are
673 # A node is a descendant if either of its parents are
658 # descendants. (We seeded the dependents list with the roots
674 # descendants. (We seeded the dependents list with the roots
659 # up there, remember?)
675 # up there, remember?)
660 if (p[0] in descendants) or (p[1] in descendants):
676 if (p[0] in descendants) or (p[1] in descendants):
661 descendants.add(n)
677 descendants.add(n)
662 isdescendant = True
678 isdescendant = True
663 if isdescendant and ((ancestors is None) or (n in ancestors)):
679 if isdescendant and ((ancestors is None) or (n in ancestors)):
664 # Only include nodes that are both descendants and ancestors.
680 # Only include nodes that are both descendants and ancestors.
665 orderedout.append(n)
681 orderedout.append(n)
666 if (ancestors is not None) and (n in heads):
682 if (ancestors is not None) and (n in heads):
667 # We're trying to figure out which heads are reachable
683 # We're trying to figure out which heads are reachable
668 # from roots.
684 # from roots.
669 # Mark this head as having been reached
685 # Mark this head as having been reached
670 heads[n] = True
686 heads[n] = True
671 elif ancestors is None:
687 elif ancestors is None:
672 # Otherwise, we're trying to discover the heads.
688 # Otherwise, we're trying to discover the heads.
673 # Assume this is a head because if it isn't, the next step
689 # Assume this is a head because if it isn't, the next step
674 # will eventually remove it.
690 # will eventually remove it.
675 heads[n] = True
691 heads[n] = True
676 # But, obviously its parents aren't.
692 # But, obviously its parents aren't.
677 for p in self.parents(n):
693 for p in self.parents(n):
678 heads.pop(p, None)
694 heads.pop(p, None)
679 heads = [n for n, flag in heads.iteritems() if flag]
695 heads = [n for n, flag in heads.iteritems() if flag]
680 roots = list(roots)
696 roots = list(roots)
681 assert orderedout
697 assert orderedout
682 assert roots
698 assert roots
683 assert heads
699 assert heads
684 return (orderedout, roots, heads)
700 return (orderedout, roots, heads)
685
701
686 def headrevs(self):
702 def headrevs(self):
687 try:
703 try:
688 return self.index.headrevs()
704 return self.index.headrevs()
689 except AttributeError:
705 except AttributeError:
690 return self._headrevs()
706 return self._headrevs()
691
707
692 def _headrevs(self):
708 def _headrevs(self):
693 count = len(self)
709 count = len(self)
694 if not count:
710 if not count:
695 return [nullrev]
711 return [nullrev]
696 # we won't iter over filtered rev so nobody is a head at start
712 # we won't iter over filtered rev so nobody is a head at start
697 ishead = [0] * (count + 1)
713 ishead = [0] * (count + 1)
698 index = self.index
714 index = self.index
699 for r in self:
715 for r in self:
700 ishead[r] = 1 # I may be an head
716 ishead[r] = 1 # I may be an head
701 e = index[r]
717 e = index[r]
702 ishead[e[5]] = ishead[e[6]] = 0 # my parent are not
718 ishead[e[5]] = ishead[e[6]] = 0 # my parent are not
703 return [r for r, val in enumerate(ishead) if val]
719 return [r for r, val in enumerate(ishead) if val]
704
720
705 def heads(self, start=None, stop=None):
721 def heads(self, start=None, stop=None):
706 """return the list of all nodes that have no children
722 """return the list of all nodes that have no children
707
723
708 if start is specified, only heads that are descendants of
724 if start is specified, only heads that are descendants of
709 start will be returned
725 start will be returned
710 if stop is specified, it will consider all the revs from stop
726 if stop is specified, it will consider all the revs from stop
711 as if they had no children
727 as if they had no children
712 """
728 """
713 if start is None and stop is None:
729 if start is None and stop is None:
714 if not len(self):
730 if not len(self):
715 return [nullid]
731 return [nullid]
716 return [self.node(r) for r in self.headrevs()]
732 return [self.node(r) for r in self.headrevs()]
717
733
718 if start is None:
734 if start is None:
719 start = nullid
735 start = nullid
720 if stop is None:
736 if stop is None:
721 stop = []
737 stop = []
722 stoprevs = set([self.rev(n) for n in stop])
738 stoprevs = set([self.rev(n) for n in stop])
723 startrev = self.rev(start)
739 startrev = self.rev(start)
724 reachable = set((startrev,))
740 reachable = set((startrev,))
725 heads = set((startrev,))
741 heads = set((startrev,))
726
742
727 parentrevs = self.parentrevs
743 parentrevs = self.parentrevs
728 for r in self.revs(start=startrev + 1):
744 for r in self.revs(start=startrev + 1):
729 for p in parentrevs(r):
745 for p in parentrevs(r):
730 if p in reachable:
746 if p in reachable:
731 if r not in stoprevs:
747 if r not in stoprevs:
732 reachable.add(r)
748 reachable.add(r)
733 heads.add(r)
749 heads.add(r)
734 if p in heads and p not in stoprevs:
750 if p in heads and p not in stoprevs:
735 heads.remove(p)
751 heads.remove(p)
736
752
737 return [self.node(r) for r in heads]
753 return [self.node(r) for r in heads]
738
754
739 def children(self, node):
755 def children(self, node):
740 """find the children of a given node"""
756 """find the children of a given node"""
741 c = []
757 c = []
742 p = self.rev(node)
758 p = self.rev(node)
743 for r in self.revs(start=p + 1):
759 for r in self.revs(start=p + 1):
744 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
760 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
745 if prevs:
761 if prevs:
746 for pr in prevs:
762 for pr in prevs:
747 if pr == p:
763 if pr == p:
748 c.append(self.node(r))
764 c.append(self.node(r))
749 elif p == nullrev:
765 elif p == nullrev:
750 c.append(self.node(r))
766 c.append(self.node(r))
751 return c
767 return c
752
768
753 def descendant(self, start, end):
769 def descendant(self, start, end):
754 if start == nullrev:
770 if start == nullrev:
755 return True
771 return True
756 for i in self.descendants([start]):
772 for i in self.descendants([start]):
757 if i == end:
773 if i == end:
758 return True
774 return True
759 elif i > end:
775 elif i > end:
760 break
776 break
761 return False
777 return False
762
778
763 def commonancestorsheads(self, a, b):
779 def commonancestorsheads(self, a, b):
764 """calculate all the heads of the common ancestors of nodes a and b"""
780 """calculate all the heads of the common ancestors of nodes a and b"""
765 a, b = self.rev(a), self.rev(b)
781 a, b = self.rev(a), self.rev(b)
766 try:
782 try:
767 ancs = self.index.commonancestorsheads(a, b)
783 ancs = self.index.commonancestorsheads(a, b)
768 except (AttributeError, OverflowError): # C implementation failed
784 except (AttributeError, OverflowError): # C implementation failed
769 ancs = ancestor.commonancestorsheads(self.parentrevs, a, b)
785 ancs = ancestor.commonancestorsheads(self.parentrevs, a, b)
770 return map(self.node, ancs)
786 return map(self.node, ancs)
771
787
772 def isancestor(self, a, b):
788 def isancestor(self, a, b):
773 """return True if node a is an ancestor of node b
789 """return True if node a is an ancestor of node b
774
790
775 The implementation of this is trivial but the use of
791 The implementation of this is trivial but the use of
776 commonancestorsheads is not."""
792 commonancestorsheads is not."""
777 return a in self.commonancestorsheads(a, b)
793 return a in self.commonancestorsheads(a, b)
778
794
779 def ancestor(self, a, b):
795 def ancestor(self, a, b):
780 """calculate the "best" common ancestor of nodes a and b"""
796 """calculate the "best" common ancestor of nodes a and b"""
781
797
782 a, b = self.rev(a), self.rev(b)
798 a, b = self.rev(a), self.rev(b)
783 try:
799 try:
784 ancs = self.index.ancestors(a, b)
800 ancs = self.index.ancestors(a, b)
785 except (AttributeError, OverflowError):
801 except (AttributeError, OverflowError):
786 ancs = ancestor.ancestors(self.parentrevs, a, b)
802 ancs = ancestor.ancestors(self.parentrevs, a, b)
787 if ancs:
803 if ancs:
788 # choose a consistent winner when there's a tie
804 # choose a consistent winner when there's a tie
789 return min(map(self.node, ancs))
805 return min(map(self.node, ancs))
790 return nullid
806 return nullid
791
807
792 def _match(self, id):
808 def _match(self, id):
793 if isinstance(id, int):
809 if isinstance(id, int):
794 # rev
810 # rev
795 return self.node(id)
811 return self.node(id)
796 if len(id) == 20:
812 if len(id) == 20:
797 # possibly a binary node
813 # possibly a binary node
798 # odds of a binary node being all hex in ASCII are 1 in 10**25
814 # odds of a binary node being all hex in ASCII are 1 in 10**25
799 try:
815 try:
800 node = id
816 node = id
801 self.rev(node) # quick search the index
817 self.rev(node) # quick search the index
802 return node
818 return node
803 except LookupError:
819 except LookupError:
804 pass # may be partial hex id
820 pass # may be partial hex id
805 try:
821 try:
806 # str(rev)
822 # str(rev)
807 rev = int(id)
823 rev = int(id)
808 if str(rev) != id:
824 if str(rev) != id:
809 raise ValueError
825 raise ValueError
810 if rev < 0:
826 if rev < 0:
811 rev = len(self) + rev
827 rev = len(self) + rev
812 if rev < 0 or rev >= len(self):
828 if rev < 0 or rev >= len(self):
813 raise ValueError
829 raise ValueError
814 return self.node(rev)
830 return self.node(rev)
815 except (ValueError, OverflowError):
831 except (ValueError, OverflowError):
816 pass
832 pass
817 if len(id) == 40:
833 if len(id) == 40:
818 try:
834 try:
819 # a full hex nodeid?
835 # a full hex nodeid?
820 node = bin(id)
836 node = bin(id)
821 self.rev(node)
837 self.rev(node)
822 return node
838 return node
823 except (TypeError, LookupError):
839 except (TypeError, LookupError):
824 pass
840 pass
825
841
826 def _partialmatch(self, id):
842 def _partialmatch(self, id):
827 try:
843 try:
828 n = self.index.partialmatch(id)
844 n = self.index.partialmatch(id)
829 if n and self.hasnode(n):
845 if n and self.hasnode(n):
830 return n
846 return n
831 return None
847 return None
832 except RevlogError:
848 except RevlogError:
833 # parsers.c radix tree lookup gave multiple matches
849 # parsers.c radix tree lookup gave multiple matches
834 # fall through to slow path that filters hidden revisions
850 # fall through to slow path that filters hidden revisions
835 pass
851 pass
836 except (AttributeError, ValueError):
852 except (AttributeError, ValueError):
837 # we are pure python, or key was too short to search radix tree
853 # we are pure python, or key was too short to search radix tree
838 pass
854 pass
839
855
840 if id in self._pcache:
856 if id in self._pcache:
841 return self._pcache[id]
857 return self._pcache[id]
842
858
843 if len(id) < 40:
859 if len(id) < 40:
844 try:
860 try:
845 # hex(node)[:...]
861 # hex(node)[:...]
846 l = len(id) // 2 # grab an even number of digits
862 l = len(id) // 2 # grab an even number of digits
847 prefix = bin(id[:l * 2])
863 prefix = bin(id[:l * 2])
848 nl = [e[7] for e in self.index if e[7].startswith(prefix)]
864 nl = [e[7] for e in self.index if e[7].startswith(prefix)]
849 nl = [n for n in nl if hex(n).startswith(id) and
865 nl = [n for n in nl if hex(n).startswith(id) and
850 self.hasnode(n)]
866 self.hasnode(n)]
851 if len(nl) > 0:
867 if len(nl) > 0:
852 if len(nl) == 1:
868 if len(nl) == 1:
853 self._pcache[id] = nl[0]
869 self._pcache[id] = nl[0]
854 return nl[0]
870 return nl[0]
855 raise LookupError(id, self.indexfile,
871 raise LookupError(id, self.indexfile,
856 _('ambiguous identifier'))
872 _('ambiguous identifier'))
857 return None
873 return None
858 except TypeError:
874 except TypeError:
859 pass
875 pass
860
876
861 def lookup(self, id):
877 def lookup(self, id):
862 """locate a node based on:
878 """locate a node based on:
863 - revision number or str(revision number)
879 - revision number or str(revision number)
864 - nodeid or subset of hex nodeid
880 - nodeid or subset of hex nodeid
865 """
881 """
866 n = self._match(id)
882 n = self._match(id)
867 if n is not None:
883 if n is not None:
868 return n
884 return n
869 n = self._partialmatch(id)
885 n = self._partialmatch(id)
870 if n:
886 if n:
871 return n
887 return n
872
888
873 raise LookupError(id, self.indexfile, _('no match found'))
889 raise LookupError(id, self.indexfile, _('no match found'))
874
890
875 def cmp(self, node, text):
891 def cmp(self, node, text):
876 """compare text with a given file revision
892 """compare text with a given file revision
877
893
878 returns True if text is different than what is stored.
894 returns True if text is different than what is stored.
879 """
895 """
880 p1, p2 = self.parents(node)
896 p1, p2 = self.parents(node)
881 return hash(text, p1, p2) != node
897 return hash(text, p1, p2) != node
882
898
883 def _addchunk(self, offset, data):
899 def _addchunk(self, offset, data):
884 o, d = self._chunkcache
900 o, d = self._chunkcache
885 # try to add to existing cache
901 # try to add to existing cache
886 if o + len(d) == offset and len(d) + len(data) < _chunksize:
902 if o + len(d) == offset and len(d) + len(data) < _chunksize:
887 self._chunkcache = o, d + data
903 self._chunkcache = o, d + data
888 else:
904 else:
889 self._chunkcache = offset, data
905 self._chunkcache = offset, data
890
906
891 def _loadchunk(self, offset, length):
907 def _loadchunk(self, offset, length):
892 if self._inline:
908 if self._inline:
893 df = self.opener(self.indexfile)
909 df = self.opener(self.indexfile)
894 else:
910 else:
895 df = self.opener(self.datafile)
911 df = self.opener(self.datafile)
896
912
897 # Cache data both forward and backward around the requested
913 # Cache data both forward and backward around the requested
898 # data, in a fixed size window. This helps speed up operations
914 # data, in a fixed size window. This helps speed up operations
899 # involving reading the revlog backwards.
915 # involving reading the revlog backwards.
900 cachesize = self._chunkcachesize
916 cachesize = self._chunkcachesize
901 realoffset = offset & ~(cachesize - 1)
917 realoffset = offset & ~(cachesize - 1)
902 reallength = (((offset + length + cachesize) & ~(cachesize - 1))
918 reallength = (((offset + length + cachesize) & ~(cachesize - 1))
903 - realoffset)
919 - realoffset)
904 df.seek(realoffset)
920 df.seek(realoffset)
905 d = df.read(reallength)
921 d = df.read(reallength)
906 df.close()
922 df.close()
907 self._addchunk(realoffset, d)
923 self._addchunk(realoffset, d)
908 if offset != realoffset or reallength != length:
924 if offset != realoffset or reallength != length:
909 return util.buffer(d, offset - realoffset, length)
925 return util.buffer(d, offset - realoffset, length)
910 return d
926 return d
911
927
    def _getchunk(self, offset, length):
        o, d = self._chunkcache
        l = len(d)

        # is it in the cache?
        cachestart = offset - o
        cacheend = cachestart + length
        if cachestart >= 0 and cacheend <= l:
            if cachestart == 0 and cacheend == l:
                return d # avoid a copy
            return util.buffer(d, cachestart, cacheend - cachestart)

        return self._loadchunk(offset, length)

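    # A hedged sketch of the cache-hit test above: with the 64K window
    # cached by the previous example (offset 65536, 65536 bytes), the
    # same 100-byte read is served without touching the file.
    #
    #   >>> o, l = 65536, 65536         # cached window
    #   >>> cachestart = 70000 - o
    #   >>> cacheend = cachestart + 100
    #   >>> cachestart >= 0 and cacheend <= l
    #   True
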
    def _chunkraw(self, startrev, endrev):
        start = self.start(startrev)
        end = self.end(endrev)
        if self._inline:
            start += (startrev + 1) * self._io.size
            end += (endrev + 1) * self._io.size
        length = end - start
        return self._getchunk(start, length)

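    # Hedged note on the inline adjustment in _chunkraw: in an inline
    # revlog the data for rev r is stored directly after r's index
    # entry, so r + 1 fixed-size index entries precede it in the file.
    # A sketch for rev 2, assuming the usual 64-byte RevlogNG entries:
    #
    #   >>> iosize = 64
    #   >>> datastart = 0               # self.start(2), say
    #   >>> datastart + (2 + 1) * iosize
    #   192
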
    def _chunk(self, rev):
        return decompress(self._chunkraw(rev, rev))

    def _chunks(self, revs):
        '''faster version of [self._chunk(rev) for rev in revs]

        Assumes that revs is in ascending order.'''
        if not revs:
            return []
        start = self.start
        length = self.length
        inline = self._inline
        iosize = self._io.size
        buffer = util.buffer

        l = []
        ladd = l.append

        # preload the cache
        try:
            while True:
                # ensure that the cache doesn't change out from under us
                _cache = self._chunkcache
                self._chunkraw(revs[0], revs[-1])
                if _cache == self._chunkcache:
                    break
            offset, data = _cache
        except OverflowError:
            # issue4215 - we can't cache a run of chunks greater than
            # 2G on Windows
            return [self._chunk(rev) for rev in revs]

        for rev in revs:
            chunkstart = start(rev)
            if inline:
                chunkstart += (rev + 1) * iosize
            chunklength = length(rev)
            ladd(decompress(buffer(data, chunkstart - offset, chunklength)))

        return l

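    # A hedged usage sketch ('rl' and 'wanted' are illustrative names):
    # callers should sort the revs first, since _chunks() assumes
    # ascending order and fetches the whole span with one _chunkraw()
    # read instead of one read per rev.
    #
    #   >>> revs = sorted(wanted)
    #   >>> for rev, chunk in zip(revs, rl._chunks(revs)):
    #   ...     pass # each chunk is the decompressed delta for rev
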
    def _chunkclear(self):
        self._chunkcache = (0, '')

    def deltaparent(self, rev):
        """return deltaparent of the given revision"""
        base = self.index[rev][3]
        if base == rev:
            return nullrev
        elif self._generaldelta:
            return base
        else:
            return rev - 1

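    # Hedged summary of the three cases in deltaparent(): index entry
    # field 3 is the stored delta base, so
    #   base == rev        -> rev stores a full text (parent is nullrev)
    #   self._generaldelta -> rev deltas against an arbitrary base rev
    #   otherwise          -> rev always deltas against rev - 1
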
    def revdiff(self, rev1, rev2):
        """return or calculate a delta between two revisions"""
        if rev1 != nullrev and self.deltaparent(rev2) == rev1:
            return str(self._chunk(rev2))

        return mdiff.textdiff(self.revision(rev1),
                              self.revision(rev2))

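    # A hedged sketch ('rl' is an illustrative name): when rev2 is
    # stored as a delta against rev1, the stored chunk already is the
    # requested diff, so the fast path above avoids reconstructing
    # either full text.
    #
    #   >>> if rl.deltaparent(rev2) == rev1:
    #   ...     pass # revdiff(rev1, rev2) is a single chunk read
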
    def revision(self, nodeorrev):
        """return an uncompressed revision of a given node or revision
        number.
        """
        if isinstance(nodeorrev, int):
            rev = nodeorrev
            node = self.node(rev)
        else:
            node = nodeorrev
            rev = None

        _cache = self._cache # grab local copy of cache to avoid thread race
        cachedrev = None
        if node == nullid:
            return ""
        if _cache:
            if _cache[0] == node:
                return _cache[2]
            cachedrev = _cache[1]

        # look up what we need to read
        text = None
        if rev is None:
            rev = self.rev(node)

        # check rev flags
        if self.flags(rev) & ~REVIDX_KNOWN_FLAGS:
            raise RevlogError(_('incompatible revision flag %x') %
                              (self.flags(rev) & ~REVIDX_KNOWN_FLAGS))

        # build delta chain
        chain = []
        index = self.index # for performance
        generaldelta = self._generaldelta
        iterrev = rev
        e = index[iterrev]
        while iterrev != e[3] and iterrev != cachedrev:
            chain.append(iterrev)
            if generaldelta:
                iterrev = e[3]
            else:
                iterrev -= 1
            e = index[iterrev]

        if iterrev == cachedrev:
            # cache hit
            text = _cache[2]
        else:
            chain.append(iterrev)
        chain.reverse()

        # drop cache to save memory
        self._cache = None

        bins = self._chunks(chain)
        if text is None:
            text = str(bins[0])
            bins = bins[1:]

        text = mdiff.patches(text, bins)

        text = self._checkhash(text, node, rev)

        self._cache = (node, rev, text)
        return text

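    # A hedged sketch of the chain walk in revision(): suppose rev 5 is
    # a delta on 4, 4 on 3, and 3 stores a full text (its base field
    # equals 3). The loop collects [5, 4], the else branch appends the
    # base rev 3, and the reverse yields the application order:
    #
    #   >>> chain = [3, 4, 5] # full text of 3, then deltas of 4 and 5
    #   >>> # text = mdiff.patches(chunk(3), [chunk(4), chunk(5)])
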
    def hash(self, text, p1, p2):
        """Compute a node hash.

        Available as a function so that subclasses can replace the hash
        as needed.
        """
        return hash(text, p1, p2)

    def _checkhash(self, text, node, rev):
        p1, p2 = self.parents(node)
        self.checkhash(text, p1, p2, node, rev)
        return text

    def checkhash(self, text, p1, p2, node, rev=None):
        if node != self.hash(text, p1, p2):
            revornode = rev
            if revornode is None:
                revornode = templatefilters.short(hex(node))
            raise RevlogError(_("integrity check failed on %s:%s")
                              % (self.indexfile, revornode))

    def checkinlinesize(self, tr, fp=None):
        if not self._inline or (self.start(-2) + self.length(-2)) < _maxinline:
            return

        trinfo = tr.find(self.indexfile)
        if trinfo is None:
            raise RevlogError(_("%s not found in the transaction")
                              % self.indexfile)

        trindex = trinfo[2]
        dataoff = self.start(trindex)

        tr.add(self.datafile, dataoff)

        if fp:
            fp.flush()
            fp.close()

        df = self.opener(self.datafile, 'w')
        try:
            for r in self:
                df.write(self._chunkraw(r, r))
        finally:
            df.close()

        fp = self.opener(self.indexfile, 'w', atomictemp=True)
        self.version &= ~(REVLOGNGINLINEDATA)
        self._inline = False
        for i in self:
            e = self._io.packentry(self.index[i], self.node, self.version, i)
            fp.write(e)

        # if we don't call close, the temp file will never replace the
        # real index
        fp.close()

        tr.replace(self.indexfile, trindex * self._io.size)
        self._chunkclear()

    def addrevision(self, text, transaction, link, p1, p2, cachedelta=None,
                    node=None):
        """add a revision to the log

        text - the revision data to add
        transaction - the transaction object used for rollback
        link - the linkrev data to add
        p1, p2 - the parent nodeids of the revision
        cachedelta - an optional precomputed delta
        node - nodeid of revision; typically node is not specified, and it is
            computed by default as hash(text, p1, p2); however, subclasses
            might use a different hashing method (and override checkhash()
            in that case)
        """
        if link == nullrev:
            raise RevlogError(_("attempted to add linkrev -1 to %s")
                              % self.indexfile)
        node = node or self.hash(text, p1, p2)
        if node in self.nodemap:
            return node

        dfh = None
        if not self._inline:
            dfh = self.opener(self.datafile, "a")
        ifh = self.opener(self.indexfile, "a+")
        try:
            return self._addrevision(node, text, transaction, link, p1, p2,
                                     cachedelta, ifh, dfh)
        finally:
            if dfh:
                dfh.close()
            ifh.close()

    def compress(self, text):
        """generate a possibly-compressed representation of text"""
        if not text:
            return ("", text)
        l = len(text)
        bin = None
        if l < 44:
            pass
        elif l > 1000000:
            # zlib makes an internal copy, thus doubling memory usage for
            # large files, so let's do this in pieces
            z = zlib.compressobj()
            p = []
            pos = 0
            while pos < l:
                pos2 = pos + 2**20
                p.append(z.compress(text[pos:pos2]))
                pos = pos2
            p.append(z.flush())
            if sum(map(len, p)) < l:
                bin = "".join(p)
        else:
            bin = _compress(text)
        if bin is None or len(bin) > l:
            if text[0] == '\0':
                return ("", text)
            return ('u', text)
        return ("", bin)

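    # Hedged worked examples of the heuristic above ('rl' is an
    # illustrative name): texts under 44 bytes never win after zlib
    # overhead, so they are stored literally; the 'u' marker
    # distinguishes literal text from data that merely starts with a
    # NUL byte.
    #
    #   >>> rl.compress("")
    #   ('', '')
    #   >>> rl.compress("short text")   # < 44 bytes: stored as-is
    #   ('u', 'short text')
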
    def _addrevision(self, node, text, transaction, link, p1, p2,
                     cachedelta, ifh, dfh):
        """internal function to add revisions to the log

        see addrevision for argument descriptions.
        invariants:
        - text is optional (can be None); if not set, cachedelta must be set.
          if both are set, they must correspond to each other.
        """
        btext = [text]
        def buildtext():
            if btext[0] is not None:
                return btext[0]
            # flush any pending writes here so we can read it in revision
            if dfh:
                dfh.flush()
            ifh.flush()
            basetext = self.revision(self.node(cachedelta[0]))
            btext[0] = mdiff.patch(basetext, cachedelta[1])
            try:
                self.checkhash(btext[0], p1, p2, node)
            except CensoredNodeError:
                pass # always import a censor tombstone.
            return btext[0]

        def builddelta(rev):
            # can we use the cached delta?
            if cachedelta and cachedelta[0] == rev:
                delta = cachedelta[1]
            else:
                t = buildtext()
                ptext = self.revision(self.node(rev))
                delta = mdiff.textdiff(ptext, t)
            data = self.compress(delta)
            l = len(data[1]) + len(data[0])
            if basecache[0] == rev:
                chainbase = basecache[1]
            else:
                chainbase = self.chainbase(rev)
            dist = l + offset - self.start(chainbase)
            if self._generaldelta:
                base = rev
            else:
                base = chainbase
            chainlen, compresseddeltalen = self._chaininfo(rev)
            chainlen += 1
            compresseddeltalen += l
            return dist, l, data, base, chainbase, chainlen, compresseddeltalen

        curr = len(self)
        prev = curr - 1
        base = chainbase = curr
        chainlen = None
        offset = self.end(prev)
        flags = 0
        d = None
        if self._basecache is None:
            self._basecache = (prev, self.chainbase(prev))
        basecache = self._basecache
        p1r, p2r = self.rev(p1), self.rev(p2)

        # should we try to build a delta?
        if prev != nullrev:
            if self._generaldelta:
                if p1r >= basecache[1]:
                    d = builddelta(p1r)
                elif p2r >= basecache[1]:
                    d = builddelta(p2r)
                else:
                    d = builddelta(prev)
            else:
                d = builddelta(prev)
            dist, l, data, base, chainbase, chainlen, compresseddeltalen = d

        # full versions are inserted when the needed deltas
        # become comparable to the uncompressed text
        if text is None:
            textlen = mdiff.patchedsize(self.rawsize(cachedelta[0]),
                                        cachedelta[1])
        else:
            textlen = len(text)

        # - 'dist' is the distance from the base revision -- bounding it
        #   limits the amount of I/O we need to do.
        # - 'compresseddeltalen' is the sum of the total size of deltas we
        #   need to apply -- bounding it limits the amount of CPU we consume.
        if (d is None or dist > textlen * 4 or l > textlen or
            compresseddeltalen > textlen * 2 or
            (self._maxchainlen and chainlen > self._maxchainlen)):
            text = buildtext()
            data = self.compress(text)
            l = len(data[1]) + len(data[0])
            base = chainbase = curr

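        # Hedged worked example of the cutoffs above, for a 1000-byte
        # text: the delta is discarded and a full version stored if the
        # read span from the chain base exceeds 4000 bytes, the delta
        # itself exceeds 1000 bytes, the chain's compressed deltas sum
        # to more than 2000 bytes, or the chain outgrows a configured
        # _maxchainlen.
        #
        #   >>> textlen = 1000
        #   >>> dist, l, compresseddeltalen = 4200, 120, 900
        #   >>> dist > textlen * 4 # I/O bound exceeded -> full version
        #   True
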
        e = (offset_type(offset, flags), l, textlen,
             base, link, p1r, p2r, node)
        self.index.insert(-1, e)
        self.nodemap[node] = curr

        entry = self._io.packentry(e, self.node, self.version, curr)
        self._writeentry(transaction, ifh, dfh, entry, data, link, offset)

        if type(text) == str: # only accept immutable objects
            self._cache = (node, curr, text)
        self._basecache = (curr, chainbase)
        return node

    def _writeentry(self, transaction, ifh, dfh, entry, data, link, offset):
        curr = len(self) - 1
        if not self._inline:
            transaction.add(self.datafile, offset)
            transaction.add(self.indexfile, curr * len(entry))
            if data[0]:
                dfh.write(data[0])
            dfh.write(data[1])
            dfh.flush()
            ifh.write(entry)
        else:
            offset += curr * self._io.size
            transaction.add(self.indexfile, offset, curr)
            ifh.write(entry)
            ifh.write(data[0])
            ifh.write(data[1])
            self.checkinlinesize(transaction, ifh)

    def addgroup(self, bundle, linkmapper, transaction):
        """
        add a delta group

        given a set of deltas, add them to the revision log. the
        first delta is against its parent, which should be in our
        log; the rest are against the previous delta.
        """

        # track the base of the current delta log
        content = []
        node = None

        r = len(self)
        end = 0
        if r:
            end = self.end(r - 1)
        ifh = self.opener(self.indexfile, "a+")
        isize = r * self._io.size
        if self._inline:
            transaction.add(self.indexfile, end + isize, r)
            dfh = None
        else:
            transaction.add(self.indexfile, isize, r)
            transaction.add(self.datafile, end)
            dfh = self.opener(self.datafile, "a")

        try:
            # loop through our set of deltas
            chain = None
            while True:
                chunkdata = bundle.deltachunk(chain)
                if not chunkdata:
                    break
                node = chunkdata['node']
                p1 = chunkdata['p1']
                p2 = chunkdata['p2']
                cs = chunkdata['cs']
                deltabase = chunkdata['deltabase']
                delta = chunkdata['delta']

                content.append(node)

                link = linkmapper(cs)
                if node in self.nodemap:
                    # this can happen if two branches make the same change
                    chain = node
                    continue

                for p in (p1, p2):
                    if p not in self.nodemap:
                        raise LookupError(p, self.indexfile,
                                          _('unknown parent'))

                if deltabase not in self.nodemap:
                    raise LookupError(deltabase, self.indexfile,
                                      _('unknown delta base'))

                baserev = self.rev(deltabase)
                chain = self._addrevision(node, None, transaction, link,
                                          p1, p2, (baserev, delta), ifh, dfh)
                if not dfh and not self._inline:
                    # addrevision switched from inline to conventional
                    # reopen the index
                    ifh.close()
                    dfh = self.opener(self.datafile, "a")
                    ifh = self.opener(self.indexfile, "a")
        finally:
            if dfh:
                dfh.close()
            ifh.close()

        return content

    def getstrippoint(self, minlink):
        """find the minimum rev that must be stripped to strip the linkrev

        Returns a tuple containing the minimum rev and a set of all revs that
        have linkrevs that will be broken by this strip.
        """
        brokenrevs = set()
        strippoint = len(self)

        heads = {}
        futurelargelinkrevs = set()
        for head in self.headrevs():
            headlinkrev = self.linkrev(head)
            heads[head] = headlinkrev
            if headlinkrev >= minlink:
                futurelargelinkrevs.add(headlinkrev)

        # This algorithm involves walking down the rev graph, starting at the
        # heads. Since the revs are topologically sorted according to linkrev,
        # once all head linkrevs are below the minlink, we know there are
        # no more revs that could have a linkrev greater than minlink.
        # So we can stop walking.
        while futurelargelinkrevs:
            strippoint -= 1
            linkrev = heads.pop(strippoint)

            if linkrev < minlink:
                brokenrevs.add(strippoint)
            else:
                futurelargelinkrevs.remove(linkrev)

            for p in self.parentrevs(strippoint):
                if p != nullrev:
                    plinkrev = self.linkrev(p)
                    heads[p] = plinkrev
                    if plinkrev >= minlink:
                        futurelargelinkrevs.add(plinkrev)

        return strippoint, brokenrevs

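    # A hedged worked example ('rl' is an illustrative name): suppose
    # revs 2 and 3 both branch off rev 1, with linkrevs 4 and 2, and
    # minlink is 4. The walk pops rev 3 first (linkrev 2 < 4, so
    # truncation will break it), then rev 2 (linkrev 4, the rev being
    # stripped), leaving no large future linkrevs:
    #
    #   >>> rl.getstrippoint(4) # illustrative values
    #   (2, set([3]))
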
    def strip(self, minlink, transaction):
        """truncate the revlog on the first revision with a linkrev >= minlink

        This function is called when we're stripping revision minlink and
        its descendants from the repository.

        We have to remove all revisions with linkrev >= minlink, because
        the equivalent changelog revisions will be renumbered after the
        strip.

        So we truncate the revlog on the first of these revisions, and
        trust that the caller has saved the revisions that shouldn't be
        removed and that it'll re-add them after this truncation.
        """
        if len(self) == 0:
            return

        rev, _ = self.getstrippoint(minlink)
        if rev == len(self):
            return

        # first truncate the files on disk
        end = self.start(rev)
        if not self._inline:
            transaction.add(self.datafile, end)
            end = rev * self._io.size
        else:
            end += rev * self._io.size

        transaction.add(self.indexfile, end)

        # then reset internal state in memory to forget those revisions
        self._cache = None
        self._chaininfocache = {}
        self._chunkclear()
        for x in xrange(rev, len(self)):
            del self.nodemap[self.node(x)]

        del self.index[rev:-1]

    def checksize(self):
        expected = 0
        if len(self):
            expected = max(0, self.end(len(self) - 1))

        try:
            f = self.opener(self.datafile)
            f.seek(0, 2)
            actual = f.tell()
            f.close()
            dd = actual - expected
        except IOError, inst:
            if inst.errno != errno.ENOENT:
                raise
            dd = 0

        try:
            f = self.opener(self.indexfile)
            f.seek(0, 2)
            actual = f.tell()
            f.close()
            s = self._io.size
            i = max(0, actual // s)
            di = actual - (i * s)
            if self._inline:
                databytes = 0
                for r in self:
                    databytes += max(0, self.length(r))
                dd = 0
                di = actual - len(self) * s - databytes
        except IOError, inst:
            if inst.errno != errno.ENOENT:
                raise
            di = 0

        return (dd, di)

    def files(self):
        res = [self.indexfile]
        if not self._inline:
            res.append(self.datafile)
        return res