revlog: compute length of compressed deltas along with chain length...
Siddharth Agarwal
r23286:40e00678 default
@@ -1,1489 +1,1495 @@
# revlog.py - storage back-end for mercurial
#
# Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.

"""Storage back-end for Mercurial.

This provides efficient delta storage with O(1) retrieve and append
and O(changes) merge between branches.
"""

# import stuff from node for others to import from revlog
from node import bin, hex, nullid, nullrev
from i18n import _
import ancestor, mdiff, parsers, error, util, templatefilters
import struct, zlib, errno

_pack = struct.pack
_unpack = struct.unpack
_compress = zlib.compress
_decompress = zlib.decompress
_sha = util.sha1

# revlog header flags
REVLOGV0 = 0
REVLOGNG = 1
REVLOGNGINLINEDATA = (1 << 16)
REVLOGGENERALDELTA = (1 << 17)
REVLOG_DEFAULT_FLAGS = REVLOGNGINLINEDATA
REVLOG_DEFAULT_FORMAT = REVLOGNG
REVLOG_DEFAULT_VERSION = REVLOG_DEFAULT_FORMAT | REVLOG_DEFAULT_FLAGS
REVLOGNG_FLAGS = REVLOGNGINLINEDATA | REVLOGGENERALDELTA

# revlog index flags
REVIDX_KNOWN_FLAGS = 0

# max size of revlog with inline data
_maxinline = 131072
_chunksize = 1048576

RevlogError = error.RevlogError
LookupError = error.LookupError
CensoredNodeError = error.CensoredNodeError

def getoffset(q):
    return int(q >> 16)

def gettype(q):
    return int(q & 0xFFFF)

def offset_type(offset, type):
    return long(long(offset) << 16 | type)

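# Annotation (not part of the original file): the first field of an index
# entry packs a 48-bit file offset and 16 flag bits into one integer; the
# three helpers above round-trip that packing. Illustrative sanity check
# with made-up values:
_q = offset_type(12345, 1)
assert getoffset(_q) == 12345 and gettype(_q) == 1
del _q
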
_nullhash = _sha(nullid)

def hash(text, p1, p2):
    """generate a hash from the given text and its parent hashes

    This hash combines both the current file contents and its history
    in a manner that makes it easy to distinguish nodes with the same
    content in the revision graph.
    """
    # As of now, if one of the parent nodes is null, p2 is null
    if p2 == nullid:
        # deep copy of a hash is faster than creating one
        s = _nullhash.copy()
        s.update(p1)
    else:
        # none of the parent nodes are nullid
        l = [p1, p2]
        l.sort()
        s = _sha(l[0])
        s.update(l[1])
    s.update(text)
    return s.digest()

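# Annotation (not part of the original file): when p2 is nullid the fast
# path above yields the same digest as the general sorted-parents path,
# because nullid (20 NUL bytes) sorts before any other 20-byte node.
# Sketch with sample values:
_p1 = 'p' * 20
assert hash('text', _p1, nullid) == _sha(nullid + _p1 + 'text').digest()
del _p1
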
def decompress(bin):
    """ decompress the given input """
    if not bin:
        return bin
    t = bin[0]
    if t == '\0':
        return bin
    if t == 'x':
        try:
            return _decompress(bin)
        except zlib.error, e:
            raise RevlogError(_("revlog decompress error: %s") % str(e))
    if t == 'u':
        return bin[1:]
    raise RevlogError(_("unknown compression type %r") % t)

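# Annotation (not part of the original file): chunks are self-describing
# by their first byte -- 'x' is the zlib header, 'u' marks data stored
# uncompressed, and '\0' or an empty chunk passes through unchanged.
# Illustrative round-trips:
assert decompress(_compress('some delta')) == 'some delta'
assert decompress('uplain text') == 'plain text'
assert decompress('') == ''
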
# index v0:
#  4 bytes: offset
#  4 bytes: compressed length
#  4 bytes: base rev
#  4 bytes: link rev
# 20 bytes: parent 1 nodeid
# 20 bytes: parent 2 nodeid
# 20 bytes: nodeid
indexformatv0 = ">4l20s20s20s"
v0shaoffset = 56

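# Annotation (not part of the original file): per the layout above, a v0
# entry is four 4-byte ints plus three 20-byte nodeids, and the entry's
# own nodeid starts right after the two parents:
assert struct.calcsize(indexformatv0) == 4 * 4 + 3 * 20  # 76 bytes
assert v0shaoffset == 4 * 4 + 2 * 20                     # nodeid at byte 56
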
class revlogoldio(object):
    def __init__(self):
        self.size = struct.calcsize(indexformatv0)

    def parseindex(self, data, inline):
        s = self.size
        index = []
        nodemap = {nullid: nullrev}
        n = off = 0
        l = len(data)
        while off + s <= l:
            cur = data[off:off + s]
            off += s
            e = _unpack(indexformatv0, cur)
            # transform to revlogv1 format
            e2 = (offset_type(e[0], 0), e[1], -1, e[2], e[3],
                  nodemap.get(e[4], nullrev), nodemap.get(e[5], nullrev), e[6])
            index.append(e2)
            nodemap[e[6]] = n
            n += 1

        # add the magic null revision at -1
        index.append((0, 0, 0, -1, -1, -1, -1, nullid))

        return index, nodemap, None

    def packentry(self, entry, node, version, rev):
        if gettype(entry[0]):
            raise RevlogError(_("index entry flags need RevlogNG"))
        e2 = (getoffset(entry[0]), entry[1], entry[3], entry[4],
              node(entry[5]), node(entry[6]), entry[7])
        return _pack(indexformatv0, *e2)

# index ng:
#  6 bytes: offset
#  2 bytes: flags
#  4 bytes: compressed length
#  4 bytes: uncompressed length
#  4 bytes: base rev
#  4 bytes: link rev
#  4 bytes: parent 1 rev
#  4 bytes: parent 2 rev
# 32 bytes: nodeid
indexformatng = ">Qiiiiii20s12x"
ngshaoffset = 32
versionformat = ">I"

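# Annotation (not part of the original file): an NG entry is 64 bytes --
# offset and flags share the leading big-endian Q, six 4-byte ints follow,
# and the 20-byte nodeid is padded to 32 bytes with 12 NUL pad bytes
# (hence "32 bytes: nodeid" above):
assert struct.calcsize(indexformatng) == 8 + 6 * 4 + 20 + 12  # 64 bytes
assert ngshaoffset == 8 + 6 * 4              # nodeid starts at byte 32
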
class revlogio(object):
    def __init__(self):
        self.size = struct.calcsize(indexformatng)

    def parseindex(self, data, inline):
        # call the C implementation to parse the index data
        index, cache = parsers.parse_index2(data, inline)
        return index, getattr(index, 'nodemap', None), cache

    def packentry(self, entry, node, version, rev):
        p = _pack(indexformatng, *entry)
        if rev == 0:
            p = _pack(versionformat, version) + p[4:]
        return p

class revlog(object):
    """
    the underlying revision storage object

    A revlog consists of two parts, an index and the revision data.

    The index is a file with a fixed record size containing
    information on each revision, including its nodeid (hash), the
    nodeids of its parents, the position and offset of its data within
    the data file, and the revision it's based on. Finally, each entry
    contains a linkrev entry that can serve as a pointer to external
    data.

    The revision data itself is a linear collection of data chunks.
    Each chunk represents a revision and is usually represented as a
    delta against the previous chunk. To bound lookup time, runs of
    deltas are limited to about 2 times the length of the original
    version data. This makes retrieval of a version proportional to
    its size, or O(1) relative to the number of revisions.

    Both pieces of the revlog are written to in an append-only
    fashion, which means we never need to rewrite a file to insert or
    remove data, and can use some simple techniques to avoid the need
    for locking while reading.
    """
    def __init__(self, opener, indexfile):
        """
        create a revlog object

        opener is a function that abstracts the file opening operation
        and can be used to implement COW semantics or the like.
        """
        self.indexfile = indexfile
        self.datafile = indexfile[:-2] + ".d"
        self.opener = opener
        self._cache = None
        self._basecache = None
        self._chunkcache = (0, '')
        self._chunkcachesize = 65536
        self._maxchainlen = None
        self.index = []
        self._pcache = {}
        self._nodecache = {nullid: nullrev}
        self._nodepos = None

        v = REVLOG_DEFAULT_VERSION
        opts = getattr(opener, 'options', None)
        if opts is not None:
            if 'revlogv1' in opts:
                if 'generaldelta' in opts:
                    v |= REVLOGGENERALDELTA
            else:
                v = 0
            if 'chunkcachesize' in opts:
                self._chunkcachesize = opts['chunkcachesize']
            if 'maxchainlen' in opts:
                self._maxchainlen = opts['maxchainlen']

        if self._chunkcachesize <= 0:
            raise RevlogError(_('revlog chunk cache size %r is not greater '
                                'than 0') % self._chunkcachesize)
        elif self._chunkcachesize & (self._chunkcachesize - 1):
            raise RevlogError(_('revlog chunk cache size %r is not a power '
                                'of 2') % self._chunkcachesize)

        i = ''
        self._initempty = True
        try:
            f = self.opener(self.indexfile)
            i = f.read()
            f.close()
            if len(i) > 0:
                v = struct.unpack(versionformat, i[:4])[0]
                self._initempty = False
        except IOError, inst:
            if inst.errno != errno.ENOENT:
                raise

        self.version = v
        self._inline = v & REVLOGNGINLINEDATA
        self._generaldelta = v & REVLOGGENERALDELTA
        flags = v & ~0xFFFF
        fmt = v & 0xFFFF
        if fmt == REVLOGV0 and flags:
            raise RevlogError(_("index %s unknown flags %#04x for format v0")
                              % (self.indexfile, flags >> 16))
        elif fmt == REVLOGNG and flags & ~REVLOGNG_FLAGS:
            raise RevlogError(_("index %s unknown flags %#04x for revlogng")
                              % (self.indexfile, flags >> 16))
        elif fmt > REVLOGNG:
            raise RevlogError(_("index %s unknown format %d")
                              % (self.indexfile, fmt))

        self._io = revlogio()
        if self.version == REVLOGV0:
            self._io = revlogoldio()
        try:
            d = self._io.parseindex(i, self._inline)
        except (ValueError, IndexError):
            raise RevlogError(_("index %s is corrupted") % (self.indexfile))
        self.index, nodemap, self._chunkcache = d
        if nodemap is not None:
            self.nodemap = self._nodecache = nodemap
        if not self._chunkcache:
            self._chunkclear()

    def tip(self):
        return self.node(len(self.index) - 2)
    def __len__(self):
        return len(self.index) - 1
    def __iter__(self):
        return iter(xrange(len(self)))
    def revs(self, start=0, stop=None):
        """iterate over all revs in this revlog (from start to stop)"""
        step = 1
        if stop is not None:
            if start > stop:
                step = -1
            stop += step
        else:
            stop = len(self)
        return xrange(start, stop, step)

    @util.propertycache
    def nodemap(self):
        self.rev(self.node(0))
        return self._nodecache

    def hasnode(self, node):
        try:
            self.rev(node)
            return True
        except KeyError:
            return False

    def clearcaches(self):
        try:
            self._nodecache.clearcaches()
        except AttributeError:
            self._nodecache = {nullid: nullrev}
            self._nodepos = None

    def rev(self, node):
        try:
            return self._nodecache[node]
        except TypeError:
            raise
        except RevlogError:
            # parsers.c radix tree lookup failed
            raise LookupError(node, self.indexfile, _('no node'))
        except KeyError:
            # pure python cache lookup failed
            n = self._nodecache
            i = self.index
            p = self._nodepos
            if p is None:
                p = len(i) - 2
            for r in xrange(p, -1, -1):
                v = i[r][7]
                n[v] = r
                if v == node:
                    self._nodepos = r - 1
                    return r
            raise LookupError(node, self.indexfile, _('no node'))

    def node(self, rev):
        return self.index[rev][7]
    def linkrev(self, rev):
        return self.index[rev][4]
    def parents(self, node):
        i = self.index
        d = i[self.rev(node)]
        return i[d[5]][7], i[d[6]][7] # map revisions to nodes inline
    def parentrevs(self, rev):
        return self.index[rev][5:7]
    def start(self, rev):
        return int(self.index[rev][0] >> 16)
    def end(self, rev):
        return self.start(rev) + self.length(rev)
    def length(self, rev):
        return self.index[rev][1]
    def chainbase(self, rev):
        index = self.index
        base = index[rev][3]
        while base != rev:
            rev = base
            base = index[rev][3]
        return base
     def chainlen(self, rev):
+        return self._chaininfo(rev)[0]
+    def _chaininfo(self, rev):
         index = self.index
         generaldelta = self._generaldelta
         iterrev = rev
         e = index[iterrev]
         clen = 0
+        compresseddeltalen = 0
         while iterrev != e[3]:
             clen += 1
+            compresseddeltalen += e[1]
             if generaldelta:
                 iterrev = e[3]
             else:
                 iterrev -= 1
             e = index[iterrev]
-        return clen
+        # add text length of base since decompressing that also takes work
+        compresseddeltalen += e[1]
+        return clen, compresseddeltalen
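
Review annotation, not part of the diff: the hunk above splits the old
chainlen into _chaininfo, which also totals the compressed size of every
chunk that must be decompressed to reconstruct a revision (e[1] is the
compressed-length field of an index entry), so callers can weigh a delta
chain's decompression cost and not just its hop count. A standalone sketch
of the walk, using a simplified index of (deltabase, compressedlen) pairs;
names here are illustrative only:

    def chaininfo(index, rev, generaldelta=True):
        clen = compressed = 0
        iterrev = rev
        base, complen = index[iterrev]
        while iterrev != base:
            clen += 1
            compressed += complen      # a delta chunk we must decompress
            iterrev = base if generaldelta else iterrev - 1
            base, complen = index[iterrev]
        compressed += complen          # plus the full-text base chunk
        return clen, compressed

    # rev 2 deltas against rev 1, which deltas against full text at rev 0
    toy = [(0, 100), (0, 20), (1, 15)]
    assert chaininfo(toy, 2) == (2, 135)
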
    def flags(self, rev):
        return self.index[rev][0] & 0xFFFF
    def rawsize(self, rev):
        """return the length of the uncompressed text for a given revision"""
        l = self.index[rev][2]
        if l >= 0:
            return l

        t = self.revision(self.node(rev))
        return len(t)
    size = rawsize

    def ancestors(self, revs, stoprev=0, inclusive=False):
        """Generate the ancestors of 'revs' in reverse topological order.
        Does not generate revs lower than stoprev.

        See the documentation for ancestor.lazyancestors for more details."""

        return ancestor.lazyancestors(self, revs, stoprev=stoprev,
                                      inclusive=inclusive)

    def descendants(self, revs):
        """Generate the descendants of 'revs' in revision order.

        Yield a sequence of revision numbers starting with a child of
        some rev in revs, i.e., each revision is *not* considered a
        descendant of itself. Results are ordered by revision number (a
        topological sort)."""
        first = min(revs)
        if first == nullrev:
            for i in self:
                yield i
            return

        seen = set(revs)
        for i in self.revs(start=first + 1):
            for x in self.parentrevs(i):
                if x != nullrev and x in seen:
                    seen.add(i)
                    yield i
                    break

    def findcommonmissing(self, common=None, heads=None):
        """Return a tuple of the ancestors of common and the ancestors of heads
        that are not ancestors of common. In revset terminology, we return the
        tuple:

          ::common, (::heads) - (::common)

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of node IDs. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullid."""
        if common is None:
            common = [nullid]
        if heads is None:
            heads = self.heads()

        common = [self.rev(n) for n in common]
        heads = [self.rev(n) for n in heads]

        # we want the ancestors, but inclusive
        class lazyset(object):
            def __init__(self, lazyvalues):
                self.addedvalues = set()
                self.lazyvalues = lazyvalues

            def __contains__(self, value):
                return value in self.addedvalues or value in self.lazyvalues

            def __iter__(self):
                added = self.addedvalues
                for r in added:
                    yield r
                for r in self.lazyvalues:
                    if not r in added:
                        yield r

            def add(self, value):
                self.addedvalues.add(value)

            def update(self, values):
                self.addedvalues.update(values)

        has = lazyset(self.ancestors(common))
        has.add(nullrev)
        has.update(common)

        # take all ancestors from heads that aren't in has
        missing = set()
        visit = util.deque(r for r in heads if r not in has)
        while visit:
            r = visit.popleft()
            if r in missing:
                continue
            else:
                missing.add(r)
                for p in self.parentrevs(r):
                    if p not in has:
                        visit.append(p)
        missing = list(missing)
        missing.sort()
        return has, [self.node(r) for r in missing]

    def findmissingrevs(self, common=None, heads=None):
        """Return the revision numbers of the ancestors of heads that
        are not ancestors of common.

        More specifically, return a list of revision numbers corresponding to
        nodes N such that every N satisfies the following constraints:

          1. N is an ancestor of some node in 'heads'
          2. N is not an ancestor of any node in 'common'

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of revision numbers. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullid."""
        if common is None:
            common = [nullrev]
        if heads is None:
            heads = self.headrevs()

        return ancestor.missingancestors(heads, common, self.parentrevs)

    def findmissing(self, common=None, heads=None):
        """Return the ancestors of heads that are not ancestors of common.

        More specifically, return a list of nodes N such that every N
        satisfies the following constraints:

          1. N is an ancestor of some node in 'heads'
          2. N is not an ancestor of any node in 'common'

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of node IDs. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullid."""
        if common is None:
            common = [nullid]
        if heads is None:
            heads = self.heads()

        common = [self.rev(n) for n in common]
        heads = [self.rev(n) for n in heads]

        return [self.node(r) for r in
                ancestor.missingancestors(heads, common, self.parentrevs)]

    def nodesbetween(self, roots=None, heads=None):
        """Return a topological path from 'roots' to 'heads'.

        Return a tuple (nodes, outroots, outheads) where 'nodes' is a
        topologically sorted list of all nodes N that satisfy both of
        these constraints:

          1. N is a descendant of some node in 'roots'
          2. N is an ancestor of some node in 'heads'

        Every node is considered to be both a descendant and an ancestor
        of itself, so every reachable node in 'roots' and 'heads' will be
        included in 'nodes'.

        'outroots' is the list of reachable nodes in 'roots', i.e., the
        subset of 'roots' that is returned in 'nodes'. Likewise,
        'outheads' is the subset of 'heads' that is also in 'nodes'.

        'roots' and 'heads' are both lists of node IDs. If 'roots' is
        unspecified, uses nullid as the only root. If 'heads' is
        unspecified, uses list of all of the revlog's heads."""
        nonodes = ([], [], [])
        if roots is not None:
            roots = list(roots)
            if not roots:
                return nonodes
            lowestrev = min([self.rev(n) for n in roots])
        else:
            roots = [nullid] # Everybody's a descendant of nullid
            lowestrev = nullrev
        if (lowestrev == nullrev) and (heads is None):
            # We want _all_ the nodes!
            return ([self.node(r) for r in self], [nullid], list(self.heads()))
        if heads is None:
            # All nodes are ancestors, so the latest ancestor is the last
            # node.
            highestrev = len(self) - 1
            # Set ancestors to None to signal that every node is an ancestor.
            ancestors = None
            # Set heads to an empty dictionary for later discovery of heads
            heads = {}
        else:
            heads = list(heads)
            if not heads:
                return nonodes
            ancestors = set()
            # Turn heads into a dictionary so we can remove 'fake' heads.
            # Also, later we will be using it to filter out the heads we can't
            # find from roots.
            heads = dict.fromkeys(heads, False)
            # Start at the top and keep marking parents until we're done.
            nodestotag = set(heads)
            # Remember where the top was so we can use it as a limit later.
            highestrev = max([self.rev(n) for n in nodestotag])
            while nodestotag:
                # grab a node to tag
                n = nodestotag.pop()
                # Never tag nullid
                if n == nullid:
                    continue
                # A node's revision number represents its place in a
                # topologically sorted list of nodes.
                r = self.rev(n)
                if r >= lowestrev:
                    if n not in ancestors:
                        # If we are possibly a descendant of one of the roots
                        # and we haven't already been marked as an ancestor
                        ancestors.add(n) # Mark as ancestor
                        # Add non-nullid parents to list of nodes to tag.
                        nodestotag.update([p for p in self.parents(n) if
                                           p != nullid])
                    elif n in heads: # We've seen it before, is it a fake head?
                        # So it is, real heads should not be the ancestors of
                        # any other heads.
                        heads.pop(n)
            if not ancestors:
                return nonodes
            # Now that we have our set of ancestors, we want to remove any
            # roots that are not ancestors.

            # If one of the roots was nullid, everything is included anyway.
            if lowestrev > nullrev:
                # But, since we weren't, let's recompute the lowest rev to not
                # include roots that aren't ancestors.

                # Filter out roots that aren't ancestors of heads
                roots = [n for n in roots if n in ancestors]
                # Recompute the lowest revision
                if roots:
                    lowestrev = min([self.rev(n) for n in roots])
                else:
                    # No more roots? Return empty list
                    return nonodes
            else:
                # We are descending from nullid, and don't need to care about
                # any other roots.
                lowestrev = nullrev
                roots = [nullid]
        # Transform our roots list into a set.
        descendants = set(roots)
        # Also, keep the original roots so we can filter out roots that aren't
        # 'real' roots (i.e. are descended from other roots).
        roots = descendants.copy()
        # Our topologically sorted list of output nodes.
        orderedout = []
        # Don't start at nullid since we don't want nullid in our output list,
        # and if nullid shows up in descendants, empty parents will look like
        # they're descendants.
        for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
            n = self.node(r)
            isdescendant = False
            if lowestrev == nullrev: # Everybody is a descendant of nullid
                isdescendant = True
            elif n in descendants:
                # n is already a descendant
                isdescendant = True
                # This check only needs to be done here because all the roots
                # will start being marked as descendants before the loop.
                if n in roots:
                    # If n was a root, check if it's a 'real' root.
                    p = tuple(self.parents(n))
                    # If any of its parents are descendants, it's not a root.
                    if (p[0] in descendants) or (p[1] in descendants):
                        roots.remove(n)
            else:
                p = tuple(self.parents(n))
                # A node is a descendant if either of its parents are
                # descendants. (We seeded the dependents list with the roots
                # up there, remember?)
                if (p[0] in descendants) or (p[1] in descendants):
                    descendants.add(n)
                    isdescendant = True
            if isdescendant and ((ancestors is None) or (n in ancestors)):
                # Only include nodes that are both descendants and ancestors.
                orderedout.append(n)
                if (ancestors is not None) and (n in heads):
                    # We're trying to figure out which heads are reachable
                    # from roots.
                    # Mark this head as having been reached
                    heads[n] = True
                elif ancestors is None:
                    # Otherwise, we're trying to discover the heads.
                    # Assume this is a head because if it isn't, the next step
                    # will eventually remove it.
                    heads[n] = True
                    # But, obviously its parents aren't.
                    for p in self.parents(n):
                        heads.pop(p, None)
        heads = [n for n, flag in heads.iteritems() if flag]
        roots = list(roots)
        assert orderedout
        assert roots
        assert heads
        return (orderedout, roots, heads)

    def headrevs(self):
        try:
            return self.index.headrevs()
        except AttributeError:
            return self._headrevs()

    def _headrevs(self):
        count = len(self)
        if not count:
            return [nullrev]
        # we won't iter over filtered revs so nobody is a head at start
        ishead = [0] * (count + 1)
        index = self.index
        for r in self:
            ishead[r] = 1 # I may be a head
            e = index[r]
            ishead[e[5]] = ishead[e[6]] = 0 # my parents are not
        return [r for r, val in enumerate(ishead) if val]

    def heads(self, start=None, stop=None):
        """return the list of all nodes that have no children

        if start is specified, only heads that are descendants of
        start will be returned
        if stop is specified, it will consider all the revs from stop
        as if they had no children
        """
        if start is None and stop is None:
            if not len(self):
                return [nullid]
            return [self.node(r) for r in self.headrevs()]

        if start is None:
            start = nullid
        if stop is None:
            stop = []
        stoprevs = set([self.rev(n) for n in stop])
        startrev = self.rev(start)
        reachable = set((startrev,))
        heads = set((startrev,))

        parentrevs = self.parentrevs
        for r in self.revs(start=startrev + 1):
            for p in parentrevs(r):
                if p in reachable:
                    if r not in stoprevs:
                        reachable.add(r)
                        heads.add(r)
                    if p in heads and p not in stoprevs:
                        heads.remove(p)

        return [self.node(r) for r in heads]

    def children(self, node):
        """find the children of a given node"""
        c = []
        p = self.rev(node)
        for r in self.revs(start=p + 1):
            prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
            if prevs:
                for pr in prevs:
                    if pr == p:
                        c.append(self.node(r))
            elif p == nullrev:
                c.append(self.node(r))
        return c

    def descendant(self, start, end):
        if start == nullrev:
            return True
        for i in self.descendants([start]):
            if i == end:
                return True
            elif i > end:
                break
        return False

    def commonancestorsheads(self, a, b):
        """calculate all the heads of the common ancestors of nodes a and b"""
        a, b = self.rev(a), self.rev(b)
        try:
            ancs = self.index.commonancestorsheads(a, b)
        except (AttributeError, OverflowError): # C implementation failed
            ancs = ancestor.commonancestorsheads(self.parentrevs, a, b)
        return map(self.node, ancs)

    def isancestor(self, a, b):
        """return True if node a is an ancestor of node b

        The implementation of this is trivial but the use of
        commonancestorsheads is not."""
        return a in self.commonancestorsheads(a, b)

    def ancestor(self, a, b):
        """calculate the "best" common ancestor of nodes a and b"""

        a, b = self.rev(a), self.rev(b)
        try:
            ancs = self.index.ancestors(a, b)
        except (AttributeError, OverflowError):
            ancs = ancestor.ancestors(self.parentrevs, a, b)
        if ancs:
            # choose a consistent winner when there's a tie
            return min(map(self.node, ancs))
        return nullid

    def _match(self, id):
        if isinstance(id, int):
            # rev
            return self.node(id)
        if len(id) == 20:
            # possibly a binary node
            # odds of a binary node being all hex in ASCII are 1 in 10**25
            try:
                node = id
                self.rev(node) # quick search the index
                return node
            except LookupError:
                pass # may be partial hex id
        try:
            # str(rev)
            rev = int(id)
            if str(rev) != id:
                raise ValueError
            if rev < 0:
                rev = len(self) + rev
            if rev < 0 or rev >= len(self):
                raise ValueError
            return self.node(rev)
        except (ValueError, OverflowError):
            pass
        if len(id) == 40:
            try:
                # a full hex nodeid?
                node = bin(id)
                self.rev(node)
                return node
            except (TypeError, LookupError):
                pass

    def _partialmatch(self, id):
        try:
            n = self.index.partialmatch(id)
            if n and self.hasnode(n):
                return n
            return None
        except RevlogError:
            # parsers.c radix tree lookup gave multiple matches
            # fall through to slow path that filters hidden revisions
            pass
        except (AttributeError, ValueError):
            # we are pure python, or key was too short to search radix tree
            pass

        if id in self._pcache:
            return self._pcache[id]

        if len(id) < 40:
            try:
                # hex(node)[:...]
                l = len(id) // 2 # grab an even number of digits
                prefix = bin(id[:l * 2])
                nl = [e[7] for e in self.index if e[7].startswith(prefix)]
                nl = [n for n in nl if hex(n).startswith(id) and
                      self.hasnode(n)]
                if len(nl) > 0:
                    if len(nl) == 1:
                        self._pcache[id] = nl[0]
                        return nl[0]
                    raise LookupError(id, self.indexfile,
                                      _('ambiguous identifier'))
                return None
            except TypeError:
                pass

    def lookup(self, id):
        """locate a node based on:
            - revision number or str(revision number)
            - nodeid or subset of hex nodeid
        """
        n = self._match(id)
        if n is not None:
            return n
        n = self._partialmatch(id)
        if n:
            return n

        raise LookupError(id, self.indexfile, _('no match found'))

    def cmp(self, node, text):
        """compare text with a given file revision

        returns True if text is different than what is stored.
        """
        p1, p2 = self.parents(node)
        return hash(text, p1, p2) != node

    def _addchunk(self, offset, data):
        o, d = self._chunkcache
        # try to add to existing cache
        if o + len(d) == offset and len(d) + len(data) < _chunksize:
            self._chunkcache = o, d + data
        else:
            self._chunkcache = offset, data

    def _loadchunk(self, offset, length):
        if self._inline:
            df = self.opener(self.indexfile)
        else:
            df = self.opener(self.datafile)

        # Cache data both forward and backward around the requested
        # data, in a fixed size window. This helps speed up operations
        # involving reading the revlog backwards.
        cachesize = self._chunkcachesize
        realoffset = offset & ~(cachesize - 1)
        reallength = (((offset + length + cachesize) & ~(cachesize - 1))
                      - realoffset)
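        # Annotation (not in the original file), a worked example with the
        # default 64k cache: offset=70000, length=1000 gives
        # realoffset = 70000 & ~65535 = 65536, and
        # reallength = ((71000 + 65536) & ~65535) - 65536 = 65536,
        # i.e. one aligned 64k window covering the requested range and its
        # neighbors.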
        df.seek(realoffset)
        d = df.read(reallength)
        df.close()
        self._addchunk(realoffset, d)
        if offset != realoffset or reallength != length:
            return util.buffer(d, offset - realoffset, length)
        return d

    def _getchunk(self, offset, length):
        o, d = self._chunkcache
        l = len(d)

        # is it in the cache?
        cachestart = offset - o
        cacheend = cachestart + length
        if cachestart >= 0 and cacheend <= l:
            if cachestart == 0 and cacheend == l:
                return d # avoid a copy
            return util.buffer(d, cachestart, cacheend - cachestart)

        return self._loadchunk(offset, length)

    def _chunkraw(self, startrev, endrev):
        start = self.start(startrev)
        end = self.end(endrev)
        if self._inline:
            start += (startrev + 1) * self._io.size
            end += (endrev + 1) * self._io.size
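            # Annotation (not in the original file): in an inline revlog
            # there is no separate data file; each index record is followed
            # directly by its data chunk in the .i file, so a data offset
            # shifts by one index-record size per revision up to and
            # including the requested one.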
926 length = end - start
932 length = end - start
927 return self._getchunk(start, length)
933 return self._getchunk(start, length)
928
934
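    # Illustration (not part of revlog.py) of the inline offset adjustment
    # above. In an inline revlog the index entries and data chunks are
    # interleaved in one file:
    #
    #     [entry 0][data 0][entry 1][data 1][entry 2][data 2]...
    #
    # self.start(rev) gives the logical data offset, so the physical
    # position of rev's chunk must also skip the (rev + 1) index entries
    # written before it, each self._io.size bytes long.
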
    def _chunk(self, rev):
        return decompress(self._chunkraw(rev, rev))

    def _chunks(self, revs):
        '''faster version of [self._chunk(rev) for rev in revs]

        Assumes that revs is in ascending order.'''
        if not revs:
            return []
        start = self.start
        length = self.length
        inline = self._inline
        iosize = self._io.size
        buffer = util.buffer

        l = []
        ladd = l.append

        # preload the cache
        try:
            while True:
                # ensure that the cache doesn't change out from under us
                _cache = self._chunkcache
                self._chunkraw(revs[0], revs[-1])
                if _cache == self._chunkcache:
                    break
            offset, data = _cache
        except OverflowError:
            # issue4215 - we can't cache a run of chunks greater than
            # 2G on Windows
            return [self._chunk(rev) for rev in revs]

        for rev in revs:
            chunkstart = start(rev)
            if inline:
                chunkstart += (rev + 1) * iosize
            chunklength = length(rev)
            ladd(decompress(buffer(data, chunkstart - offset, chunklength)))

        return l

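    # Note (not part of revlog.py) on the retry loop above: a
    # _chunkraw(revs[0], revs[-1]) call that is fully served from the
    # snapshotted window leaves the cache unchanged, so the loop exits.
    # If the call had to load a new window instead, the cache changes,
    # the loop re-snapshots, and the next iteration is a cache hit; the
    # (offset, data) pair therefore always describes a window covering
    # revs[0] through revs[-1].
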
    def _chunkclear(self):
        self._chunkcache = (0, '')

    def deltaparent(self, rev):
        """return deltaparent of the given revision"""
        base = self.index[rev][3]
        if base == rev:
            return nullrev
        elif self._generaldelta:
            return base
        else:
            return rev - 1

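    # Illustration (not part of revlog.py) of how the base field is
    # interpreted. Suppose index[5][3] == 2:
    #
    #   - generaldelta revlog: rev 5 is stored as a delta against rev 2,
    #     so deltaparent(5) == 2.
    #   - classic revlog: the field is the chain base, and deltas always
    #     run against the previous revision, so deltaparent(5) == 4.
    #   - in either format, index[5][3] == 5 means rev 5 is stored as a
    #     full text, so deltaparent(5) == nullrev.
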
    def revdiff(self, rev1, rev2):
        """return or calculate a delta between two revisions"""
        if rev1 != nullrev and self.deltaparent(rev2) == rev1:
            return str(self._chunk(rev2))

        return mdiff.textdiff(self.revision(rev1),
                              self.revision(rev2))

    def revision(self, nodeorrev):
        """return an uncompressed revision of a given node or revision
        number.
        """
        if isinstance(nodeorrev, int):
            rev = nodeorrev
            node = self.node(rev)
        else:
            node = nodeorrev
            rev = None

        _cache = self._cache # grab local copy of cache to avoid thread race
        cachedrev = None
        if node == nullid:
            return ""
        if _cache:
            if _cache[0] == node:
                return _cache[2]
            cachedrev = _cache[1]

        # look up what we need to read
        text = None
        if rev is None:
            rev = self.rev(node)

        # check rev flags
        if self.flags(rev) & ~REVIDX_KNOWN_FLAGS:
            raise RevlogError(_('incompatible revision flag %x') %
                              (self.flags(rev) & ~REVIDX_KNOWN_FLAGS))

        # build delta chain
        chain = []
        index = self.index # for performance
        generaldelta = self._generaldelta
        iterrev = rev
        e = index[iterrev]
        while iterrev != e[3] and iterrev != cachedrev:
            chain.append(iterrev)
            if generaldelta:
                iterrev = e[3]
            else:
                iterrev -= 1
            e = index[iterrev]

        if iterrev == cachedrev:
            # cache hit
            text = _cache[2]
        else:
            chain.append(iterrev)
        chain.reverse()

        # drop cache to save memory
        self._cache = None

        bins = self._chunks(chain)
        if text is None:
            text = str(bins[0])
            bins = bins[1:]

        text = mdiff.patches(text, bins)

        text = self._checkhash(text, node, rev)

        self._cache = (node, rev, text)
        return text

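    # A minimal sketch (not part of revlog.py) of what the delta-chain
    # walk above produces. With generaldelta, for a chain
    # 0 (full text) <- 3 <- 5, revision(5) collects chain == [0, 3, 5]
    # and rebuilds the text roughly as:
    #
    #     bins = self._chunks([0, 3, 5])        # decompressed chunks
    #     text = str(bins[0])                   # full text of chain base
    #     text = mdiff.patches(text, bins[1:])  # apply deltas 3 and 5
    #
    # A hit on self._cache short-circuits the walk: the chain stops at
    # the cached rev, the cached text serves as the base, and every
    # remaining chunk in bins is applied as a delta on top of it.
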
    def hash(self, text, p1, p2):
        """Compute a node hash.

        Available as a function so that subclasses can replace the hash
        as needed.
        """
        return hash(text, p1, p2)

    def _checkhash(self, text, node, rev):
        p1, p2 = self.parents(node)
        self.checkhash(text, p1, p2, node, rev)
        return text

    def checkhash(self, text, p1, p2, node, rev=None):
        if node != self.hash(text, p1, p2):
            revornode = rev
            if revornode is None:
                revornode = templatefilters.short(hex(node))
            raise RevlogError(_("integrity check failed on %s:%s")
                              % (self.indexfile, revornode))

    def checkinlinesize(self, tr, fp=None):
        if not self._inline or (self.start(-2) + self.length(-2)) < _maxinline:
            return

        trinfo = tr.find(self.indexfile)
        if trinfo is None:
            raise RevlogError(_("%s not found in the transaction")
                              % self.indexfile)

        trindex = trinfo[2]
        dataoff = self.start(trindex)

        tr.add(self.datafile, dataoff)

        if fp:
            fp.flush()
            fp.close()

        df = self.opener(self.datafile, 'w')
        try:
            for r in self:
                df.write(self._chunkraw(r, r))
        finally:
            df.close()

        fp = self.opener(self.indexfile, 'w', atomictemp=True)
        self.version &= ~(REVLOGNGINLINEDATA)
        self._inline = False
        for i in self:
            e = self._io.packentry(self.index[i], self.node, self.version, i)
            fp.write(e)

        # if we don't call close, the temp file will never replace the
        # real index
        fp.close()

        tr.replace(self.indexfile, trindex * self._io.size)
        self._chunkclear()

    def addrevision(self, text, transaction, link, p1, p2, cachedelta=None,
                    node=None):
        """add a revision to the log

        text - the revision data to add
        transaction - the transaction object used for rollback
        link - the linkrev data to add
        p1, p2 - the parent nodeids of the revision
        cachedelta - an optional precomputed delta
        node - nodeid of revision; typically node is not specified, and it is
            computed by default as hash(text, p1, p2); however, subclasses
            might use a different hashing method (and override checkhash()
            in that case)
        """
        if link == nullrev:
            raise RevlogError(_("attempted to add linkrev -1 to %s")
                              % self.indexfile)
        node = node or self.hash(text, p1, p2)
        if node in self.nodemap:
            return node

        dfh = None
        if not self._inline:
            dfh = self.opener(self.datafile, "a")
        ifh = self.opener(self.indexfile, "a+")
        try:
            return self._addrevision(node, text, transaction, link, p1, p2,
                                     cachedelta, ifh, dfh)
        finally:
            if dfh:
                dfh.close()
            ifh.close()

    def compress(self, text):
        """ generate a possibly-compressed representation of text """
        if not text:
            return ("", text)
        l = len(text)
        bin = None
        if l < 44:
            pass
        elif l > 1000000:
            # zlib makes an internal copy, thus doubling memory usage for
            # large files, so let's do this in pieces
            z = zlib.compressobj()
            p = []
            pos = 0
            while pos < l:
                pos2 = pos + 2**20
                p.append(z.compress(text[pos:pos2]))
                pos = pos2
            p.append(z.flush())
            if sum(map(len, p)) < l:
                bin = "".join(p)
        else:
            bin = _compress(text)
        if bin is None or len(bin) > l:
            if text[0] == '\0':
                return ("", text)
            return ('u', text)
        return ("", bin)

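    # Illustration (not part of revlog.py) of the return convention above,
    # which the module-level decompress() relies on. The header and body
    # are concatenated when written, and the first byte of the stored
    # chunk identifies its encoding:
    #
    #     compress("")          -> ("", "")       # empty, stored as-is
    #     compress(small_text)  -> ('u', text)    # 'u' marks uncompressed
    #     compress(big_text)    -> ("", zlibdata) # zlib output starts 'x'
    #
    # Text that already begins with '\0' needs no 'u' marker, since '\0'
    # itself is read back as "stored plain"; the 44-byte floor skips zlib
    # where its header overhead would likely exceed any savings.
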
    def _addrevision(self, node, text, transaction, link, p1, p2,
                     cachedelta, ifh, dfh):
        """internal function to add revisions to the log

        see addrevision for argument descriptions.
        invariants:
        - text is optional (can be None); if not set, cachedelta must be set.
          if both are set, they must correspond to each other.
        """
        btext = [text]
        def buildtext():
            if btext[0] is not None:
                return btext[0]
            # flush any pending writes here so we can read it in revision
            if dfh:
                dfh.flush()
            ifh.flush()
            basetext = self.revision(self.node(cachedelta[0]))
            btext[0] = mdiff.patch(basetext, cachedelta[1])
            try:
                self.checkhash(btext[0], p1, p2, node)
            except CensoredNodeError:
                pass # always import a censor tombstone.
            return btext[0]

        def builddelta(rev):
            # can we use the cached delta?
            if cachedelta and cachedelta[0] == rev:
                delta = cachedelta[1]
            else:
                t = buildtext()
                ptext = self.revision(self.node(rev))
                delta = mdiff.textdiff(ptext, t)
            data = self.compress(delta)
            l = len(data[1]) + len(data[0])
            if basecache[0] == rev:
                chainbase = basecache[1]
            else:
                chainbase = self.chainbase(rev)
            dist = l + offset - self.start(chainbase)
            if self._generaldelta:
                base = rev
            else:
                base = chainbase
            chainlen = self.chainlen(rev) + 1
            return dist, l, data, base, chainbase, chainlen

        curr = len(self)
        prev = curr - 1
        base = chainbase = curr
        chainlen = None
        offset = self.end(prev)
        flags = 0
        d = None
        if self._basecache is None:
            self._basecache = (prev, self.chainbase(prev))
        basecache = self._basecache
        p1r, p2r = self.rev(p1), self.rev(p2)

        # should we try to build a delta?
        if prev != nullrev:
            if self._generaldelta:
                if p1r >= basecache[1]:
                    d = builddelta(p1r)
                elif p2r >= basecache[1]:
                    d = builddelta(p2r)
                else:
                    d = builddelta(prev)
            else:
                d = builddelta(prev)
            dist, l, data, base, chainbase, chainlen = d

        # full versions are inserted when the needed deltas
        # become comparable to the uncompressed text
        if text is None:
            textlen = mdiff.patchedsize(self.rawsize(cachedelta[0]),
                                        cachedelta[1])
        else:
            textlen = len(text)
        if (d is None or dist > textlen * 2 or l > textlen or
            (self._maxchainlen and chainlen > self._maxchainlen)):
            text = buildtext()
            data = self.compress(text)
            l = len(data[1]) + len(data[0])
            base = chainbase = curr

        e = (offset_type(offset, flags), l, textlen,
             base, link, p1r, p2r, node)
        self.index.insert(-1, e)
        self.nodemap[node] = curr

        entry = self._io.packentry(e, self.node, self.version, curr)
        self._writeentry(transaction, ifh, dfh, entry, data, link, offset)

        if type(text) == str: # only accept immutable objects
            self._cache = (node, curr, text)
        self._basecache = (curr, chainbase)
        return node

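    # Hedged sketch (not part of revlog.py) of the fulltext-vs-delta
    # decision above. A candidate delta is rejected and a full version
    # stored instead when any of these holds:
    #
    #     d is None                      # no delta could be built
    #     dist > textlen * 2             # reading the chain would touch
    #                                    # more than 2x the text size
    #     l > textlen                    # the compressed delta is no
    #                                    # smaller than the full text
    #     chainlen > self._maxchainlen   # chain too long (if configured)
    #
    # Here dist is the span from the chain base's start to the end of the
    # new chunk, i.e. roughly how many bytes revision() must read to
    # reconstruct this rev.
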
    def _writeentry(self, transaction, ifh, dfh, entry, data, link, offset):
        curr = len(self) - 1
        if not self._inline:
            transaction.add(self.datafile, offset)
            transaction.add(self.indexfile, curr * len(entry))
            if data[0]:
                dfh.write(data[0])
            dfh.write(data[1])
            dfh.flush()
            ifh.write(entry)
        else:
            offset += curr * self._io.size
            transaction.add(self.indexfile, offset, curr)
            ifh.write(entry)
            ifh.write(data[0])
            ifh.write(data[1])
            self.checkinlinesize(transaction, ifh)

    def addgroup(self, bundle, linkmapper, transaction):
        """
        add a delta group

        given a set of deltas, add them to the revision log. the
        first delta is against its parent, which should be in our
        log; the rest are against the previous delta.
        """

        # track the base of the current delta log
        content = []
        node = None

        r = len(self)
        end = 0
        if r:
            end = self.end(r - 1)
        ifh = self.opener(self.indexfile, "a+")
        isize = r * self._io.size
        if self._inline:
            transaction.add(self.indexfile, end + isize, r)
            dfh = None
        else:
            transaction.add(self.indexfile, isize, r)
            transaction.add(self.datafile, end)
            dfh = self.opener(self.datafile, "a")

        try:
            # loop through our set of deltas
            chain = None
            while True:
                chunkdata = bundle.deltachunk(chain)
                if not chunkdata:
                    break
                node = chunkdata['node']
                p1 = chunkdata['p1']
                p2 = chunkdata['p2']
                cs = chunkdata['cs']
                deltabase = chunkdata['deltabase']
                delta = chunkdata['delta']

                content.append(node)

                link = linkmapper(cs)
                if node in self.nodemap:
                    # this can happen if two branches make the same change
                    chain = node
                    continue

                for p in (p1, p2):
                    if p not in self.nodemap:
                        raise LookupError(p, self.indexfile,
                                          _('unknown parent'))

                if deltabase not in self.nodemap:
                    raise LookupError(deltabase, self.indexfile,
                                      _('unknown delta base'))

                baserev = self.rev(deltabase)
                chain = self._addrevision(node, None, transaction, link,
                                          p1, p2, (baserev, delta), ifh, dfh)
                if not dfh and not self._inline:
                    # addrevision switched from inline to conventional
                    # reopen the index
                    ifh.close()
                    dfh = self.opener(self.datafile, "a")
                    ifh = self.opener(self.indexfile, "a")
        finally:
            if dfh:
                dfh.close()
            ifh.close()

        return content

    def getstrippoint(self, minlink):
        """find the minimum rev that must be stripped to strip the linkrev

        Returns a tuple containing the minimum rev and a set of all revs that
        have linkrevs that will be broken by this strip.
        """
        brokenrevs = set()
        strippoint = len(self)

        heads = {}
        futurelargelinkrevs = set()
        for head in self.headrevs():
            headlinkrev = self.linkrev(head)
            heads[head] = headlinkrev
            if headlinkrev >= minlink:
                futurelargelinkrevs.add(headlinkrev)

        # This algorithm involves walking down the rev graph, starting at the
        # heads. Since the revs are topologically sorted according to linkrev,
        # once all head linkrevs are below the minlink, we know there are
        # no more revs that could have a linkrev greater than minlink.
        # So we can stop walking.
        while futurelargelinkrevs:
            strippoint -= 1
            linkrev = heads.pop(strippoint)

            if linkrev < minlink:
                brokenrevs.add(strippoint)
            else:
                futurelargelinkrevs.remove(linkrev)

            for p in self.parentrevs(strippoint):
                if p != nullrev:
                    plinkrev = self.linkrev(p)
                    heads[p] = plinkrev
                    if plinkrev >= minlink:
                        futurelargelinkrevs.add(plinkrev)

        return strippoint, brokenrevs

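    # Worked example (not part of revlog.py): three revs, where rev 1 and
    # rev 2 are both children of rev 0, with linkrevs [0, 3, 2] and
    # minlink = 3. The walk starts at the heads {1: 3, 2: 2}:
    #
    #   strippoint 2: linkrev 2 <  3 -> brokenrevs = {2}
    #   strippoint 1: linkrev 3 >= 3 -> last large linkrev gone, stop
    #
    # getstrippoint(3) returns (1, {2}): the revlog is truncated at rev 1,
    # and rev 2, whose linkrev should survive, must be saved and re-added
    # by the caller (see strip() below).
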
    def strip(self, minlink, transaction):
        """truncate the revlog on the first revision with a linkrev >= minlink

        This function is called when we're stripping revision minlink and
        its descendants from the repository.

        We have to remove all revisions with linkrev >= minlink, because
        the equivalent changelog revisions will be renumbered after the
        strip.

        So we truncate the revlog on the first of these revisions, and
        trust that the caller has saved the revisions that shouldn't be
        removed and that it'll re-add them after this truncation.
        """
        if len(self) == 0:
            return

        rev, _ = self.getstrippoint(minlink)
        if rev == len(self):
            return

        # first truncate the files on disk
        end = self.start(rev)
        if not self._inline:
            transaction.add(self.datafile, end)
            end = rev * self._io.size
        else:
            end += rev * self._io.size

        transaction.add(self.indexfile, end)

        # then reset internal state in memory to forget those revisions
        self._cache = None
        self._chunkclear()
        for x in xrange(rev, len(self)):
            del self.nodemap[self.node(x)]

        del self.index[rev:-1]

    def checksize(self):
        expected = 0
        if len(self):
            expected = max(0, self.end(len(self) - 1))

        try:
            f = self.opener(self.datafile)
            f.seek(0, 2)
            actual = f.tell()
            f.close()
            dd = actual - expected
        except IOError, inst:
            if inst.errno != errno.ENOENT:
                raise
            dd = 0

        try:
            f = self.opener(self.indexfile)
            f.seek(0, 2)
            actual = f.tell()
            f.close()
            s = self._io.size
            i = max(0, actual // s)
            di = actual - (i * s)
            if self._inline:
                databytes = 0
                for r in self:
                    databytes += max(0, self.length(r))
                dd = 0
                di = actual - len(self) * s - databytes
        except IOError, inst:
            if inst.errno != errno.ENOENT:
                raise
            di = 0

        return (dd, di)

    def files(self):
        res = [self.indexfile]
        if not self._inline:
            res.append(self.datafile)
        return res