revlog: improve documentation...
Gregory Szorc - r27070:7860366b default
@@ -1,1684 +1,1737 @@
# revlog.py - storage back-end for mercurial
#
# Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.

"""Storage back-end for Mercurial.

This provides efficient delta storage with O(1) retrieve and append
and O(changes) merge between branches.
"""

# import stuff from node for others to import from revlog
import collections
from node import bin, hex, nullid, nullrev
from i18n import _
import ancestor, mdiff, parsers, error, util, templatefilters
import struct, zlib, errno

_pack = struct.pack
_unpack = struct.unpack
_compress = zlib.compress
_decompress = zlib.decompress
_sha = util.sha1

# revlog header flags
REVLOGV0 = 0
REVLOGNG = 1
REVLOGNGINLINEDATA = (1 << 16)
REVLOGGENERALDELTA = (1 << 17)
REVLOG_DEFAULT_FLAGS = REVLOGNGINLINEDATA
REVLOG_DEFAULT_FORMAT = REVLOGNG
REVLOG_DEFAULT_VERSION = REVLOG_DEFAULT_FORMAT | REVLOG_DEFAULT_FLAGS
REVLOGNG_FLAGS = REVLOGNGINLINEDATA | REVLOGGENERALDELTA

# revlog index flags
REVIDX_ISCENSORED = (1 << 15) # revision has censor metadata, must be verified
REVIDX_DEFAULT_FLAGS = 0
REVIDX_KNOWN_FLAGS = REVIDX_ISCENSORED

# max size of revlog with inline data
_maxinline = 131072
_chunksize = 1048576

RevlogError = error.RevlogError
LookupError = error.LookupError
CensoredNodeError = error.CensoredNodeError

def getoffset(q):
    return int(q >> 16)

def gettype(q):
    return int(q & 0xFFFF)

def offset_type(offset, type):
    return long(long(offset) << 16 | type)

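# Illustrative sketch (editor's addition, not part of the original commit):
# the first field of an index entry packs a 48-bit byte offset and 16 bits
# of per-revision flags into a single integer, which the helpers above
# split apart again. ``_demo_offset_type`` is a hypothetical helper that
# exists only for this example.
def _demo_offset_type():
    packed = offset_type(1024, REVIDX_ISCENSORED)
    assert getoffset(packed) == 1024
    assert gettype(packed) == REVIDX_ISCENSORED
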
_nullhash = _sha(nullid)

def hash(text, p1, p2):
    """generate a hash from the given text and its parent hashes

    This hash combines both the current file contents and its history
    in a manner that makes it easy to distinguish nodes with the same
    content in the revision graph.
    """
    # As of now, if one of the parent nodes is null, p2 is null
    if p2 == nullid:
        # deep copy of a hash is faster than creating one
        s = _nullhash.copy()
        s.update(p1)
    else:
        # neither of the parent nodes is nullid
        l = [p1, p2]
        l.sort()
        s = _sha(l[0])
        s.update(l[1])
    s.update(text)
    return s.digest()

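# Illustrative sketch (editor's addition): the sort above makes the node id
# independent of parent order, and a null p2 takes the precomputed-hash fast
# path. ``_demo_hash`` is a hypothetical helper for this example only.
def _demo_hash():
    a, b = chr(1) * 20, chr(2) * 20
    assert hash('text', a, b) == hash('text', b, a)
    # with a null second parent the digest is sha1(nullid + p1 + text)
    assert hash('text', a, nullid) == _sha(nullid + a + 'text').digest()
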
def decompress(bin):
    """ decompress the given input """
    if not bin:
        return bin
    t = bin[0]
    if t == '\0':
        return bin
    if t == 'x':
        try:
            return _decompress(bin)
        except zlib.error as e:
            raise RevlogError(_("revlog decompress error: %s") % str(e))
    if t == 'u':
        return bin[1:]
    raise RevlogError(_("unknown compression type %r") % t)

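# Illustrative sketch (editor's addition): stored chunks are tagged by their
# first byte: 'x' marks zlib data (zlib streams happen to begin with 'x'),
# 'u' marks an uncompressed chunk behind a one-byte header, and '\0' or an
# empty string passes through untouched. ``_demo_decompress`` is hypothetical.
def _demo_decompress():
    assert decompress('') == ''
    assert decompress('u' + 'raw data') == 'raw data'
    assert decompress(_compress('some text')) == 'some text'
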
# index v0:
#  4 bytes: offset
#  4 bytes: compressed length
#  4 bytes: base rev
#  4 bytes: link rev
# 20 bytes: parent 1 nodeid
# 20 bytes: parent 2 nodeid
# 20 bytes: nodeid
indexformatv0 = ">4l20s20s20s"

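# Illustrative sketch (editor's addition): struct.calcsize confirms the v0
# record layout described above: four 4-byte ints plus three 20-byte hashes.
def _demo_indexformatv0_size():
    assert struct.calcsize(indexformatv0) == 4 * 4 + 3 * 20  # 76 bytes
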
class revlogoldio(object):
    def __init__(self):
        self.size = struct.calcsize(indexformatv0)

    def parseindex(self, data, inline):
        s = self.size
        index = []
        nodemap = {nullid: nullrev}
        n = off = 0
        l = len(data)
        while off + s <= l:
            cur = data[off:off + s]
            off += s
            e = _unpack(indexformatv0, cur)
            # transform to revlogv1 format
            e2 = (offset_type(e[0], 0), e[1], -1, e[2], e[3],
                  nodemap.get(e[4], nullrev), nodemap.get(e[5], nullrev), e[6])
            index.append(e2)
            nodemap[e[6]] = n
            n += 1

        # add the magic null revision at -1
        index.append((0, 0, 0, -1, -1, -1, -1, nullid))

        return index, nodemap, None

    def packentry(self, entry, node, version, rev):
        if gettype(entry[0]):
            raise RevlogError(_("index entry flags need RevlogNG"))
        e2 = (getoffset(entry[0]), entry[1], entry[3], entry[4],
              node(entry[5]), node(entry[6]), entry[7])
        return _pack(indexformatv0, *e2)

# index ng:
#  6 bytes: offset
#  2 bytes: flags
#  4 bytes: compressed length
#  4 bytes: uncompressed length
#  4 bytes: base rev
#  4 bytes: link rev
#  4 bytes: parent 1 rev
#  4 bytes: parent 2 rev
# 32 bytes: nodeid
indexformatng = ">Qiiiiii20s12x"
versionformat = ">I"

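# Illustrative sketch (editor's addition): the ng record is a fixed 64 bytes;
# the 20-byte sha1 node is padded to 32 bytes by the trailing "12x".
def _demo_indexformatng_size():
    assert struct.calcsize(indexformatng) == 64
    assert struct.calcsize(versionformat) == 4
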
# corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
# signed integer)
_maxentrysize = 0x7fffffff

class revlogio(object):
    def __init__(self):
        self.size = struct.calcsize(indexformatng)

    def parseindex(self, data, inline):
        # call the C implementation to parse the index data
        index, cache = parsers.parse_index2(data, inline)
        return index, getattr(index, 'nodemap', None), cache

    def packentry(self, entry, node, version, rev):
        p = _pack(indexformatng, *entry)
        if rev == 0:
            p = _pack(versionformat, version) + p[4:]
        return p

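# Illustrative sketch (editor's addition): for the first revision, packentry
# splices the revlog version/flags word over the high bytes of the offset
# field, which are always zero for the entry at the start of the file. The
# demo values below are made up for this example.
def _demo_packentry_version():
    entry = (offset_type(0, 0), 10, 10, 0, 0, -1, -1, '\0' * 20)
    packed = revlogio().packentry(entry, None, REVLOG_DEFAULT_VERSION, 0)
    assert _unpack(versionformat, packed[:4])[0] == REVLOG_DEFAULT_VERSION
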
class revlog(object):
    """
    the underlying revision storage object

    A revlog consists of two parts, an index and the revision data.

    The index is a file with a fixed record size containing
    information on each revision, including its nodeid (hash), the
    nodeids of its parents, the position and offset of its data within
    the data file, and the revision it's based on. Finally, each entry
    contains a linkrev entry that can serve as a pointer to external
    data.

    The revision data itself is a linear collection of data chunks.
    Each chunk represents a revision and is usually represented as a
    delta against the previous chunk. To bound lookup time, runs of
    deltas are limited to about 2 times the length of the original
    version data. This makes retrieval of a version proportional to
    its size, or O(1) relative to the number of revisions.

    Both pieces of the revlog are written to in an append-only
    fashion, which means we never need to rewrite a file to insert or
    remove data, and can use some simple techniques to avoid the need
    for locking while reading.
    """
    def __init__(self, opener, indexfile):
        """
        create a revlog object

        opener is a function that abstracts the file opening operation
        and can be used to implement COW semantics or the like.
        """
        self.indexfile = indexfile
        self.datafile = indexfile[:-2] + ".d"
        self.opener = opener
        # 3-tuple of (node, rev, text) for a raw revision.
        self._cache = None
        # 2-tuple of (rev, baserev) defining the base revision the delta chain
        # begins at for a revision.
        self._basecache = None
        # 2-tuple of (offset, data) of raw data from the revlog at an offset.
        self._chunkcache = (0, '')
        # How much data to read and cache into the raw revlog data cache.
        self._chunkcachesize = 65536
        self._maxchainlen = None
        self._aggressivemergedeltas = False
        self.index = []
        # Mapping of partial identifiers to full nodes.
        self._pcache = {}
        # Mapping of revision integer to full node.
        self._nodecache = {nullid: nullrev}
        self._nodepos = None

        v = REVLOG_DEFAULT_VERSION
        opts = getattr(opener, 'options', None)
        if opts is not None:
            if 'revlogv1' in opts:
                if 'generaldelta' in opts:
                    v |= REVLOGGENERALDELTA
            else:
                v = 0
            if 'chunkcachesize' in opts:
                self._chunkcachesize = opts['chunkcachesize']
            if 'maxchainlen' in opts:
                self._maxchainlen = opts['maxchainlen']
            if 'aggressivemergedeltas' in opts:
                self._aggressivemergedeltas = opts['aggressivemergedeltas']
            self._lazydeltabase = bool(opts.get('lazydeltabase', False))

        if self._chunkcachesize <= 0:
            raise RevlogError(_('revlog chunk cache size %r is not greater '
                                'than 0') % self._chunkcachesize)
        elif self._chunkcachesize & (self._chunkcachesize - 1):
            raise RevlogError(_('revlog chunk cache size %r is not a power '
                                'of 2') % self._chunkcachesize)

        indexdata = ''
        self._initempty = True
        try:
            f = self.opener(self.indexfile)
            indexdata = f.read()
            f.close()
            if len(indexdata) > 0:
                v = struct.unpack(versionformat, indexdata[:4])[0]
                self._initempty = False
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise

        self.version = v
        self._inline = v & REVLOGNGINLINEDATA
        self._generaldelta = v & REVLOGGENERALDELTA
        flags = v & ~0xFFFF
        fmt = v & 0xFFFF
        if fmt == REVLOGV0 and flags:
            raise RevlogError(_("index %s unknown flags %#04x for format v0")
                              % (self.indexfile, flags >> 16))
        elif fmt == REVLOGNG and flags & ~REVLOGNG_FLAGS:
            raise RevlogError(_("index %s unknown flags %#04x for revlogng")
                              % (self.indexfile, flags >> 16))
        elif fmt > REVLOGNG:
            raise RevlogError(_("index %s unknown format %d")
                              % (self.indexfile, fmt))

        self._io = revlogio()
        if self.version == REVLOGV0:
            self._io = revlogoldio()
        try:
            d = self._io.parseindex(indexdata, self._inline)
        except (ValueError, IndexError):
            raise RevlogError(_("index %s is corrupted") % (self.indexfile))
        self.index, nodemap, self._chunkcache = d
        if nodemap is not None:
            self.nodemap = self._nodecache = nodemap
        if not self._chunkcache:
            self._chunkclear()
        # revnum -> (chain-length, sum-delta-length)
        self._chaininfocache = {}

    def tip(self):
        return self.node(len(self.index) - 2)
    def __contains__(self, rev):
        return 0 <= rev < len(self)
    def __len__(self):
        return len(self.index) - 1
    def __iter__(self):
        return iter(xrange(len(self)))
    def revs(self, start=0, stop=None):
294 """iterate over all rev in this revlog (from start to stop)"""
301 """iterate over all rev in this revlog (from start to stop)"""
        step = 1
        if stop is not None:
            if start > stop:
                step = -1
            stop += step
        else:
            stop = len(self)
        return xrange(start, stop, step)

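    # Illustrative note (editor's addition): when start > stop the iteration
    # runs backwards and remains inclusive of stop, e.g. revs(5, 2) yields
    # 5, 4, 3, 2, because stop is shifted by one step before xrange is built.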
    @util.propertycache
    def nodemap(self):
        self.rev(self.node(0))
        return self._nodecache

    def hasnode(self, node):
        try:
            self.rev(node)
            return True
        except KeyError:
            return False

    def clearcaches(self):
        try:
            self._nodecache.clearcaches()
        except AttributeError:
            self._nodecache = {nullid: nullrev}
            self._nodepos = None

    def rev(self, node):
        try:
            return self._nodecache[node]
        except TypeError:
            raise
        except RevlogError:
            # parsers.c radix tree lookup failed
            raise LookupError(node, self.indexfile, _('no node'))
        except KeyError:
            # pure python cache lookup failed
            n = self._nodecache
            i = self.index
            p = self._nodepos
            if p is None:
                p = len(i) - 2
            for r in xrange(p, -1, -1):
                v = i[r][7]
                n[v] = r
                if v == node:
                    self._nodepos = r - 1
                    return r
            raise LookupError(node, self.indexfile, _('no node'))

    def node(self, rev):
        return self.index[rev][7]
    def linkrev(self, rev):
        return self.index[rev][4]
    def parents(self, node):
        i = self.index
        d = i[self.rev(node)]
        return i[d[5]][7], i[d[6]][7] # map revisions to nodes inline
    def parentrevs(self, rev):
        return self.index[rev][5:7]
    def start(self, rev):
        return int(self.index[rev][0] >> 16)
    def end(self, rev):
        return self.start(rev) + self.length(rev)
    def length(self, rev):
        return self.index[rev][1]
    def chainbase(self, rev):
        index = self.index
        base = index[rev][3]
        while base != rev:
            rev = base
            base = index[rev][3]
        return base
    def chainlen(self, rev):
        return self._chaininfo(rev)[0]

    def _chaininfo(self, rev):
        chaininfocache = self._chaininfocache
        if rev in chaininfocache:
            return chaininfocache[rev]
        index = self.index
        generaldelta = self._generaldelta
        iterrev = rev
        e = index[iterrev]
        clen = 0
        compresseddeltalen = 0
        while iterrev != e[3]:
            clen += 1
            compresseddeltalen += e[1]
            if generaldelta:
                iterrev = e[3]
            else:
                iterrev -= 1
            if iterrev in chaininfocache:
                t = chaininfocache[iterrev]
                clen += t[0]
                compresseddeltalen += t[1]
                break
            e = index[iterrev]
        else:
            # Add text length of base since decompressing that also takes
            # work. For cache hits the length is already included.
            compresseddeltalen += e[1]
        r = (clen, compresseddeltalen)
        chaininfocache[rev] = r
        return r

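    # Illustrative note (editor's addition): with generaldelta each step
    # follows the stored base (e[3]); without it the chain implicitly runs
    # against the previous revision, so the walk simply decrements. For an
    # index where rev 5 deltas against rev 3 and rev 3 is a full snapshot
    # (index[3][3] == 3), chainlen(5) is 1 under generaldelta.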
    def flags(self, rev):
        return self.index[rev][0] & 0xFFFF
    def rawsize(self, rev):
        """return the length of the uncompressed text for a given revision"""
        l = self.index[rev][2]
        if l >= 0:
            return l

        t = self.revision(self.node(rev))
        return len(t)
    size = rawsize

    def ancestors(self, revs, stoprev=0, inclusive=False):
        """Generate the ancestors of 'revs' in reverse topological order.
        Does not generate revs lower than stoprev.

        See the documentation for ancestor.lazyancestors for more details."""

        return ancestor.lazyancestors(self.parentrevs, revs, stoprev=stoprev,
                                      inclusive=inclusive)

    def descendants(self, revs):
        """Generate the descendants of 'revs' in revision order.

        Yield a sequence of revision numbers starting with a child of
        some rev in revs, i.e., each revision is *not* considered a
        descendant of itself. Results are ordered by revision number (a
        topological sort)."""
        first = min(revs)
        if first == nullrev:
            for i in self:
                yield i
            return

        seen = set(revs)
        for i in self.revs(start=first + 1):
            for x in self.parentrevs(i):
                if x != nullrev and x in seen:
                    seen.add(i)
                    yield i
                    break

    def findcommonmissing(self, common=None, heads=None):
        """Return a tuple of the ancestors of common and the ancestors of heads
        that are not ancestors of common. In revset terminology, we return the
        tuple:

          ::common, (::heads) - (::common)

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of node IDs. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullid."""
        if common is None:
            common = [nullid]
        if heads is None:
            heads = self.heads()

        common = [self.rev(n) for n in common]
        heads = [self.rev(n) for n in heads]

        # we want the ancestors, but inclusive
        class lazyset(object):
            def __init__(self, lazyvalues):
                self.addedvalues = set()
                self.lazyvalues = lazyvalues

            def __contains__(self, value):
                return value in self.addedvalues or value in self.lazyvalues

            def __iter__(self):
                added = self.addedvalues
                for r in added:
                    yield r
                for r in self.lazyvalues:
                    if not r in added:
                        yield r

            def add(self, value):
                self.addedvalues.add(value)

            def update(self, values):
                self.addedvalues.update(values)

        has = lazyset(self.ancestors(common))
        has.add(nullrev)
        has.update(common)

        # take all ancestors from heads that aren't in has
        missing = set()
        visit = collections.deque(r for r in heads if r not in has)
        while visit:
            r = visit.popleft()
            if r in missing:
                continue
            else:
                missing.add(r)
                for p in self.parentrevs(r):
                    if p not in has:
                        visit.append(p)
        missing = list(missing)
        missing.sort()
        return has, [self.node(r) for r in missing]

    def incrementalmissingrevs(self, common=None):
        """Return an object that can be used to incrementally compute the
        revision numbers of the ancestors of arbitrary sets that are not
        ancestors of common. This is an ancestor.incrementalmissingancestors
        object.

        'common' is a list of revision numbers. If common is not supplied, uses
        nullrev.
        """
        if common is None:
            common = [nullrev]

        return ancestor.incrementalmissingancestors(self.parentrevs, common)

    def findmissingrevs(self, common=None, heads=None):
        """Return the revision numbers of the ancestors of heads that
        are not ancestors of common.

        More specifically, return a list of revision numbers corresponding to
        nodes N such that every N satisfies the following constraints:

          1. N is an ancestor of some node in 'heads'
          2. N is not an ancestor of any node in 'common'

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of revision numbers. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullrev."""
        if common is None:
            common = [nullrev]
        if heads is None:
            heads = self.headrevs()

        inc = self.incrementalmissingrevs(common=common)
        return inc.missingancestors(heads)

    def findmissing(self, common=None, heads=None):
        """Return the ancestors of heads that are not ancestors of common.

        More specifically, return a list of nodes N such that every N
        satisfies the following constraints:

          1. N is an ancestor of some node in 'heads'
          2. N is not an ancestor of any node in 'common'

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of node IDs. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullid."""
        if common is None:
            common = [nullid]
        if heads is None:
            heads = self.heads()

        common = [self.rev(n) for n in common]
        heads = [self.rev(n) for n in heads]

        inc = self.incrementalmissingrevs(common=common)
        return [self.node(r) for r in inc.missingancestors(heads)]

    def nodesbetween(self, roots=None, heads=None):
        """Return a topological path from 'roots' to 'heads'.

        Return a tuple (nodes, outroots, outheads) where 'nodes' is a
        topologically sorted list of all nodes N that satisfy both of
        these constraints:

          1. N is a descendant of some node in 'roots'
          2. N is an ancestor of some node in 'heads'

        Every node is considered to be both a descendant and an ancestor
        of itself, so every reachable node in 'roots' and 'heads' will be
        included in 'nodes'.

        'outroots' is the list of reachable nodes in 'roots', i.e., the
        subset of 'roots' that is returned in 'nodes'. Likewise,
        'outheads' is the subset of 'heads' that is also in 'nodes'.

        'roots' and 'heads' are both lists of node IDs. If 'roots' is
        unspecified, uses nullid as the only root. If 'heads' is
        unspecified, uses list of all of the revlog's heads."""
        nonodes = ([], [], [])
        if roots is not None:
            roots = list(roots)
            if not roots:
                return nonodes
            lowestrev = min([self.rev(n) for n in roots])
        else:
            roots = [nullid] # Everybody's a descendant of nullid
            lowestrev = nullrev
        if (lowestrev == nullrev) and (heads is None):
            # We want _all_ the nodes!
            return ([self.node(r) for r in self], [nullid], list(self.heads()))
        if heads is None:
            # All nodes are ancestors, so the latest ancestor is the last
            # node.
            highestrev = len(self) - 1
            # Set ancestors to None to signal that every node is an ancestor.
            ancestors = None
            # Set heads to an empty dictionary for later discovery of heads
            heads = {}
        else:
            heads = list(heads)
            if not heads:
                return nonodes
            ancestors = set()
            # Turn heads into a dictionary so we can remove 'fake' heads.
            # Also, later we will be using it to filter out the heads we can't
            # find from roots.
            heads = dict.fromkeys(heads, False)
            # Start at the top and keep marking parents until we're done.
            nodestotag = set(heads)
            # Remember where the top was so we can use it as a limit later.
            highestrev = max([self.rev(n) for n in nodestotag])
            while nodestotag:
                # grab a node to tag
                n = nodestotag.pop()
                # Never tag nullid
                if n == nullid:
                    continue
                # A node's revision number represents its place in a
                # topologically sorted list of nodes.
                r = self.rev(n)
                if r >= lowestrev:
                    if n not in ancestors:
                        # If we are possibly a descendant of one of the roots
                        # and we haven't already been marked as an ancestor
                        ancestors.add(n) # Mark as ancestor
                        # Add non-nullid parents to list of nodes to tag.
                        nodestotag.update([p for p in self.parents(n) if
                                           p != nullid])
                    elif n in heads: # We've seen it before, is it a fake head?
                        # So it is, real heads should not be the ancestors of
                        # any other heads.
                        heads.pop(n)
            if not ancestors:
                return nonodes
            # Now that we have our set of ancestors, we want to remove any
            # roots that are not ancestors.

            # If one of the roots was nullid, everything is included anyway.
            if lowestrev > nullrev:
                # But, since we weren't, let's recompute the lowest rev to not
                # include roots that aren't ancestors.

                # Filter out roots that aren't ancestors of heads
                roots = [n for n in roots if n in ancestors]
                # Recompute the lowest revision
                if roots:
                    lowestrev = min([self.rev(n) for n in roots])
                else:
                    # No more roots? Return empty list
                    return nonodes
            else:
                # We are descending from nullid, and don't need to care about
                # any other roots.
                lowestrev = nullrev
                roots = [nullid]
        # Transform our roots list into a set.
        descendants = set(roots)
        # Also, keep the original roots so we can filter out roots that aren't
        # 'real' roots (i.e. are descended from other roots).
        roots = descendants.copy()
        # Our topologically sorted list of output nodes.
        orderedout = []
        # Don't start at nullid since we don't want nullid in our output list,
        # and if nullid shows up in descendants, empty parents will look like
        # they're descendants.
        for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
            n = self.node(r)
            isdescendant = False
            if lowestrev == nullrev: # Everybody is a descendant of nullid
                isdescendant = True
            elif n in descendants:
                # n is already a descendant
                isdescendant = True
                # This check only needs to be done here because all the roots
                # will start being marked as descendants before the loop.
                if n in roots:
                    # If n was a root, check if it's a 'real' root.
                    p = tuple(self.parents(n))
                    # If any of its parents are descendants, it's not a root.
                    if (p[0] in descendants) or (p[1] in descendants):
                        roots.remove(n)
            else:
                p = tuple(self.parents(n))
                # A node is a descendant if either of its parents are
                # descendants. (We seeded the dependents list with the roots
                # up there, remember?)
                if (p[0] in descendants) or (p[1] in descendants):
                    descendants.add(n)
                    isdescendant = True
            if isdescendant and ((ancestors is None) or (n in ancestors)):
                # Only include nodes that are both descendants and ancestors.
                orderedout.append(n)
                if (ancestors is not None) and (n in heads):
                    # We're trying to figure out which heads are reachable
                    # from roots.
                    # Mark this head as having been reached
                    heads[n] = True
                elif ancestors is None:
                    # Otherwise, we're trying to discover the heads.
                    # Assume this is a head because if it isn't, the next step
                    # will eventually remove it.
                    heads[n] = True
                    # But, obviously its parents aren't.
                    for p in self.parents(n):
                        heads.pop(p, None)
        heads = [n for n, flag in heads.iteritems() if flag]
        roots = list(roots)
        assert orderedout
        assert roots
        assert heads
        return (orderedout, roots, heads)

    def headrevs(self):
        try:
            return self.index.headrevs()
        except AttributeError:
            return self._headrevs()

    def computephases(self, roots):
        return self.index.computephasesmapsets(roots)

    def _headrevs(self):
        count = len(self)
        if not count:
            return [nullrev]
        # we won't iter over filtered revs so nobody is a head at start
        ishead = [0] * (count + 1)
        index = self.index
        for r in self:
            ishead[r] = 1 # I may be a head
            e = index[r]
            ishead[e[5]] = ishead[e[6]] = 0 # my parents are not
        return [r for r, val in enumerate(ishead) if val]

    def heads(self, start=None, stop=None):
        """return the list of all nodes that have no children

        if start is specified, only heads that are descendants of
        start will be returned
        if stop is specified, it will consider all the revs from stop
        as if they had no children
        """
        if start is None and stop is None:
            if not len(self):
                return [nullid]
            return [self.node(r) for r in self.headrevs()]

        if start is None:
            start = nullid
        if stop is None:
            stop = []
        stoprevs = set([self.rev(n) for n in stop])
        startrev = self.rev(start)
        reachable = set((startrev,))
        heads = set((startrev,))

        parentrevs = self.parentrevs
        for r in self.revs(start=startrev + 1):
            for p in parentrevs(r):
                if p in reachable:
                    if r not in stoprevs:
                        reachable.add(r)
                        heads.add(r)
                    if p in heads and p not in stoprevs:
                        heads.remove(p)

        return [self.node(r) for r in heads]

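    # Illustrative note (editor's addition): the scan above visits revisions
    # in increasing order, relying on the revlog invariant that parents
    # always have lower revision numbers than their children, so a single
    # forward pass suffices to grow the reachable set.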
    def children(self, node):
        """find the children of a given node"""
        c = []
        p = self.rev(node)
        for r in self.revs(start=p + 1):
            prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
            if prevs:
                for pr in prevs:
                    if pr == p:
                        c.append(self.node(r))
            elif p == nullrev:
                c.append(self.node(r))
        return c

    def descendant(self, start, end):
        if start == nullrev:
            return True
        for i in self.descendants([start]):
            if i == end:
                return True
            elif i > end:
                break
        return False

    def commonancestorsheads(self, a, b):
        """calculate all the heads of the common ancestors of nodes a and b"""
        a, b = self.rev(a), self.rev(b)
        try:
            ancs = self.index.commonancestorsheads(a, b)
        except (AttributeError, OverflowError): # C implementation failed
            ancs = ancestor.commonancestorsheads(self.parentrevs, a, b)
        return map(self.node, ancs)

    def isancestor(self, a, b):
        """return True if node a is an ancestor of node b

        The implementation of this is trivial but the use of
        commonancestorsheads is not."""
        return a in self.commonancestorsheads(a, b)

    def ancestor(self, a, b):
        """calculate the "best" common ancestor of nodes a and b"""

        a, b = self.rev(a), self.rev(b)
        try:
            ancs = self.index.ancestors(a, b)
        except (AttributeError, OverflowError):
            ancs = ancestor.ancestors(self.parentrevs, a, b)
        if ancs:
            # choose a consistent winner when there's a tie
            return min(map(self.node, ancs))
        return nullid

    def _match(self, id):
        if isinstance(id, int):
            # rev
            return self.node(id)
        if len(id) == 20:
            # possibly a binary node
            # odds of a binary node being all hex in ASCII are 1 in 10**25
            try:
                node = id
                self.rev(node) # quick search the index
                return node
            except LookupError:
                pass # may be partial hex id
        try:
            # str(rev)
            rev = int(id)
            if str(rev) != id:
                raise ValueError
            if rev < 0:
                rev = len(self) + rev
            if rev < 0 or rev >= len(self):
                raise ValueError
            return self.node(rev)
        except (ValueError, OverflowError):
            pass
        if len(id) == 40:
            try:
                # a full hex nodeid?
                node = bin(id)
                self.rev(node)
                return node
            except (TypeError, LookupError):
                pass

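    # Illustrative note (editor's addition): _match resolves, in order, an
    # integer revision, a 20-byte binary node, a decimal revision string
    # (including negative offsets from tip), and finally a full 40-digit hex
    # node; anything else falls through to None for _partialmatch to handle.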
    def _partialmatch(self, id):
        try:
            n = self.index.partialmatch(id)
            if n and self.hasnode(n):
                return n
            return None
        except RevlogError:
            # parsers.c radix tree lookup gave multiple matches
            # fall through to slow path that filters hidden revisions
            pass
        except (AttributeError, ValueError):
            # we are pure python, or key was too short to search radix tree
            pass

        if id in self._pcache:
            return self._pcache[id]

        if len(id) < 40:
            try:
                # hex(node)[:...]
                l = len(id) // 2 # grab an even number of digits
                prefix = bin(id[:l * 2])
                nl = [e[7] for e in self.index if e[7].startswith(prefix)]
                nl = [n for n in nl if hex(n).startswith(id) and
                      self.hasnode(n)]
                if len(nl) > 0:
                    if len(nl) == 1:
                        self._pcache[id] = nl[0]
                        return nl[0]
                    raise LookupError(id, self.indexfile,
                                      _('ambiguous identifier'))
                return None
            except TypeError:
                pass

906 def lookup(self, id):
913 def lookup(self, id):
907 """locate a node based on:
914 """locate a node based on:
908 - revision number or str(revision number)
915 - revision number or str(revision number)
909 - nodeid or subset of hex nodeid
916 - nodeid or subset of hex nodeid
910 """
917 """
911 n = self._match(id)
918 n = self._match(id)
912 if n is not None:
919 if n is not None:
913 return n
920 return n
914 n = self._partialmatch(id)
921 n = self._partialmatch(id)
915 if n:
922 if n:
916 return n
923 return n
917
924
918 raise LookupError(id, self.indexfile, _('no match found'))
925 raise LookupError(id, self.indexfile, _('no match found'))
919
926
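# Illustrative lookups on a hypothetical revlog instance ``rl`` (not
# part of this module):
#
#   rl.lookup('2')      # -> node of revision 2
#   rl.lookup('-1')     # -> node of the tip revision
#   rl.lookup('e3f4')   # -> the unique node whose hex form starts with
#                       #    'e3f4'; raises LookupError if the prefix
#                       #    is ambiguous or matches nothing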
920 def cmp(self, node, text):
927 def cmp(self, node, text):
921 """compare text with a given file revision
928 """compare text with a given file revision
922
929
923 returns True if text is different than what is stored.
930 returns True if text is different than what is stored.
924 """
931 """
925 p1, p2 = self.parents(node)
932 p1, p2 = self.parents(node)
926 return hash(text, p1, p2) != node
933 return hash(text, p1, p2) != node
927
934
928 def _addchunk(self, offset, data):
935 def _addchunk(self, offset, data):
936 """Add a segment to the revlog cache.
937
938 Accepts an absolute offset and the data that is at that location.
939 """
929 o, d = self._chunkcache
940 o, d = self._chunkcache
930 # try to add to existing cache
941 # try to add to existing cache
931 if o + len(d) == offset and len(d) + len(data) < _chunksize:
942 if o + len(d) == offset and len(d) + len(data) < _chunksize:
932 self._chunkcache = o, d + data
943 self._chunkcache = o, d + data
933 else:
944 else:
934 self._chunkcache = offset, data
945 self._chunkcache = offset, data
935
946
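# Illustrative behaviour of the append-vs-replace rule above
# (hypothetical values; both stay well under the 1 MiB _chunksize cap):
#
#   rl._chunkcache = (0, 'abcd')
#   rl._addchunk(4, 'ef')     # contiguous: cache becomes (0, 'abcdef')
#   rl._addchunk(100, 'xy')   # gap: cache is replaced by (100, 'xy')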
936 def _loadchunk(self, offset, length, df=None):
947 def _loadchunk(self, offset, length, df=None):
937 """Load a chunk/segment from the revlog.
948 """Load a segment of raw data from the revlog.
938
949
939 Accepts absolute offset, length to read, and an optional existing
950 Accepts an absolute offset, length to read, and an optional existing
940 file handle to read from.
951 file handle to read from.
941
952
942 If an existing file handle is passed, it will be seeked and the
953 If an existing file handle is passed, it will be seeked and the
943 original seek position will NOT be restored.
954 original seek position will NOT be restored.
955
956 Returns a str or buffer of raw byte data.
944 """
957 """
945 if df is not None:
958 if df is not None:
946 closehandle = False
959 closehandle = False
947 else:
960 else:
948 if self._inline:
961 if self._inline:
949 df = self.opener(self.indexfile)
962 df = self.opener(self.indexfile)
950 else:
963 else:
951 df = self.opener(self.datafile)
964 df = self.opener(self.datafile)
952 closehandle = True
965 closehandle = True
953
966
954 # Cache data both forward and backward around the requested
967 # Cache data both forward and backward around the requested
955 # data, in a fixed size window. This helps speed up operations
968 # data, in a fixed size window. This helps speed up operations
956 # involving reading the revlog backwards.
969 # involving reading the revlog backwards.
957 cachesize = self._chunkcachesize
970 cachesize = self._chunkcachesize
958 realoffset = offset & ~(cachesize - 1)
971 realoffset = offset & ~(cachesize - 1)
959 reallength = (((offset + length + cachesize) & ~(cachesize - 1))
972 reallength = (((offset + length + cachesize) & ~(cachesize - 1))
960 - realoffset)
973 - realoffset)
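# Worked example of the rounding above (assuming the default 64 KiB
# cache size; cachesize must be a power of two for the masking to
# work):
#   cachesize  = 65536
#   offset, length = 100000, 1000
#   realoffset = 100000 & ~65535                      # -> 65536
#   reallength = ((101000 + 65536) & ~65535) - 65536  # -> 65536
# i.e. one aligned 64 KiB read covering [65536, 131072) satisfies the
# request for [100000, 101000) with slack on both sides.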
961 df.seek(realoffset)
974 df.seek(realoffset)
962 d = df.read(reallength)
975 d = df.read(reallength)
963 if closehandle:
976 if closehandle:
964 df.close()
977 df.close()
965 self._addchunk(realoffset, d)
978 self._addchunk(realoffset, d)
966 if offset != realoffset or reallength != length:
979 if offset != realoffset or reallength != length:
967 return util.buffer(d, offset - realoffset, length)
980 return util.buffer(d, offset - realoffset, length)
968 return d
981 return d
969
982
970 def _getchunk(self, offset, length, df=None):
983 def _getchunk(self, offset, length, df=None):
984 """Obtain a segment of raw data from the revlog.
985
986 Accepts an absolute offset, length of bytes to obtain, and an
987 optional file handle to the already-opened revlog. If the file
988 handle is used, its original seek position will not be preserved.
989
990 Requests for data may be returned from a cache.
991
992 Returns a str or a buffer instance of raw byte data.
993 """
971 o, d = self._chunkcache
994 o, d = self._chunkcache
972 l = len(d)
995 l = len(d)
973
996
974 # is it in the cache?
997 # is it in the cache?
975 cachestart = offset - o
998 cachestart = offset - o
976 cacheend = cachestart + length
999 cacheend = cachestart + length
977 if cachestart >= 0 and cacheend <= l:
1000 if cachestart >= 0 and cacheend <= l:
978 if cachestart == 0 and cacheend == l:
1001 if cachestart == 0 and cacheend == l:
979 return d # avoid a copy
1002 return d # avoid a copy
980 return util.buffer(d, cachestart, cacheend - cachestart)
1003 return util.buffer(d, cachestart, cacheend - cachestart)
981
1004
982 return self._loadchunk(offset, length, df=df)
1005 return self._loadchunk(offset, length, df=df)
983
1006
984 def _chunkraw(self, startrev, endrev, df=None):
1007 def _chunkraw(self, startrev, endrev, df=None):
1008 """Obtain a segment of raw data corresponding to a range of revisions.
1009
1010 Accepts the start and end revisions and an optional already-open
1011 file handle to be used for reading. If the file handle is used, its
1012 seek position will not be preserved.
1013
1014 Requests for data may be satisfied by a cache.
1015
1016 Returns a str or a buffer instance of raw byte data. Callers will
1017 need to call ``self.start(rev)`` and ``self.length(rev)`` to determine
1018 where each revision's data begins and ends.
1019 """
985 start = self.start(startrev)
1020 start = self.start(startrev)
986 end = self.end(endrev)
1021 end = self.end(endrev)
987 if self._inline:
1022 if self._inline:
988 start += (startrev + 1) * self._io.size
1023 start += (startrev + 1) * self._io.size
989 end += (endrev + 1) * self._io.size
1024 end += (endrev + 1) * self._io.size
990 length = end - start
1025 length = end - start
991 return self._getchunk(start, length, df=df)
1026 return self._getchunk(start, length, df=df)
992
1027
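# Sketch of consuming _chunkraw() output for a non-inline revlog
# (hypothetical caller; this mirrors what _chunks() does below):
#
#   data = rl._chunkraw(startrev, endrev)
#   base = rl.start(startrev)
#   for rev in xrange(startrev, endrev + 1):
#       chunk = util.buffer(data, rl.start(rev) - base, rl.length(rev))
#       # each chunk is still compressed; see _chunk()/decompress()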
993 def _chunk(self, rev, df=None):
1028 def _chunk(self, rev, df=None):
1029 """Obtain a single decompressed chunk for a revision.
1030
1031 Accepts an integer revision and an optional already-open file handle
1032 to be used for reading. If used, the seek position of the file will not
1033 be preserved.
1034
1035 Returns a str holding uncompressed data for the requested revision.
1036 """
994 return decompress(self._chunkraw(rev, rev, df=df))
1037 return decompress(self._chunkraw(rev, rev, df=df))
995
1038
996 def _chunks(self, revs, df=None):
1039 def _chunks(self, revs, df=None):
997 '''faster version of [self._chunk(rev) for rev in revs]
1040 """Obtain decompressed chunks for the specified revisions.
998
1041
999 Assumes that revs is in ascending order.'''
1042 Accepts a sequence of numeric revisions that are assumed to be in
1043 ascending order. Also accepts an optional already-open file handle
1044 to be used for reading. If used, the seek position of the file will
1045 not be preserved.
1046
1047 This function is similar to calling ``self._chunk()`` multiple times,
1048 but is faster.
1049
1050 Returns a list with decompressed data for each requested revision.
1051 """
1000 if not revs:
1052 if not revs:
1001 return []
1053 return []
1002 start = self.start
1054 start = self.start
1003 length = self.length
1055 length = self.length
1004 inline = self._inline
1056 inline = self._inline
1005 iosize = self._io.size
1057 iosize = self._io.size
1006 buffer = util.buffer
1058 buffer = util.buffer
1007
1059
1008 l = []
1060 l = []
1009 ladd = l.append
1061 ladd = l.append
1010
1062
1011 # preload the cache
1063 # preload the cache
1012 try:
1064 try:
1013 while True:
1065 while True:
1014 # ensure that the cache doesn't change out from under us
1066 # ensure that the cache doesn't change out from under us
1015 _cache = self._chunkcache
1067 _cache = self._chunkcache
1016 self._chunkraw(revs[0], revs[-1], df=df)
1068 self._chunkraw(revs[0], revs[-1], df=df)
1017 if _cache == self._chunkcache:
1069 if _cache == self._chunkcache:
1018 break
1070 break
1019 offset, data = _cache
1071 offset, data = _cache
1020 except OverflowError:
1072 except OverflowError:
1021 # issue4215 - we can't cache a run of chunks greater than
1073 # issue4215 - we can't cache a run of chunks greater than
1022 # 2G on Windows
1074 # 2G on Windows
1023 return [self._chunk(rev, df=df) for rev in revs]
1075 return [self._chunk(rev, df=df) for rev in revs]
1024
1076
1025 for rev in revs:
1077 for rev in revs:
1026 chunkstart = start(rev)
1078 chunkstart = start(rev)
1027 if inline:
1079 if inline:
1028 chunkstart += (rev + 1) * iosize
1080 chunkstart += (rev + 1) * iosize
1029 chunklength = length(rev)
1081 chunklength = length(rev)
1030 ladd(decompress(buffer(data, chunkstart - offset, chunklength)))
1082 ladd(decompress(buffer(data, chunkstart - offset, chunklength)))
1031
1083
1032 return l
1084 return l
1033
1085
1034 def _chunkclear(self):
1086 def _chunkclear(self):
1087 """Clear the raw chunk cache."""
1035 self._chunkcache = (0, '')
1088 self._chunkcache = (0, '')
1036
1089
1037 def deltaparent(self, rev):
1090 def deltaparent(self, rev):
1038 """return deltaparent of the given revision"""
1091 """return deltaparent of the given revision"""
1039 base = self.index[rev][3]
1092 base = self.index[rev][3]
1040 if base == rev:
1093 if base == rev:
1041 return nullrev
1094 return nullrev
1042 elif self._generaldelta:
1095 elif self._generaldelta:
1043 return base
1096 return base
1044 else:
1097 else:
1045 return rev - 1
1098 return rev - 1
1046
1099
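# Illustrative contrast (hypothetical index contents): with general
# delta enabled, the stored base may be any earlier revision, so
# deltaparent(5) could return e.g. 2; without it, deltas always chain
# sequentially, so deltaparent(5) is 4. In either layout a full
# snapshot (base == rev) reports nullrev.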
1047 def revdiff(self, rev1, rev2):
1100 def revdiff(self, rev1, rev2):
1048 """return or calculate a delta between two revisions"""
1101 """return or calculate a delta between two revisions"""
1049 if rev1 != nullrev and self.deltaparent(rev2) == rev1:
1102 if rev1 != nullrev and self.deltaparent(rev2) == rev1:
1050 return str(self._chunk(rev2))
1103 return str(self._chunk(rev2))
1051
1104
1052 return mdiff.textdiff(self.revision(rev1),
1105 return mdiff.textdiff(self.revision(rev1),
1053 self.revision(rev2))
1106 self.revision(rev2))
1054
1107
1055 def revision(self, nodeorrev, _df=None):
1108 def revision(self, nodeorrev, _df=None):
1056 """return an uncompressed revision of a given node or revision
1109 """return an uncompressed revision of a given node or revision
1057 number.
1110 number.
1058
1111
1059 _df is an existing file handle to read from. It is meant to only be
1112 _df is an existing file handle to read from. It is meant to only be
1060 used internally.
1113 used internally.
1061 """
1114 """
1062 if isinstance(nodeorrev, int):
1115 if isinstance(nodeorrev, int):
1063 rev = nodeorrev
1116 rev = nodeorrev
1064 node = self.node(rev)
1117 node = self.node(rev)
1065 else:
1118 else:
1066 node = nodeorrev
1119 node = nodeorrev
1067 rev = None
1120 rev = None
1068
1121
1069 cachedrev = None
1122 cachedrev = None
1070 if node == nullid:
1123 if node == nullid:
1071 return ""
1124 return ""
1072 if self._cache:
1125 if self._cache:
1073 if self._cache[0] == node:
1126 if self._cache[0] == node:
1074 return self._cache[2]
1127 return self._cache[2]
1075 cachedrev = self._cache[1]
1128 cachedrev = self._cache[1]
1076
1129
1077 # look up what we need to read
1130 # look up what we need to read
1078 text = None
1131 text = None
1079 if rev is None:
1132 if rev is None:
1080 rev = self.rev(node)
1133 rev = self.rev(node)
1081
1134
1082 # check rev flags
1135 # check rev flags
1083 if self.flags(rev) & ~REVIDX_KNOWN_FLAGS:
1136 if self.flags(rev) & ~REVIDX_KNOWN_FLAGS:
1084 raise RevlogError(_('incompatible revision flag %x') %
1137 raise RevlogError(_('incompatible revision flag %x') %
1085 (self.flags(rev) & ~REVIDX_KNOWN_FLAGS))
1138 (self.flags(rev) & ~REVIDX_KNOWN_FLAGS))
1086
1139
1087 # build delta chain
1140 # build delta chain
1088 chain = []
1141 chain = []
1089 index = self.index # for performance
1142 index = self.index # for performance
1090 generaldelta = self._generaldelta
1143 generaldelta = self._generaldelta
1091 iterrev = rev
1144 iterrev = rev
1092 e = index[iterrev]
1145 e = index[iterrev]
1093 while iterrev != e[3] and iterrev != cachedrev:
1146 while iterrev != e[3] and iterrev != cachedrev:
1094 chain.append(iterrev)
1147 chain.append(iterrev)
1095 if generaldelta:
1148 if generaldelta:
1096 iterrev = e[3]
1149 iterrev = e[3]
1097 else:
1150 else:
1098 iterrev -= 1
1151 iterrev -= 1
1099 e = index[iterrev]
1152 e = index[iterrev]
1100
1153
1101 if iterrev == cachedrev:
1154 if iterrev == cachedrev:
1102 # cache hit
1155 # cache hit
1103 text = self._cache[2]
1156 text = self._cache[2]
1104 else:
1157 else:
1105 chain.append(iterrev)
1158 chain.append(iterrev)
1106 chain.reverse()
1159 chain.reverse()
1107
1160
1108 # drop cache to save memory
1161 # drop cache to save memory
1109 self._cache = None
1162 self._cache = None
1110
1163
1111 bins = self._chunks(chain, df=_df)
1164 bins = self._chunks(chain, df=_df)
1112 if text is None:
1165 if text is None:
1113 text = str(bins[0])
1166 text = str(bins[0])
1114 bins = bins[1:]
1167 bins = bins[1:]
1115
1168
1116 text = mdiff.patches(text, bins)
1169 text = mdiff.patches(text, bins)
1117
1170
1118 text = self._checkhash(text, node, rev)
1171 text = self._checkhash(text, node, rev)
1119
1172
1120 self._cache = (node, rev, text)
1173 self._cache = (node, rev, text)
1121 return text
1174 return text
1122
1175
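# Minimal sketch of the delta resolution above, ignoring the one-entry
# cache, flag checks, and batched chunk reads (hypothetical helper):
#
#   def resolvetext(rl, rev):
#       chain = []
#       while rl.deltaparent(rev) != nullrev:
#           chain.append(rev)
#           rev = rl.deltaparent(rev)
#       text = str(rl._chunk(rev))   # the full-snapshot base
#       return mdiff.patches(text,
#                            [rl._chunk(r) for r in reversed(chain)])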
1123 def hash(self, text, p1, p2):
1176 def hash(self, text, p1, p2):
1124 """Compute a node hash.
1177 """Compute a node hash.
1125
1178
1126 Available as a function so that subclasses can replace the hash
1179 Available as a function so that subclasses can replace the hash
1127 as needed.
1180 as needed.
1128 """
1181 """
1129 return hash(text, p1, p2)
1182 return hash(text, p1, p2)
1130
1183
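# For reference, the module-level hash() used here computes the classic
# Mercurial nodeid: SHA-1 over the sorted parent nodes followed by the
# text, roughly:
#
#   s = _sha(min(p1, p2))
#   s.update(max(p1, p2))
#   s.update(text)
#   return s.digest()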
1131 def _checkhash(self, text, node, rev):
1184 def _checkhash(self, text, node, rev):
1132 p1, p2 = self.parents(node)
1185 p1, p2 = self.parents(node)
1133 self.checkhash(text, p1, p2, node, rev)
1186 self.checkhash(text, p1, p2, node, rev)
1134 return text
1187 return text
1135
1188
1136 def checkhash(self, text, p1, p2, node, rev=None):
1189 def checkhash(self, text, p1, p2, node, rev=None):
1137 if node != self.hash(text, p1, p2):
1190 if node != self.hash(text, p1, p2):
1138 revornode = rev
1191 revornode = rev
1139 if revornode is None:
1192 if revornode is None:
1140 revornode = templatefilters.short(hex(node))
1193 revornode = templatefilters.short(hex(node))
1141 raise RevlogError(_("integrity check failed on %s:%s")
1194 raise RevlogError(_("integrity check failed on %s:%s")
1142 % (self.indexfile, revornode))
1195 % (self.indexfile, revornode))
1143
1196
1144 def checkinlinesize(self, tr, fp=None):
1197 def checkinlinesize(self, tr, fp=None):
1145 """Check if the revlog is too big for inline and convert if so.
1198 """Check if the revlog is too big for inline and convert if so.
1146
1199
1147 This should be called after revisions are added to the revlog. If the
1200 This should be called after revisions are added to the revlog. If the
1148 revlog has grown too large to be an inline revlog, it will convert it
1201 revlog has grown too large to be an inline revlog, it will convert it
1149 to use multiple index and data files.
1202 to use multiple index and data files.
1150 """
1203 """
1151 if not self._inline or (self.start(-2) + self.length(-2)) < _maxinline:
1204 if not self._inline or (self.start(-2) + self.length(-2)) < _maxinline:
1152 return
1205 return
1153
1206
1154 trinfo = tr.find(self.indexfile)
1207 trinfo = tr.find(self.indexfile)
1155 if trinfo is None:
1208 if trinfo is None:
1156 raise RevlogError(_("%s not found in the transaction")
1209 raise RevlogError(_("%s not found in the transaction")
1157 % self.indexfile)
1210 % self.indexfile)
1158
1211
1159 trindex = trinfo[2]
1212 trindex = trinfo[2]
1160 if trindex is not None:
1213 if trindex is not None:
1161 dataoff = self.start(trindex)
1214 dataoff = self.start(trindex)
1162 else:
1215 else:
1163 # revlog was stripped at start of transaction, use all leftover data
1216 # revlog was stripped at start of transaction, use all leftover data
1164 trindex = len(self) - 1
1217 trindex = len(self) - 1
1165 dataoff = self.end(-2)
1218 dataoff = self.end(-2)
1166
1219
1167 tr.add(self.datafile, dataoff)
1220 tr.add(self.datafile, dataoff)
1168
1221
1169 if fp:
1222 if fp:
1170 fp.flush()
1223 fp.flush()
1171 fp.close()
1224 fp.close()
1172
1225
1173 df = self.opener(self.datafile, 'w')
1226 df = self.opener(self.datafile, 'w')
1174 try:
1227 try:
1175 for r in self:
1228 for r in self:
1176 df.write(self._chunkraw(r, r))
1229 df.write(self._chunkraw(r, r))
1177 finally:
1230 finally:
1178 df.close()
1231 df.close()
1179
1232
1180 fp = self.opener(self.indexfile, 'w', atomictemp=True)
1233 fp = self.opener(self.indexfile, 'w', atomictemp=True)
1181 self.version &= ~(REVLOGNGINLINEDATA)
1234 self.version &= ~(REVLOGNGINLINEDATA)
1182 self._inline = False
1235 self._inline = False
1183 for i in self:
1236 for i in self:
1184 e = self._io.packentry(self.index[i], self.node, self.version, i)
1237 e = self._io.packentry(self.index[i], self.node, self.version, i)
1185 fp.write(e)
1238 fp.write(e)
1186
1239
1187 # if we don't call close, the temp file will never replace the
1240 # if we don't call close, the temp file will never replace the
1188 # real index
1241 # real index
1189 fp.close()
1242 fp.close()
1190
1243
1191 tr.replace(self.indexfile, trindex * self._io.size)
1244 tr.replace(self.indexfile, trindex * self._io.size)
1192 self._chunkclear()
1245 self._chunkclear()
1193
1246
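# Illustrative trigger (using the module constants): with _maxinline at
# 131072, an inline revlog is split into separate index and data files
# once the end of its second-to-last revision reaches 128 KiB.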
1194 def addrevision(self, text, transaction, link, p1, p2, cachedelta=None,
1247 def addrevision(self, text, transaction, link, p1, p2, cachedelta=None,
1195 node=None):
1248 node=None):
1196 """add a revision to the log
1249 """add a revision to the log
1197
1250
1198 text - the revision data to add
1251 text - the revision data to add
1199 transaction - the transaction object used for rollback
1252 transaction - the transaction object used for rollback
1200 link - the linkrev data to add
1253 link - the linkrev data to add
1201 p1, p2 - the parent nodeids of the revision
1254 p1, p2 - the parent nodeids of the revision
1202 cachedelta - an optional precomputed delta
1255 cachedelta - an optional precomputed delta
1203 node - nodeid of revision; typically node is not specified, and it is
1256 node - nodeid of revision; typically node is not specified, and it is
1204 computed by default as hash(text, p1, p2), however subclasses might
1257 computed by default as hash(text, p1, p2), however subclasses might
1205 use a different hashing method (and override checkhash() in that case)
1258 use a different hashing method (and override checkhash() in that case)
1206 """
1259 """
1207 if link == nullrev:
1260 if link == nullrev:
1208 raise RevlogError(_("attempted to add linkrev -1 to %s")
1261 raise RevlogError(_("attempted to add linkrev -1 to %s")
1209 % self.indexfile)
1262 % self.indexfile)
1210
1263
1211 if len(text) > _maxentrysize:
1264 if len(text) > _maxentrysize:
1212 raise RevlogError(
1265 raise RevlogError(
1213 _("%s: size of %d bytes exceeds maximum revlog storage of 2GiB")
1266 _("%s: size of %d bytes exceeds maximum revlog storage of 2GiB")
1214 % (self.indexfile, len(text)))
1267 % (self.indexfile, len(text)))
1215
1268
1216 node = node or self.hash(text, p1, p2)
1269 node = node or self.hash(text, p1, p2)
1217 if node in self.nodemap:
1270 if node in self.nodemap:
1218 return node
1271 return node
1219
1272
1220 dfh = None
1273 dfh = None
1221 if not self._inline:
1274 if not self._inline:
1222 dfh = self.opener(self.datafile, "a+")
1275 dfh = self.opener(self.datafile, "a+")
1223 ifh = self.opener(self.indexfile, "a+")
1276 ifh = self.opener(self.indexfile, "a+")
1224 try:
1277 try:
1225 return self._addrevision(node, text, transaction, link, p1, p2,
1278 return self._addrevision(node, text, transaction, link, p1, p2,
1226 REVIDX_DEFAULT_FLAGS, cachedelta, ifh, dfh)
1279 REVIDX_DEFAULT_FLAGS, cachedelta, ifh, dfh)
1227 finally:
1280 finally:
1228 if dfh:
1281 if dfh:
1229 dfh.close()
1282 dfh.close()
1230 ifh.close()
1283 ifh.close()
1231
1284
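# Illustrative call (hypothetical names; ``tr`` is an open transaction
# and p1node/p2node already exist in this revlog):
#
#   node = rl.addrevision("new contents\n", tr, linkrev, p1node, p2node)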
1232 def compress(self, text):
1285 def compress(self, text):
1233 """ generate a possibly-compressed representation of text """
1286 """ generate a possibly-compressed representation of text """
1234 if not text:
1287 if not text:
1235 return ("", text)
1288 return ("", text)
1236 l = len(text)
1289 l = len(text)
1237 bin = None
1290 bin = None
1238 if l < 44:
1291 if l < 44:
1239 pass
1292 pass
1240 elif l > 1000000:
1293 elif l > 1000000:
1241 # zlib makes an internal copy, thus doubling memory usage for
1294 # zlib makes an internal copy, thus doubling memory usage for
1242 # large files, so let's do this in pieces
1295 # large files, so let's do this in pieces
1243 z = zlib.compressobj()
1296 z = zlib.compressobj()
1244 p = []
1297 p = []
1245 pos = 0
1298 pos = 0
1246 while pos < l:
1299 while pos < l:
1247 pos2 = pos + 2**20
1300 pos2 = pos + 2**20
1248 p.append(z.compress(text[pos:pos2]))
1301 p.append(z.compress(text[pos:pos2]))
1249 pos = pos2
1302 pos = pos2
1250 p.append(z.flush())
1303 p.append(z.flush())
1251 if sum(map(len, p)) < l:
1304 if sum(map(len, p)) < l:
1252 bin = "".join(p)
1305 bin = "".join(p)
1253 else:
1306 else:
1254 bin = _compress(text)
1307 bin = _compress(text)
1255 if bin is None or len(bin) > l:
1308 if bin is None or len(bin) > l:
1256 if text[0] == '\0':
1309 if text[0] == '\0':
1257 return ("", text)
1310 return ("", text)
1258 return ('u', text)
1311 return ('u', text)
1259 return ("", bin)
1312 return ("", bin)
1260
1313
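# The empty-vs-'u' prefix pairs with the module-level decompress():
# zlib streams start with 'x', literal text that happens to begin with
# NUL is stored as-is, and 'u' flags text kept uncompressed because
# compression did not pay off; roughly:
#
#   t = bin[0]
#   if t == '\0': return bin              # uncompressed, starts with NUL
#   if t == 'x': return _decompress(bin)  # zlib stream
#   if t == 'u': return bin[1:]           # stored fulltext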
1261 def _isgooddelta(self, d, textlen):
1314 def _isgooddelta(self, d, textlen):
1262 """Returns True if the given delta is good. Good means that it is within
1315 """Returns True if the given delta is good. Good means that it is within
1263 the disk span, disk size, and chain length bounds that we know to be
1316 the disk span, disk size, and chain length bounds that we know to be
1264 performant."""
1317 performant."""
1265 if d is None:
1318 if d is None:
1266 return False
1319 return False
1267
1320
1268 # - 'dist' is the distance from the base revision -- bounding it limits
1321 # - 'dist' is the distance from the base revision -- bounding it limits
1269 # the amount of I/O we need to do.
1322 # the amount of I/O we need to do.
1270 # - 'compresseddeltalen' is the sum of the total size of deltas we need
1323 # - 'compresseddeltalen' is the sum of the total size of deltas we need
1271 # to apply -- bounding it limits the amount of CPU we consume.
1324 # to apply -- bounding it limits the amount of CPU we consume.
1272 dist, l, data, base, chainbase, chainlen, compresseddeltalen = d
1325 dist, l, data, base, chainbase, chainlen, compresseddeltalen = d
1273 if (dist > textlen * 4 or l > textlen or
1326 if (dist > textlen * 4 or l > textlen or
1274 compresseddeltalen > textlen * 2 or
1327 compresseddeltalen > textlen * 2 or
1275 (self._maxchainlen and chainlen > self._maxchainlen)):
1328 (self._maxchainlen and chainlen > self._maxchainlen)):
1276 return False
1329 return False
1277
1330
1278 return True
1331 return True
1279
1332
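# Worked example (hypothetical numbers): for a 10,000 byte fulltext, a
# candidate delta is rejected when dist exceeds 40,000 (too much disk
# span to read), when the delta itself tops 10,000 bytes (a fresh
# snapshot would be cheaper), when the chain's compressed deltas sum
# past 20,000 bytes (too much CPU to apply), or when the chain is
# longer than a configured _maxchainlen.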
1280 def _addrevision(self, node, text, transaction, link, p1, p2, flags,
1333 def _addrevision(self, node, text, transaction, link, p1, p2, flags,
1281 cachedelta, ifh, dfh, alwayscache=False):
1334 cachedelta, ifh, dfh, alwayscache=False):
1282 """internal function to add revisions to the log
1335 """internal function to add revisions to the log
1283
1336
1284 see addrevision for argument descriptions.
1337 see addrevision for argument descriptions.
1285 invariants:
1338 invariants:
1286 - text is optional (can be None); if not set, cachedelta must be set.
1339 - text is optional (can be None); if not set, cachedelta must be set.
1287 if both are set, they must correspond to each other.
1340 if both are set, they must correspond to each other.
1288 """
1341 """
1289 btext = [text]
1342 btext = [text]
1290 def buildtext():
1343 def buildtext():
1291 if btext[0] is not None:
1344 if btext[0] is not None:
1292 return btext[0]
1345 return btext[0]
1293 baserev = cachedelta[0]
1346 baserev = cachedelta[0]
1294 delta = cachedelta[1]
1347 delta = cachedelta[1]
1295 # special case deltas which replace entire base; no need to decode
1348 # special case deltas which replace entire base; no need to decode
1296 # base revision. this neatly avoids censored bases, which throw when
1349 # base revision. this neatly avoids censored bases, which throw when
1297 # they're decoded.
1350 # they're decoded.
1298 hlen = struct.calcsize(">lll")
1351 hlen = struct.calcsize(">lll")
1299 if delta[:hlen] == mdiff.replacediffheader(self.rawsize(baserev),
1352 if delta[:hlen] == mdiff.replacediffheader(self.rawsize(baserev),
1300 len(delta) - hlen):
1353 len(delta) - hlen):
1301 btext[0] = delta[hlen:]
1354 btext[0] = delta[hlen:]
1302 else:
1355 else:
1303 if self._inline:
1356 if self._inline:
1304 fh = ifh
1357 fh = ifh
1305 else:
1358 else:
1306 fh = dfh
1359 fh = dfh
1307 basetext = self.revision(self.node(baserev), _df=fh)
1360 basetext = self.revision(self.node(baserev), _df=fh)
1308 btext[0] = mdiff.patch(basetext, delta)
1361 btext[0] = mdiff.patch(basetext, delta)
1309 try:
1362 try:
1310 self.checkhash(btext[0], p1, p2, node)
1363 self.checkhash(btext[0], p1, p2, node)
1311 if flags & REVIDX_ISCENSORED:
1364 if flags & REVIDX_ISCENSORED:
1312 raise RevlogError(_('node %s is not censored') % node)
1365 raise RevlogError(_('node %s is not censored') % node)
1313 except CensoredNodeError:
1366 except CensoredNodeError:
1314 # must pass the censored index flag to add censored revisions
1367 # must pass the censored index flag to add censored revisions
1315 if not flags & REVIDX_ISCENSORED:
1368 if not flags & REVIDX_ISCENSORED:
1316 raise
1369 raise
1317 return btext[0]
1370 return btext[0]
1318
1371
1319 def builddelta(rev):
1372 def builddelta(rev):
1320 # can we use the cached delta?
1373 # can we use the cached delta?
1321 if cachedelta and cachedelta[0] == rev:
1374 if cachedelta and cachedelta[0] == rev:
1322 delta = cachedelta[1]
1375 delta = cachedelta[1]
1323 else:
1376 else:
1324 t = buildtext()
1377 t = buildtext()
1325 if self.iscensored(rev):
1378 if self.iscensored(rev):
1326 # deltas based on a censored revision must replace the
1379 # deltas based on a censored revision must replace the
1327 # full content in one patch, so delta works everywhere
1380 # full content in one patch, so delta works everywhere
1328 header = mdiff.replacediffheader(self.rawsize(rev), len(t))
1381 header = mdiff.replacediffheader(self.rawsize(rev), len(t))
1329 delta = header + t
1382 delta = header + t
1330 else:
1383 else:
1331 if self._inline:
1384 if self._inline:
1332 fh = ifh
1385 fh = ifh
1333 else:
1386 else:
1334 fh = dfh
1387 fh = dfh
1335 ptext = self.revision(self.node(rev), _df=fh)
1388 ptext = self.revision(self.node(rev), _df=fh)
1336 delta = mdiff.textdiff(ptext, t)
1389 delta = mdiff.textdiff(ptext, t)
1337 data = self.compress(delta)
1390 data = self.compress(delta)
1338 l = len(data[1]) + len(data[0])
1391 l = len(data[1]) + len(data[0])
1339 if basecache[0] == rev:
1392 if basecache[0] == rev:
1340 chainbase = basecache[1]
1393 chainbase = basecache[1]
1341 else:
1394 else:
1342 chainbase = self.chainbase(rev)
1395 chainbase = self.chainbase(rev)
1343 dist = l + offset - self.start(chainbase)
1396 dist = l + offset - self.start(chainbase)
1344 if self._generaldelta:
1397 if self._generaldelta:
1345 base = rev
1398 base = rev
1346 else:
1399 else:
1347 base = chainbase
1400 base = chainbase
1348 chainlen, compresseddeltalen = self._chaininfo(rev)
1401 chainlen, compresseddeltalen = self._chaininfo(rev)
1349 chainlen += 1
1402 chainlen += 1
1350 compresseddeltalen += l
1403 compresseddeltalen += l
1351 return dist, l, data, base, chainbase, chainlen, compresseddeltalen
1404 return dist, l, data, base, chainbase, chainlen, compresseddeltalen
1352
1405
1353 curr = len(self)
1406 curr = len(self)
1354 prev = curr - 1
1407 prev = curr - 1
1355 base = chainbase = curr
1408 base = chainbase = curr
1356 chainlen = None
1409 chainlen = None
1357 offset = self.end(prev)
1410 offset = self.end(prev)
1358 d = None
1411 d = None
1359 if self._basecache is None:
1412 if self._basecache is None:
1360 self._basecache = (prev, self.chainbase(prev))
1413 self._basecache = (prev, self.chainbase(prev))
1361 basecache = self._basecache
1414 basecache = self._basecache
1362 p1r, p2r = self.rev(p1), self.rev(p2)
1415 p1r, p2r = self.rev(p1), self.rev(p2)
1363
1416
1364 # full versions are inserted when the needed deltas
1417 # full versions are inserted when the needed deltas
1365 # become comparable to the uncompressed text
1418 # become comparable to the uncompressed text
1366 if text is None:
1419 if text is None:
1367 textlen = mdiff.patchedsize(self.rawsize(cachedelta[0]),
1420 textlen = mdiff.patchedsize(self.rawsize(cachedelta[0]),
1368 cachedelta[1])
1421 cachedelta[1])
1369 else:
1422 else:
1370 textlen = len(text)
1423 textlen = len(text)
1371
1424
1372 # should we try to build a delta?
1425 # should we try to build a delta?
1373 if prev != nullrev:
1426 if prev != nullrev:
1374 if cachedelta and self._generaldelta and self._lazydeltabase:
1427 if cachedelta and self._generaldelta and self._lazydeltabase:
1375 # Assume what we received from the server is a good choice
1428 # Assume what we received from the server is a good choice
1376 # build delta will reuse the cache
1429 # build delta will reuse the cache
1377 d = builddelta(cachedelta[0])
1430 d = builddelta(cachedelta[0])
1378 elif self._generaldelta:
1431 elif self._generaldelta:
1379 if p2r != nullrev and self._aggressivemergedeltas:
1432 if p2r != nullrev and self._aggressivemergedeltas:
1380 d = builddelta(p1r)
1433 d = builddelta(p1r)
1381 d2 = builddelta(p2r)
1434 d2 = builddelta(p2r)
1382 p1good = self._isgooddelta(d, textlen)
1435 p1good = self._isgooddelta(d, textlen)
1383 p2good = self._isgooddelta(d2, textlen)
1436 p2good = self._isgooddelta(d2, textlen)
1384 if p1good and p2good:
1437 if p1good and p2good:
1385 # If both are good deltas, choose the smallest
1438 # If both are good deltas, choose the smallest
1386 if d2[1] < d[1]:
1439 if d2[1] < d[1]:
1387 d = d2
1440 d = d2
1388 elif p2good:
1441 elif p2good:
1389 # If only p2 is good, use it
1442 # If only p2 is good, use it
1390 d = d2
1443 d = d2
1391 elif p1good:
1444 elif p1good:
1392 pass
1445 pass
1393 else:
1446 else:
1394 # Neither is good, try against prev to hopefully save us
1447 # Neither is good, try against prev to hopefully save us
1395 # a fulltext.
1448 # a fulltext.
1396 d = builddelta(prev)
1449 d = builddelta(prev)
1397 else:
1450 else:
1398 # Pick whichever parent is closer to us (to minimize the
1451 # Pick whichever parent is closer to us (to minimize the
1399 # chance of having to build a fulltext). Since
1452 # chance of having to build a fulltext). Since
1400 # nullrev == -1, any non-merge commit will always pick p1r.
1453 # nullrev == -1, any non-merge commit will always pick p1r.
1401 drev = p2r if p2r > p1r else p1r
1454 drev = p2r if p2r > p1r else p1r
1402 d = builddelta(drev)
1455 d = builddelta(drev)
1403 # If the chosen delta will result in us making a full text,
1456 # If the chosen delta will result in us making a full text,
1404 # give it one last try against prev.
1457 # give it one last try against prev.
1405 if drev != prev and not self._isgooddelta(d, textlen):
1458 if drev != prev and not self._isgooddelta(d, textlen):
1406 d = builddelta(prev)
1459 d = builddelta(prev)
1407 else:
1460 else:
1408 d = builddelta(prev)
1461 d = builddelta(prev)
1409 dist, l, data, base, chainbase, chainlen, compresseddeltalen = d
1462 dist, l, data, base, chainbase, chainlen, compresseddeltalen = d
1410
1463
1411 if not self._isgooddelta(d, textlen):
1464 if not self._isgooddelta(d, textlen):
1412 text = buildtext()
1465 text = buildtext()
1413 data = self.compress(text)
1466 data = self.compress(text)
1414 l = len(data[1]) + len(data[0])
1467 l = len(data[1]) + len(data[0])
1415 base = chainbase = curr
1468 base = chainbase = curr
1416
1469
1417 e = (offset_type(offset, flags), l, textlen,
1470 e = (offset_type(offset, flags), l, textlen,
1418 base, link, p1r, p2r, node)
1471 base, link, p1r, p2r, node)
1419 self.index.insert(-1, e)
1472 self.index.insert(-1, e)
1420 self.nodemap[node] = curr
1473 self.nodemap[node] = curr
1421
1474
1422 entry = self._io.packentry(e, self.node, self.version, curr)
1475 entry = self._io.packentry(e, self.node, self.version, curr)
1423 self._writeentry(transaction, ifh, dfh, entry, data, link, offset)
1476 self._writeentry(transaction, ifh, dfh, entry, data, link, offset)
1424
1477
1425 if alwayscache and text is None:
1478 if alwayscache and text is None:
1426 text = buildtext()
1479 text = buildtext()
1427
1480
1428 if type(text) == str: # only accept immutable objects
1481 if type(text) == str: # only accept immutable objects
1429 self._cache = (node, curr, text)
1482 self._cache = (node, curr, text)
1430 self._basecache = (curr, chainbase)
1483 self._basecache = (curr, chainbase)
1431 return node
1484 return node
1432
1485
1433 def _writeentry(self, transaction, ifh, dfh, entry, data, link, offset):
1486 def _writeentry(self, transaction, ifh, dfh, entry, data, link, offset):
1434 curr = len(self) - 1
1487 curr = len(self) - 1
1435 if not self._inline:
1488 if not self._inline:
1436 transaction.add(self.datafile, offset)
1489 transaction.add(self.datafile, offset)
1437 transaction.add(self.indexfile, curr * len(entry))
1490 transaction.add(self.indexfile, curr * len(entry))
1438 if data[0]:
1491 if data[0]:
1439 dfh.write(data[0])
1492 dfh.write(data[0])
1440 dfh.write(data[1])
1493 dfh.write(data[1])
1441 ifh.write(entry)
1494 ifh.write(entry)
1442 else:
1495 else:
1443 offset += curr * self._io.size
1496 offset += curr * self._io.size
1444 transaction.add(self.indexfile, offset, curr)
1497 transaction.add(self.indexfile, offset, curr)
1445 ifh.write(entry)
1498 ifh.write(entry)
1446 ifh.write(data[0])
1499 ifh.write(data[0])
1447 ifh.write(data[1])
1500 ifh.write(data[1])
1448 self.checkinlinesize(transaction, ifh)
1501 self.checkinlinesize(transaction, ifh)
1449
1502
1450 def addgroup(self, cg, linkmapper, transaction, addrevisioncb=None):
1503 def addgroup(self, cg, linkmapper, transaction, addrevisioncb=None):
1451 """
1504 """
1452 add a delta group
1505 add a delta group
1453
1506
1454 given a set of deltas, add them to the revision log. the
1507 given a set of deltas, add them to the revision log. the
1455 first delta is against its parent, which should be in our
1508 first delta is against its parent, which should be in our
1456 log, the rest are against the previous delta.
1509 log, the rest are against the previous delta.
1457
1510
1458 If ``addrevisioncb`` is defined, it will be called with arguments of
1511 If ``addrevisioncb`` is defined, it will be called with arguments of
1459 this revlog and the node that was added.
1512 this revlog and the node that was added.
1460 """
1513 """
1461
1514
1462 # track the base of the current delta log
1515 # track the base of the current delta log
1463 content = []
1516 content = []
1464 node = None
1517 node = None
1465
1518
1466 r = len(self)
1519 r = len(self)
1467 end = 0
1520 end = 0
1468 if r:
1521 if r:
1469 end = self.end(r - 1)
1522 end = self.end(r - 1)
1470 ifh = self.opener(self.indexfile, "a+")
1523 ifh = self.opener(self.indexfile, "a+")
1471 isize = r * self._io.size
1524 isize = r * self._io.size
1472 if self._inline:
1525 if self._inline:
1473 transaction.add(self.indexfile, end + isize, r)
1526 transaction.add(self.indexfile, end + isize, r)
1474 dfh = None
1527 dfh = None
1475 else:
1528 else:
1476 transaction.add(self.indexfile, isize, r)
1529 transaction.add(self.indexfile, isize, r)
1477 transaction.add(self.datafile, end)
1530 transaction.add(self.datafile, end)
1478 dfh = self.opener(self.datafile, "a+")
1531 dfh = self.opener(self.datafile, "a+")
1479 def flush():
1532 def flush():
1480 if dfh:
1533 if dfh:
1481 dfh.flush()
1534 dfh.flush()
1482 ifh.flush()
1535 ifh.flush()
1483 try:
1536 try:
1484 # loop through our set of deltas
1537 # loop through our set of deltas
1485 chain = None
1538 chain = None
1486 while True:
1539 while True:
1487 chunkdata = cg.deltachunk(chain)
1540 chunkdata = cg.deltachunk(chain)
1488 if not chunkdata:
1541 if not chunkdata:
1489 break
1542 break
1490 node = chunkdata['node']
1543 node = chunkdata['node']
1491 p1 = chunkdata['p1']
1544 p1 = chunkdata['p1']
1492 p2 = chunkdata['p2']
1545 p2 = chunkdata['p2']
1493 cs = chunkdata['cs']
1546 cs = chunkdata['cs']
1494 deltabase = chunkdata['deltabase']
1547 deltabase = chunkdata['deltabase']
1495 delta = chunkdata['delta']
1548 delta = chunkdata['delta']
1496
1549
1497 content.append(node)
1550 content.append(node)
1498
1551
1499 link = linkmapper(cs)
1552 link = linkmapper(cs)
1500 if node in self.nodemap:
1553 if node in self.nodemap:
1501 # this can happen if two branches make the same change
1554 # this can happen if two branches make the same change
1502 chain = node
1555 chain = node
1503 continue
1556 continue
1504
1557
1505 for p in (p1, p2):
1558 for p in (p1, p2):
1506 if p not in self.nodemap:
1559 if p not in self.nodemap:
1507 raise LookupError(p, self.indexfile,
1560 raise LookupError(p, self.indexfile,
1508 _('unknown parent'))
1561 _('unknown parent'))
1509
1562
1510 if deltabase not in self.nodemap:
1563 if deltabase not in self.nodemap:
1511 raise LookupError(deltabase, self.indexfile,
1564 raise LookupError(deltabase, self.indexfile,
1512 _('unknown delta base'))
1565 _('unknown delta base'))
1513
1566
1514 baserev = self.rev(deltabase)
1567 baserev = self.rev(deltabase)
1515
1568
1516 if baserev != nullrev and self.iscensored(baserev):
1569 if baserev != nullrev and self.iscensored(baserev):
1517 # if base is censored, delta must be full replacement in a
1570 # if base is censored, delta must be full replacement in a
1518 # single patch operation
1571 # single patch operation
1519 hlen = struct.calcsize(">lll")
1572 hlen = struct.calcsize(">lll")
1520 oldlen = self.rawsize(baserev)
1573 oldlen = self.rawsize(baserev)
1521 newlen = len(delta) - hlen
1574 newlen = len(delta) - hlen
1522 if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
1575 if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
1523 raise error.CensoredBaseError(self.indexfile,
1576 raise error.CensoredBaseError(self.indexfile,
1524 self.node(baserev))
1577 self.node(baserev))
1525
1578
1526 flags = REVIDX_DEFAULT_FLAGS
1579 flags = REVIDX_DEFAULT_FLAGS
1527 if self._peek_iscensored(baserev, delta, flush):
1580 if self._peek_iscensored(baserev, delta, flush):
1528 flags |= REVIDX_ISCENSORED
1581 flags |= REVIDX_ISCENSORED
1529
1582
1530 # We assume consumers of addrevisioncb will want to retrieve
1583 # We assume consumers of addrevisioncb will want to retrieve
1531 # the added revision, which will require a call to
1584 # the added revision, which will require a call to
1532 # revision(). revision() will fast path if there is a cache
1585 # revision(). revision() will fast path if there is a cache
1533 # hit. So, we tell _addrevision() to always cache in this case.
1586 # hit. So, we tell _addrevision() to always cache in this case.
1534 chain = self._addrevision(node, None, transaction, link,
1587 chain = self._addrevision(node, None, transaction, link,
1535 p1, p2, flags, (baserev, delta),
1588 p1, p2, flags, (baserev, delta),
1536 ifh, dfh,
1589 ifh, dfh,
1537 alwayscache=bool(addrevisioncb))
1590 alwayscache=bool(addrevisioncb))
1538
1591
1539 if addrevisioncb:
1592 if addrevisioncb:
1540 addrevisioncb(self, chain)
1593 addrevisioncb(self, chain)
1541
1594
1542 if not dfh and not self._inline:
1595 if not dfh and not self._inline:
1543 # addrevision switched from inline to conventional
1596 # addrevision switched from inline to conventional
1544 # reopen the index
1597 # reopen the index
1545 ifh.close()
1598 ifh.close()
1546 dfh = self.opener(self.datafile, "a+")
1599 dfh = self.opener(self.datafile, "a+")
1547 ifh = self.opener(self.indexfile, "a+")
1600 ifh = self.opener(self.indexfile, "a+")
1548 finally:
1601 finally:
1549 if dfh:
1602 if dfh:
1550 dfh.close()
1603 dfh.close()
1551 ifh.close()
1604 ifh.close()
1552
1605
1553 return content
1606 return content
1554
1607
1555 def iscensored(self, rev):
1608 def iscensored(self, rev):
1556 """Check if a file revision is censored."""
1609 """Check if a file revision is censored."""
1557 return False
1610 return False
1558
1611
1559 def _peek_iscensored(self, baserev, delta, flush):
1612 def _peek_iscensored(self, baserev, delta, flush):
1560 """Quickly check if a delta produces a censored revision."""
1613 """Quickly check if a delta produces a censored revision."""
1561 return False
1614 return False
1562
1615
1563 def getstrippoint(self, minlink):
1616 def getstrippoint(self, minlink):
1564 """find the minimum rev that must be stripped to strip the linkrev
1617 """find the minimum rev that must be stripped to strip the linkrev
1565
1618
1566 Returns a tuple containing the minimum rev and a set of all revs that
1619 Returns a tuple containing the minimum rev and a set of all revs that
1567 have linkrevs that will be broken by this strip.
1620 have linkrevs that will be broken by this strip.
1568 """
1621 """
1569 brokenrevs = set()
1622 brokenrevs = set()
1570 strippoint = len(self)
1623 strippoint = len(self)
1571
1624
1572 heads = {}
1625 heads = {}
1573 futurelargelinkrevs = set()
1626 futurelargelinkrevs = set()
1574 for head in self.headrevs():
1627 for head in self.headrevs():
1575 headlinkrev = self.linkrev(head)
1628 headlinkrev = self.linkrev(head)
1576 heads[head] = headlinkrev
1629 heads[head] = headlinkrev
1577 if headlinkrev >= minlink:
1630 if headlinkrev >= minlink:
1578 futurelargelinkrevs.add(headlinkrev)
1631 futurelargelinkrevs.add(headlinkrev)
1579
1632
1580 # This algorithm involves walking down the rev graph, starting at the
1633 # This algorithm involves walking down the rev graph, starting at the
1581 # heads. Since the revs are topologically sorted according to linkrev,
1634 # heads. Since the revs are topologically sorted according to linkrev,
1582 # once all head linkrevs are below the minlink, we know there are
1635 # once all head linkrevs are below the minlink, we know there are
1583 # no more revs that could have a linkrev greater than minlink.
1636 # no more revs that could have a linkrev greater than minlink.
1584 # So we can stop walking.
1637 # So we can stop walking.
1585 while futurelargelinkrevs:
1638 while futurelargelinkrevs:
1586 strippoint -= 1
1639 strippoint -= 1
1587 linkrev = heads.pop(strippoint)
1640 linkrev = heads.pop(strippoint)
1588
1641
1589 if linkrev < minlink:
1642 if linkrev < minlink:
1590 brokenrevs.add(strippoint)
1643 brokenrevs.add(strippoint)
1591 else:
1644 else:
1592 futurelargelinkrevs.remove(linkrev)
1645 futurelargelinkrevs.remove(linkrev)
1593
1646
1594 for p in self.parentrevs(strippoint):
1647 for p in self.parentrevs(strippoint):
1595 if p != nullrev:
1648 if p != nullrev:
1596 plinkrev = self.linkrev(p)
1649 plinkrev = self.linkrev(p)
1597 heads[p] = plinkrev
1650 heads[p] = plinkrev
1598 if plinkrev >= minlink:
1651 if plinkrev >= minlink:
1599 futurelargelinkrevs.add(plinkrev)
1652 futurelargelinkrevs.add(plinkrev)
1600
1653
1601 return strippoint, brokenrevs
1654 return strippoint, brokenrevs
1602
1655
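# Example (hypothetical filelog): revisions 0..3 where revs 1 and 2
# both descend from rev 0, rev 3 descends from rev 1, and the linkrevs
# are [0, 1, 4, 3]. getstrippoint(4) returns (2, set([3])): rev 2 is
# the lowest revision whose linkrev is >= 4, and stripping from there
# also removes rev 3, whose linkrev 3 is below minlink and is therefore
# reported as broken.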
1603 def strip(self, minlink, transaction):
1656 def strip(self, minlink, transaction):
1604 """truncate the revlog on the first revision with a linkrev >= minlink
1657 """truncate the revlog on the first revision with a linkrev >= minlink
1605
1658
1606 This function is called when we're stripping revision minlink and
1659 This function is called when we're stripping revision minlink and
1607 its descendants from the repository.
1660 its descendants from the repository.
1608
1661
1609 We have to remove all revisions with linkrev >= minlink, because
1662 We have to remove all revisions with linkrev >= minlink, because
1610 the equivalent changelog revisions will be renumbered after the
1663 the equivalent changelog revisions will be renumbered after the
1611 strip.
1664 strip.
1612
1665
1613 So we truncate the revlog on the first of these revisions, and
1666 So we truncate the revlog on the first of these revisions, and
1614 trust that the caller has saved the revisions that shouldn't be
1667 trust that the caller has saved the revisions that shouldn't be
1615 removed and that it'll re-add them after this truncation.
1668 removed and that it'll re-add them after this truncation.
1616 """
1669 """
1617 if len(self) == 0:
1670 if len(self) == 0:
1618 return
1671 return
1619
1672
1620 rev, _ = self.getstrippoint(minlink)
1673 rev, _ = self.getstrippoint(minlink)
1621 if rev == len(self):
1674 if rev == len(self):
1622 return
1675 return
1623
1676
1624 # first truncate the files on disk
1677 # first truncate the files on disk
1625 end = self.start(rev)
1678 end = self.start(rev)
1626 if not self._inline:
1679 if not self._inline:
1627 transaction.add(self.datafile, end)
1680 transaction.add(self.datafile, end)
1628 end = rev * self._io.size
1681 end = rev * self._io.size
1629 else:
1682 else:
1630 end += rev * self._io.size
1683 end += rev * self._io.size
1631
1684
1632 transaction.add(self.indexfile, end)
1685 transaction.add(self.indexfile, end)
1633
1686
1634 # then reset internal state in memory to forget those revisions
1687 # then reset internal state in memory to forget those revisions
1635 self._cache = None
1688 self._cache = None
1636 self._chaininfocache = {}
1689 self._chaininfocache = {}
1637 self._chunkclear()
1690 self._chunkclear()
1638 for x in xrange(rev, len(self)):
1691 for x in xrange(rev, len(self)):
1639 del self.nodemap[self.node(x)]
1692 del self.nodemap[self.node(x)]
1640
1693
1641 del self.index[rev:-1]
1694 del self.index[rev:-1]
1642
1695
1643 def checksize(self):
1696 def checksize(self):
1644 expected = 0
1697 expected = 0
1645 if len(self):
1698 if len(self):
1646 expected = max(0, self.end(len(self) - 1))
1699 expected = max(0, self.end(len(self) - 1))
1647
1700
1648 try:
1701 try:
1649 f = self.opener(self.datafile)
1702 f = self.opener(self.datafile)
1650 f.seek(0, 2)
1703 f.seek(0, 2)
1651 actual = f.tell()
1704 actual = f.tell()
1652 f.close()
1705 f.close()
1653 dd = actual - expected
1706 dd = actual - expected
1654 except IOError as inst:
1707 except IOError as inst:
1655 if inst.errno != errno.ENOENT:
1708 if inst.errno != errno.ENOENT:
1656 raise
1709 raise
1657 dd = 0
1710 dd = 0
1658
1711
1659 try:
1712 try:
1660 f = self.opener(self.indexfile)
1713 f = self.opener(self.indexfile)
1661 f.seek(0, 2)
1714 f.seek(0, 2)
1662 actual = f.tell()
1715 actual = f.tell()
1663 f.close()
1716 f.close()
1664 s = self._io.size
1717 s = self._io.size
1665 i = max(0, actual // s)
1718 i = max(0, actual // s)
1666 di = actual - (i * s)
1719 di = actual - (i * s)
1667 if self._inline:
1720 if self._inline:
1668 databytes = 0
1721 databytes = 0
1669 for r in self:
1722 for r in self:
1670 databytes += max(0, self.length(r))
1723 databytes += max(0, self.length(r))
1671 dd = 0
1724 dd = 0
1672 di = actual - len(self) * s - databytes
1725 di = actual - len(self) * s - databytes
1673 except IOError as inst:
1726 except IOError as inst:
1674 if inst.errno != errno.ENOENT:
1727 if inst.errno != errno.ENOENT:
1675 raise
1728 raise
1676 di = 0
1729 di = 0
1677
1730
1678 return (dd, di)
1731 return (dd, di)
1679
1732
1680 def files(self):
1733 def files(self):
1681 res = [self.indexfile]
1734 res = [self.indexfile]
1682 if not self._inline:
1735 if not self._inline:
1683 res.append(self.datafile)
1736 res.append(self.datafile)
1684 return res
1737 return res