##// END OF EJS Templates
revlog: group delta computation methods under _deltacomputer object...
Paul Morelle -
r35756:f90f6fd1 default
parent child Browse files
Show More
@@ -1,2466 +1,2491 b''
1 # revlog.py - storage back-end for mercurial
1 # revlog.py - storage back-end for mercurial
2 #
2 #
3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 #
4 #
5 # This software may be used and distributed according to the terms of the
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
6 # GNU General Public License version 2 or any later version.
7
7
8 """Storage back-end for Mercurial.
8 """Storage back-end for Mercurial.
9
9
10 This provides efficient delta storage with O(1) retrieve and append
10 This provides efficient delta storage with O(1) retrieve and append
11 and O(changes) merge between branches.
11 and O(changes) merge between branches.
12 """
12 """
13
13
14 from __future__ import absolute_import
14 from __future__ import absolute_import
15
15
16 import binascii
16 import binascii
17 import collections
17 import collections
18 import errno
18 import errno
19 import hashlib
19 import hashlib
20 import heapq
20 import heapq
21 import os
21 import os
22 import struct
22 import struct
23 import zlib
23 import zlib
24
24
25 # import stuff from node for others to import from revlog
25 # import stuff from node for others to import from revlog
26 from .node import (
26 from .node import (
27 bin,
27 bin,
28 hex,
28 hex,
29 nullid,
29 nullid,
30 nullrev,
30 nullrev,
31 wdirhex,
31 wdirhex,
32 wdirid,
32 wdirid,
33 wdirrev,
33 wdirrev,
34 )
34 )
35 from .i18n import _
35 from .i18n import _
36 from .thirdparty import (
36 from .thirdparty import (
37 attr,
37 attr,
38 )
38 )
39 from . import (
39 from . import (
40 ancestor,
40 ancestor,
41 error,
41 error,
42 mdiff,
42 mdiff,
43 policy,
43 policy,
44 pycompat,
44 pycompat,
45 templatefilters,
45 templatefilters,
46 util,
46 util,
47 )
47 )
48
48
# Index parser implementation selected by the module policy
# (C, cffi or pure Python).
parsers = policy.importmod(r'parsers')

# Aliased for performance.
_zlibdecompress = zlib.decompress
53
53
# revlog header flags
REVLOGV0 = 0            # original, pre-v1 on-disk format
REVLOGV1 = 1            # current default format
# Dummy value until file format is finalized.
# Reminder: change the bounds check in revlog.__init__ when this is changed.
REVLOGV2 = 0xDEAD
FLAG_INLINE_DATA = (1 << 16)    # revision data stored inside the index file
FLAG_GENERALDELTA = (1 << 17)   # deltas may be taken against any revision
REVLOG_DEFAULT_FLAGS = FLAG_INLINE_DATA
REVLOG_DEFAULT_FORMAT = REVLOGV1
REVLOG_DEFAULT_VERSION = REVLOG_DEFAULT_FORMAT | REVLOG_DEFAULT_FLAGS
# flags permitted in a v1 header; v2 currently accepts the same set
REVLOGV1_FLAGS = FLAG_INLINE_DATA | FLAG_GENERALDELTA
REVLOGV2_FLAGS = REVLOGV1_FLAGS
67
67
# revlog index flags
REVIDX_ISCENSORED = (1 << 15) # revision has censor metadata, must be verified
REVIDX_ELLIPSIS = (1 << 14) # revision hash does not match data (narrowhg)
REVIDX_EXTSTORED = (1 << 13) # revision data is stored externally
REVIDX_DEFAULT_FLAGS = 0
# stable order in which flags need to be processed and their processors applied
REVIDX_FLAGS_ORDER = [
    REVIDX_ISCENSORED,
    REVIDX_ELLIPSIS,
    REVIDX_EXTSTORED,
]
# bitmask covering every flag listed in REVIDX_FLAGS_ORDER
REVIDX_KNOWN_FLAGS = util.bitsfrom(REVIDX_FLAGS_ORDER)
80
80
81 # max size of revlog with inline data
81 # max size of revlog with inline data
82 _maxinline = 131072
82 _maxinline = 131072
83 _chunksize = 1048576
83 _chunksize = 1048576
84
84
# Re-exported here so callers can reference the exceptions via this module
# without importing the error module themselves.
RevlogError = error.RevlogError
LookupError = error.LookupError
CensoredNodeError = error.CensoredNodeError
ProgrammingError = error.ProgrammingError

# Store flag processors (cf. 'addflagprocessor()' to register)
_flagprocessors = {
    REVIDX_ISCENSORED: None,
}
94
94
def addflagprocessor(flag, processor):
    """Register a flag processor on a revision data flag.

    Invariant:
    - Flags need to be defined in REVIDX_KNOWN_FLAGS and REVIDX_FLAGS_ORDER.
    - Only one flag processor can be registered on a specific flag.
    - flagprocessors must be 3-tuples of functions (read, write, raw) with the
      following signatures:
          - (read)  f(self, rawtext) -> text, bool
          - (write) f(self, text) -> rawtext, bool
          - (raw)   f(self, rawtext) -> bool
      "text" is presented to the user. "rawtext" is stored in revlog data, not
      directly visible to the user.
      The boolean returned by these transforms is used to determine whether
      the returned text can be used for hash integrity checking. For example,
      if "write" returns False, then "text" is used to generate hash. If
      "write" returns True, that basically means "rawtext" returned by "write"
      should be used to generate hash. Usually, "write" and "read" return
      different booleans. And "raw" returns a same boolean as "write".

    Note: The 'raw' transform is used for changegroup generation and in some
    debug commands. In this case the transform only indicates whether the
    contents can be used for hash integrity checks.
    """
    # Validation order matters: unknown flag first, then ordering, then
    # duplicate registration (which is an Abort, not a ProgrammingError).
    if not flag & REVIDX_KNOWN_FLAGS:
        raise ProgrammingError(
            _("cannot register processor on unknown flag '%#x'.") % (flag))
    if flag not in REVIDX_FLAGS_ORDER:
        raise ProgrammingError(
            _("flag '%#x' undefined in REVIDX_FLAGS_ORDER.") % (flag))
    if flag in _flagprocessors:
        raise error.Abort(
            _("cannot register multiple processors on flag '%#x'.") % (flag))
    _flagprocessors[flag] = processor
129
129
def getoffset(q):
    """Extract the data-file offset from a packed offset/flags value."""
    offset = q >> 16
    return int(offset)
132
132
def gettype(q):
    """Extract the index flags (low 16 bits) from a packed value."""
    flags = q & 0xFFFF
    return int(flags)
135
135
def offset_type(offset, type):
    """Pack ``offset`` and index flags ``type`` into one integer.

    Raises ValueError if ``type`` carries bits outside REVIDX_KNOWN_FLAGS.
    """
    if type & ~REVIDX_KNOWN_FLAGS:
        raise ValueError('unknown revlog index flags')
    return int(int(offset) << 16 | type)
140
140
# Pre-computed sha1 of the null node; copied (cheaper than re-creating)
# whenever one of the parents is null.
_nullhash = hashlib.sha1(nullid)

def hash(text, p1, p2):
    """generate a hash from the given text and its parent hashes

    This hash combines both the current file contents and its history
    in a manner that makes it easy to distinguish nodes with the same
    content in the revision graph.

    NOTE: the name intentionally shadows the ``hash`` builtin within
    this module.
    """
    # As of now, if one of the parent node is null, p2 is null
    if p2 == nullid:
        # deep copy of a hash is faster than creating one
        s = _nullhash.copy()
        s.update(p1)
    else:
        # none of the parent nodes are nullid
        # parents are hashed in sorted order so the result is independent
        # of their ordering
        if p1 < p2:
            a = p1
            b = p2
        else:
            a = p2
            b = p1
        s = hashlib.sha1(a)
        s.update(b)
    s.update(text)
    return s.digest()
167
167
168 def _trimchunk(revlog, revs, startidx, endidx=None):
168 def _trimchunk(revlog, revs, startidx, endidx=None):
169 """returns revs[startidx:endidx] without empty trailing revs
169 """returns revs[startidx:endidx] without empty trailing revs
170 """
170 """
171 length = revlog.length
171 length = revlog.length
172
172
173 if endidx is None:
173 if endidx is None:
174 endidx = len(revs)
174 endidx = len(revs)
175
175
176 # Trim empty revs at the end, but never the very first revision of a chain
176 # Trim empty revs at the end, but never the very first revision of a chain
177 while endidx > 1 and endidx > startidx and length(revs[endidx - 1]) == 0:
177 while endidx > 1 and endidx > startidx and length(revs[endidx - 1]) == 0:
178 endidx -= 1
178 endidx -= 1
179
179
180 return revs[startidx:endidx]
180 return revs[startidx:endidx]
181
181
182 def _slicechunk(revlog, revs):
182 def _slicechunk(revlog, revs):
183 """slice revs to reduce the amount of unrelated data to be read from disk.
183 """slice revs to reduce the amount of unrelated data to be read from disk.
184
184
185 ``revs`` is sliced into groups that should be read in one time.
185 ``revs`` is sliced into groups that should be read in one time.
186 Assume that revs are sorted.
186 Assume that revs are sorted.
187 """
187 """
188 start = revlog.start
188 start = revlog.start
189 length = revlog.length
189 length = revlog.length
190
190
191 if len(revs) <= 1:
191 if len(revs) <= 1:
192 yield revs
192 yield revs
193 return
193 return
194
194
195 startbyte = start(revs[0])
195 startbyte = start(revs[0])
196 endbyte = start(revs[-1]) + length(revs[-1])
196 endbyte = start(revs[-1]) + length(revs[-1])
197 readdata = deltachainspan = endbyte - startbyte
197 readdata = deltachainspan = endbyte - startbyte
198
198
199 chainpayload = sum(length(r) for r in revs)
199 chainpayload = sum(length(r) for r in revs)
200
200
201 if deltachainspan:
201 if deltachainspan:
202 density = chainpayload / float(deltachainspan)
202 density = chainpayload / float(deltachainspan)
203 else:
203 else:
204 density = 1.0
204 density = 1.0
205
205
206 # Store the gaps in a heap to have them sorted by decreasing size
206 # Store the gaps in a heap to have them sorted by decreasing size
207 gapsheap = []
207 gapsheap = []
208 heapq.heapify(gapsheap)
208 heapq.heapify(gapsheap)
209 prevend = None
209 prevend = None
210 for i, rev in enumerate(revs):
210 for i, rev in enumerate(revs):
211 revstart = start(rev)
211 revstart = start(rev)
212 revlen = length(rev)
212 revlen = length(rev)
213
213
214 # Skip empty revisions to form larger holes
214 # Skip empty revisions to form larger holes
215 if revlen == 0:
215 if revlen == 0:
216 continue
216 continue
217
217
218 if prevend is not None:
218 if prevend is not None:
219 gapsize = revstart - prevend
219 gapsize = revstart - prevend
220 # only consider holes that are large enough
220 # only consider holes that are large enough
221 if gapsize > revlog._srmingapsize:
221 if gapsize > revlog._srmingapsize:
222 heapq.heappush(gapsheap, (-gapsize, i))
222 heapq.heappush(gapsheap, (-gapsize, i))
223
223
224 prevend = revstart + revlen
224 prevend = revstart + revlen
225
225
226 # Collect the indices of the largest holes until the density is acceptable
226 # Collect the indices of the largest holes until the density is acceptable
227 indicesheap = []
227 indicesheap = []
228 heapq.heapify(indicesheap)
228 heapq.heapify(indicesheap)
229 while gapsheap and density < revlog._srdensitythreshold:
229 while gapsheap and density < revlog._srdensitythreshold:
230 oppgapsize, gapidx = heapq.heappop(gapsheap)
230 oppgapsize, gapidx = heapq.heappop(gapsheap)
231
231
232 heapq.heappush(indicesheap, gapidx)
232 heapq.heappush(indicesheap, gapidx)
233
233
234 # the gap sizes are stored as negatives to be sorted decreasingly
234 # the gap sizes are stored as negatives to be sorted decreasingly
235 # by the heap
235 # by the heap
236 readdata -= (-oppgapsize)
236 readdata -= (-oppgapsize)
237 if readdata > 0:
237 if readdata > 0:
238 density = chainpayload / float(readdata)
238 density = chainpayload / float(readdata)
239 else:
239 else:
240 density = 1.0
240 density = 1.0
241
241
242 # Cut the revs at collected indices
242 # Cut the revs at collected indices
243 previdx = 0
243 previdx = 0
244 while indicesheap:
244 while indicesheap:
245 idx = heapq.heappop(indicesheap)
245 idx = heapq.heappop(indicesheap)
246
246
247 chunk = _trimchunk(revlog, revs, previdx, idx)
247 chunk = _trimchunk(revlog, revs, previdx, idx)
248 if chunk:
248 if chunk:
249 yield chunk
249 yield chunk
250
250
251 previdx = idx
251 previdx = idx
252
252
253 chunk = _trimchunk(revlog, revs, previdx)
253 chunk = _trimchunk(revlog, revs, previdx)
254 if chunk:
254 if chunk:
255 yield chunk
255 yield chunk
256
256
@attr.s(slots=True, frozen=True)
class _deltainfo(object):
    """Result of computing one candidate delta (see _deltacomputer)."""
    # distance in bytes from the start of the delta chain base to the end
    # of this delta once appended
    distance = attr.ib()
    # compressed length of the delta (header + data)
    deltalen = attr.ib()
    # (header, data) pair as returned by revlog.compress
    data = attr.ib()
    # revision the delta is stored against
    base = attr.ib()
    # base revision of the resulting delta chain
    chainbase = attr.ib()
    # length of the resulting delta chain
    chainlen = attr.ib()
    # total compressed size of the resulting chain
    compresseddeltalen = attr.ib()
266
266
class _deltacomputer(object):
    """Groups the delta-computation methods used when storing a revision
    into a revlog (candidate selection, fulltext building, delta
    construction and selection)."""

    def __init__(self, revlog):
        # the revlog this computer writes deltas for
        self.revlog = revlog

    def _getcandidaterevs(self, p1, p2, cachedelta):
        """
        Provides revisions that present an interest to be diffed against,
        grouped by level of easiness.
        """
        revlog = self.revlog
        curr = len(revlog)
        prev = curr - 1
        p1r, p2r = revlog.rev(p1), revlog.rev(p2)

        # should we try to build a delta?
        if prev != nullrev and revlog.storedeltachains:
            tested = set()
            # This condition is true most of the time when processing
            # changegroup data into a generaldelta repo. The only time it
            # isn't true is if this is the first revision in a delta chain
            # or if ``format.generaldelta=true`` disabled ``lazydeltabase``.
            if cachedelta and revlog._generaldelta and revlog._lazydeltabase:
                # Assume what we received from the server is a good choice
                # build delta will reuse the cache
                yield (cachedelta[0],)
                tested.add(cachedelta[0])

            if revlog._generaldelta:
                # exclude already lazy tested base if any
                parents = [p for p in (p1r, p2r)
                           if p != nullrev and p not in tested]
                if parents and not revlog._aggressivemergedeltas:
                    # Pick whichever parent is closer to us (to minimize the
                    # chance of having to build a fulltext).
                    parents = [max(parents)]
                tested.update(parents)
                yield parents

            if prev not in tested:
                # other approach failed try against prev to hopefully save us a
                # fulltext.
                yield (prev,)

    def buildtext(self, revinfo, fh):
        """Builds a fulltext version of a revision

        revinfo: _revisioninfo instance that contains all needed info
        fh:      file handle to either the .i or the .d revlog file,
                 depending on whether it is inlined or not
        """
        btext = revinfo.btext
        # btext is a one-element cache list; reuse a previously built text
        if btext[0] is not None:
            return btext[0]

        revlog = self.revlog
        cachedelta = revinfo.cachedelta
        flags = revinfo.flags
        node = revinfo.node

        baserev = cachedelta[0]
        delta = cachedelta[1]
        # special case deltas which replace entire base; no need to decode
        # base revision. this neatly avoids censored bases, which throw when
        # they're decoded.
        hlen = struct.calcsize(">lll")
        if delta[:hlen] == mdiff.replacediffheader(revlog.rawsize(baserev),
                                                   len(delta) - hlen):
            btext[0] = delta[hlen:]
        else:
            basetext = revlog.revision(baserev, _df=fh, raw=True)
            btext[0] = mdiff.patch(basetext, delta)

        try:
            res = revlog._processflags(btext[0], flags, 'read', raw=True)
            btext[0], validatehash = res
            if validatehash:
                revlog.checkhash(btext[0], node, p1=revinfo.p1, p2=revinfo.p2)
            if flags & REVIDX_ISCENSORED:
                raise RevlogError(_('node %s is not censored') % node)
        except CensoredNodeError:
            # must pass the censored index flag to add censored revisions
            if not flags & REVIDX_ISCENSORED:
                raise
        return btext[0]

    def _builddeltadiff(self, base, revinfo, fh):
        """Build the raw (uncompressed) delta turning ``base`` into this
        revision's fulltext."""
        revlog = self.revlog
        t = self.buildtext(revinfo, fh)
        if revlog.iscensored(base):
            # deltas based on a censored revision must replace the
            # full content in one patch, so delta works everywhere
            header = mdiff.replacediffheader(revlog.rawsize(base), len(t))
            delta = header + t
        else:
            ptext = revlog.revision(base, _df=fh, raw=True)
            delta = mdiff.textdiff(ptext, t)

        return delta

    def _builddeltainfo(self, revinfo, base, fh):
        """Compress a delta against ``base`` and collect the chain
        statistics into a _deltainfo."""
        # can we use the cached delta?
        if revinfo.cachedelta and revinfo.cachedelta[0] == base:
            delta = revinfo.cachedelta[1]
        else:
            delta = self._builddeltadiff(base, revinfo, fh)
        revlog = self.revlog
        header, data = revlog.compress(delta)
        deltalen = len(header) + len(data)
        chainbase = revlog.chainbase(base)
        offset = revlog.end(len(revlog) - 1)
        # span from the chain base's start to the end of the appended delta
        dist = deltalen + offset - revlog.start(chainbase)
        if revlog._generaldelta:
            deltabase = base
        else:
            deltabase = chainbase
        chainlen, compresseddeltalen = revlog._chaininfo(base)
        chainlen += 1
        compresseddeltalen += deltalen
        return _deltainfo(dist, deltalen, (header, data), deltabase,
                          chainbase, chainlen, compresseddeltalen)

    def finddeltainfo(self, revinfo, fh):
        """Find an acceptable delta against a candidate revision

        revinfo: information about the revision (instance of _revisioninfo)
        fh: file handle to either the .i or the .d revlog file,
            depending on whether it is inlined or not

        Returns the first acceptable candidate revision, as ordered by
        _getcandidaterevs
        """
        cachedelta = revinfo.cachedelta
        p1 = revinfo.p1
        p2 = revinfo.p2
        revlog = self.revlog

        deltainfo = None
        for candidaterevs in self._getcandidaterevs(p1, p2, cachedelta):
            nominateddeltas = []
            for candidaterev in candidaterevs:
                candidatedelta = self._builddeltainfo(revinfo, candidaterev, fh)
                if revlog._isgooddeltainfo(candidatedelta, revinfo.textlen):
                    nominateddeltas.append(candidatedelta)
            if nominateddeltas:
                # within a candidate group, prefer the smallest delta
                deltainfo = min(nominateddeltas, key=lambda x: x.deltalen)
                break

        return deltainfo
415
@attr.s(slots=True, frozen=True)
class _revisioninfo(object):
    """Information about a revision that allows building its fulltext
    node:       expected hash of the revision
    p1, p2:     parent revs of the revision
    btext:      built text cache consisting of a one-element list
    textlen:    length of the revision fulltext
    cachedelta: (baserev, uncompressed_delta) or None
    flags:      flags associated to the revision storage

    One of btext[0] or cachedelta must be set.
    """
    node = attr.ib()
    p1 = attr.ib()
    p2 = attr.ib()
    btext = attr.ib()
    textlen = attr.ib()
    cachedelta = attr.ib()
    flags = attr.ib()
285
434
286 # index v0:
435 # index v0:
287 # 4 bytes: offset
436 # 4 bytes: offset
288 # 4 bytes: compressed length
437 # 4 bytes: compressed length
289 # 4 bytes: base rev
438 # 4 bytes: base rev
290 # 4 bytes: link rev
439 # 4 bytes: link rev
291 # 20 bytes: parent 1 nodeid
440 # 20 bytes: parent 1 nodeid
292 # 20 bytes: parent 2 nodeid
441 # 20 bytes: parent 2 nodeid
293 # 20 bytes: nodeid
442 # 20 bytes: nodeid
294 indexformatv0 = struct.Struct(">4l20s20s20s")
443 indexformatv0 = struct.Struct(">4l20s20s20s")
295 indexformatv0_pack = indexformatv0.pack
444 indexformatv0_pack = indexformatv0.pack
296 indexformatv0_unpack = indexformatv0.unpack
445 indexformatv0_unpack = indexformatv0.unpack
297
446
class revlogoldio(object):
    """Parser/packer for the original (version 0) revlog index format."""

    def __init__(self):
        # fixed byte size of one v0 index record
        self.size = indexformatv0.size

    def parseindex(self, data, inline):
        """Parse raw v0 index ``data``.

        Returns (index, nodemap, cache); the cache slot is always None
        for this format. Entries are converted to the revlogv1 tuple
        layout on the fly.
        """
        s = self.size
        index = []
        nodemap = {nullid: nullrev}
        n = off = 0
        l = len(data)
        while off + s <= l:
            cur = data[off:off + s]
            off += s
            e = indexformatv0_unpack(cur)
            # transform to revlogv1 format
            e2 = (offset_type(e[0], 0), e[1], -1, e[2], e[3],
                  nodemap.get(e[4], nullrev), nodemap.get(e[5], nullrev), e[6])
            index.append(e2)
            nodemap[e[6]] = n
            n += 1

        # add the magic null revision at -1
        index.append((0, 0, 0, -1, -1, -1, -1, nullid))

        return index, nodemap, None

    def packentry(self, entry, node, version, rev):
        """Serialize a v1-layout ``entry`` back into the v0 on-disk format.

        ``node`` is a callable used to map the parent revs stored in the
        entry to nodeids. Raises RevlogError if the entry carries index
        flags, which v0 cannot represent.
        """
        if gettype(entry[0]):
            raise RevlogError(_('index entry flags need revlog version 1'))
        e2 = (getoffset(entry[0]), entry[1], entry[3], entry[4],
              node(entry[5]), node(entry[6]), entry[7])
        return indexformatv0_pack(*e2)
330
479
331 # index ng:
480 # index ng:
332 # 6 bytes: offset
481 # 6 bytes: offset
333 # 2 bytes: flags
482 # 2 bytes: flags
334 # 4 bytes: compressed length
483 # 4 bytes: compressed length
335 # 4 bytes: uncompressed length
484 # 4 bytes: uncompressed length
336 # 4 bytes: base rev
485 # 4 bytes: base rev
337 # 4 bytes: link rev
486 # 4 bytes: link rev
338 # 4 bytes: parent 1 rev
487 # 4 bytes: parent 1 rev
339 # 4 bytes: parent 2 rev
488 # 4 bytes: parent 2 rev
340 # 32 bytes: nodeid
489 # 32 bytes: nodeid
341 indexformatng = struct.Struct(">Qiiiiii20s12x")
490 indexformatng = struct.Struct(">Qiiiiii20s12x")
342 indexformatng_pack = indexformatng.pack
491 indexformatng_pack = indexformatng.pack
343 versionformat = struct.Struct(">I")
492 versionformat = struct.Struct(">I")
344 versionformat_pack = versionformat.pack
493 versionformat_pack = versionformat.pack
345 versionformat_unpack = versionformat.unpack
494 versionformat_unpack = versionformat.unpack
346
495
347 # corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
496 # corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
348 # signed integer)
497 # signed integer)
349 _maxentrysize = 0x7fffffff
498 _maxentrysize = 0x7fffffff
350
499
class revlogio(object):
    """Parser/packer for the v1/v2 ("ng") revlog index format."""

    def __init__(self):
        # fixed byte size of one index record
        self.size = indexformatng.size

    def parseindex(self, data, inline):
        """Parse raw index ``data``; returns (index, nodemap, cache)."""
        # call the C implementation to parse the index data
        index, cache = parsers.parse_index2(data, inline)
        return index, getattr(index, 'nodemap', None), cache

    def packentry(self, entry, node, version, rev):
        """Serialize one index entry.

        For rev 0 the first 4 bytes (the offset field) are overwritten
        with the revlog ``version`` header.
        """
        p = indexformatng_pack(*entry)
        if rev == 0:
            p = versionformat_pack(version) + p[4:]
        return p
365
514
366 class revlog(object):
515 class revlog(object):
367 """
516 """
368 the underlying revision storage object
517 the underlying revision storage object
369
518
370 A revlog consists of two parts, an index and the revision data.
519 A revlog consists of two parts, an index and the revision data.
371
520
372 The index is a file with a fixed record size containing
521 The index is a file with a fixed record size containing
373 information on each revision, including its nodeid (hash), the
522 information on each revision, including its nodeid (hash), the
374 nodeids of its parents, the position and offset of its data within
523 nodeids of its parents, the position and offset of its data within
375 the data file, and the revision it's based on. Finally, each entry
524 the data file, and the revision it's based on. Finally, each entry
376 contains a linkrev entry that can serve as a pointer to external
525 contains a linkrev entry that can serve as a pointer to external
377 data.
526 data.
378
527
379 The revision data itself is a linear collection of data chunks.
528 The revision data itself is a linear collection of data chunks.
380 Each chunk represents a revision and is usually represented as a
529 Each chunk represents a revision and is usually represented as a
381 delta against the previous chunk. To bound lookup time, runs of
530 delta against the previous chunk. To bound lookup time, runs of
382 deltas are limited to about 2 times the length of the original
531 deltas are limited to about 2 times the length of the original
383 version data. This makes retrieval of a version proportional to
532 version data. This makes retrieval of a version proportional to
384 its size, or O(1) relative to the number of revisions.
533 its size, or O(1) relative to the number of revisions.
385
534
386 Both pieces of the revlog are written to in an append-only
535 Both pieces of the revlog are written to in an append-only
387 fashion, which means we never need to rewrite a file to insert or
536 fashion, which means we never need to rewrite a file to insert or
388 remove data, and can use some simple techniques to avoid the need
537 remove data, and can use some simple techniques to avoid the need
389 for locking while reading.
538 for locking while reading.
390
539
391 If checkambig, indexfile is opened with checkambig=True at
540 If checkambig, indexfile is opened with checkambig=True at
392 writing, to avoid file stat ambiguity.
541 writing, to avoid file stat ambiguity.
393
542
394 If mmaplargeindex is True, and an mmapindexthreshold is set, the
543 If mmaplargeindex is True, and an mmapindexthreshold is set, the
395 index will be mmapped rather than read if it is larger than the
544 index will be mmapped rather than read if it is larger than the
396 configured threshold.
545 configured threshold.
397 """
546 """
398 def __init__(self, opener, indexfile, datafile=None, checkambig=False,
547 def __init__(self, opener, indexfile, datafile=None, checkambig=False,
399 mmaplargeindex=False):
548 mmaplargeindex=False):
400 """
549 """
401 create a revlog object
550 create a revlog object
402
551
403 opener is a function that abstracts the file opening operation
552 opener is a function that abstracts the file opening operation
404 and can be used to implement COW semantics or the like.
553 and can be used to implement COW semantics or the like.
405 """
554 """
406 self.indexfile = indexfile
555 self.indexfile = indexfile
407 self.datafile = datafile or (indexfile[:-2] + ".d")
556 self.datafile = datafile or (indexfile[:-2] + ".d")
408 self.opener = opener
557 self.opener = opener
409 # When True, indexfile is opened with checkambig=True at writing, to
558 # When True, indexfile is opened with checkambig=True at writing, to
410 # avoid file stat ambiguity.
559 # avoid file stat ambiguity.
411 self._checkambig = checkambig
560 self._checkambig = checkambig
412 # 3-tuple of (node, rev, text) for a raw revision.
561 # 3-tuple of (node, rev, text) for a raw revision.
413 self._cache = None
562 self._cache = None
414 # Maps rev to chain base rev.
563 # Maps rev to chain base rev.
415 self._chainbasecache = util.lrucachedict(100)
564 self._chainbasecache = util.lrucachedict(100)
416 # 2-tuple of (offset, data) of raw data from the revlog at an offset.
565 # 2-tuple of (offset, data) of raw data from the revlog at an offset.
417 self._chunkcache = (0, '')
566 self._chunkcache = (0, '')
418 # How much data to read and cache into the raw revlog data cache.
567 # How much data to read and cache into the raw revlog data cache.
419 self._chunkcachesize = 65536
568 self._chunkcachesize = 65536
420 self._maxchainlen = None
569 self._maxchainlen = None
421 self._aggressivemergedeltas = False
570 self._aggressivemergedeltas = False
422 self.index = []
571 self.index = []
423 # Mapping of partial identifiers to full nodes.
572 # Mapping of partial identifiers to full nodes.
424 self._pcache = {}
573 self._pcache = {}
425 # Mapping of revision integer to full node.
574 # Mapping of revision integer to full node.
426 self._nodecache = {nullid: nullrev}
575 self._nodecache = {nullid: nullrev}
427 self._nodepos = None
576 self._nodepos = None
428 self._compengine = 'zlib'
577 self._compengine = 'zlib'
429 self._maxdeltachainspan = -1
578 self._maxdeltachainspan = -1
430 self._withsparseread = False
579 self._withsparseread = False
431 self._srdensitythreshold = 0.25
580 self._srdensitythreshold = 0.25
432 self._srmingapsize = 262144
581 self._srmingapsize = 262144
433
582
434 mmapindexthreshold = None
583 mmapindexthreshold = None
435 v = REVLOG_DEFAULT_VERSION
584 v = REVLOG_DEFAULT_VERSION
436 opts = getattr(opener, 'options', None)
585 opts = getattr(opener, 'options', None)
437 if opts is not None:
586 if opts is not None:
438 if 'revlogv2' in opts:
587 if 'revlogv2' in opts:
439 # version 2 revlogs always use generaldelta.
588 # version 2 revlogs always use generaldelta.
440 v = REVLOGV2 | FLAG_GENERALDELTA | FLAG_INLINE_DATA
589 v = REVLOGV2 | FLAG_GENERALDELTA | FLAG_INLINE_DATA
441 elif 'revlogv1' in opts:
590 elif 'revlogv1' in opts:
442 if 'generaldelta' in opts:
591 if 'generaldelta' in opts:
443 v |= FLAG_GENERALDELTA
592 v |= FLAG_GENERALDELTA
444 else:
593 else:
445 v = 0
594 v = 0
446 if 'chunkcachesize' in opts:
595 if 'chunkcachesize' in opts:
447 self._chunkcachesize = opts['chunkcachesize']
596 self._chunkcachesize = opts['chunkcachesize']
448 if 'maxchainlen' in opts:
597 if 'maxchainlen' in opts:
449 self._maxchainlen = opts['maxchainlen']
598 self._maxchainlen = opts['maxchainlen']
450 if 'aggressivemergedeltas' in opts:
599 if 'aggressivemergedeltas' in opts:
451 self._aggressivemergedeltas = opts['aggressivemergedeltas']
600 self._aggressivemergedeltas = opts['aggressivemergedeltas']
452 self._lazydeltabase = bool(opts.get('lazydeltabase', False))
601 self._lazydeltabase = bool(opts.get('lazydeltabase', False))
453 if 'compengine' in opts:
602 if 'compengine' in opts:
454 self._compengine = opts['compengine']
603 self._compengine = opts['compengine']
455 if 'maxdeltachainspan' in opts:
604 if 'maxdeltachainspan' in opts:
456 self._maxdeltachainspan = opts['maxdeltachainspan']
605 self._maxdeltachainspan = opts['maxdeltachainspan']
457 if mmaplargeindex and 'mmapindexthreshold' in opts:
606 if mmaplargeindex and 'mmapindexthreshold' in opts:
458 mmapindexthreshold = opts['mmapindexthreshold']
607 mmapindexthreshold = opts['mmapindexthreshold']
459 self._withsparseread = bool(opts.get('with-sparse-read', False))
608 self._withsparseread = bool(opts.get('with-sparse-read', False))
460 if 'sparse-read-density-threshold' in opts:
609 if 'sparse-read-density-threshold' in opts:
461 self._srdensitythreshold = opts['sparse-read-density-threshold']
610 self._srdensitythreshold = opts['sparse-read-density-threshold']
462 if 'sparse-read-min-gap-size' in opts:
611 if 'sparse-read-min-gap-size' in opts:
463 self._srmingapsize = opts['sparse-read-min-gap-size']
612 self._srmingapsize = opts['sparse-read-min-gap-size']
464
613
465 if self._chunkcachesize <= 0:
614 if self._chunkcachesize <= 0:
466 raise RevlogError(_('revlog chunk cache size %r is not greater '
615 raise RevlogError(_('revlog chunk cache size %r is not greater '
467 'than 0') % self._chunkcachesize)
616 'than 0') % self._chunkcachesize)
468 elif self._chunkcachesize & (self._chunkcachesize - 1):
617 elif self._chunkcachesize & (self._chunkcachesize - 1):
469 raise RevlogError(_('revlog chunk cache size %r is not a power '
618 raise RevlogError(_('revlog chunk cache size %r is not a power '
470 'of 2') % self._chunkcachesize)
619 'of 2') % self._chunkcachesize)
471
620
472 indexdata = ''
621 indexdata = ''
473 self._initempty = True
622 self._initempty = True
474 try:
623 try:
475 f = self.opener(self.indexfile)
624 f = self.opener(self.indexfile)
476 if (mmapindexthreshold is not None and
625 if (mmapindexthreshold is not None and
477 self.opener.fstat(f).st_size >= mmapindexthreshold):
626 self.opener.fstat(f).st_size >= mmapindexthreshold):
478 indexdata = util.buffer(util.mmapread(f))
627 indexdata = util.buffer(util.mmapread(f))
479 else:
628 else:
480 indexdata = f.read()
629 indexdata = f.read()
481 f.close()
630 f.close()
482 if len(indexdata) > 0:
631 if len(indexdata) > 0:
483 v = versionformat_unpack(indexdata[:4])[0]
632 v = versionformat_unpack(indexdata[:4])[0]
484 self._initempty = False
633 self._initempty = False
485 except IOError as inst:
634 except IOError as inst:
486 if inst.errno != errno.ENOENT:
635 if inst.errno != errno.ENOENT:
487 raise
636 raise
488
637
489 self.version = v
638 self.version = v
490 self._inline = v & FLAG_INLINE_DATA
639 self._inline = v & FLAG_INLINE_DATA
491 self._generaldelta = v & FLAG_GENERALDELTA
640 self._generaldelta = v & FLAG_GENERALDELTA
492 flags = v & ~0xFFFF
641 flags = v & ~0xFFFF
493 fmt = v & 0xFFFF
642 fmt = v & 0xFFFF
494 if fmt == REVLOGV0:
643 if fmt == REVLOGV0:
495 if flags:
644 if flags:
496 raise RevlogError(_('unknown flags (%#04x) in version %d '
645 raise RevlogError(_('unknown flags (%#04x) in version %d '
497 'revlog %s') %
646 'revlog %s') %
498 (flags >> 16, fmt, self.indexfile))
647 (flags >> 16, fmt, self.indexfile))
499 elif fmt == REVLOGV1:
648 elif fmt == REVLOGV1:
500 if flags & ~REVLOGV1_FLAGS:
649 if flags & ~REVLOGV1_FLAGS:
501 raise RevlogError(_('unknown flags (%#04x) in version %d '
650 raise RevlogError(_('unknown flags (%#04x) in version %d '
502 'revlog %s') %
651 'revlog %s') %
503 (flags >> 16, fmt, self.indexfile))
652 (flags >> 16, fmt, self.indexfile))
504 elif fmt == REVLOGV2:
653 elif fmt == REVLOGV2:
505 if flags & ~REVLOGV2_FLAGS:
654 if flags & ~REVLOGV2_FLAGS:
506 raise RevlogError(_('unknown flags (%#04x) in version %d '
655 raise RevlogError(_('unknown flags (%#04x) in version %d '
507 'revlog %s') %
656 'revlog %s') %
508 (flags >> 16, fmt, self.indexfile))
657 (flags >> 16, fmt, self.indexfile))
509 else:
658 else:
510 raise RevlogError(_('unknown version (%d) in revlog %s') %
659 raise RevlogError(_('unknown version (%d) in revlog %s') %
511 (fmt, self.indexfile))
660 (fmt, self.indexfile))
512
661
513 self.storedeltachains = True
662 self.storedeltachains = True
514
663
515 self._io = revlogio()
664 self._io = revlogio()
516 if self.version == REVLOGV0:
665 if self.version == REVLOGV0:
517 self._io = revlogoldio()
666 self._io = revlogoldio()
518 try:
667 try:
519 d = self._io.parseindex(indexdata, self._inline)
668 d = self._io.parseindex(indexdata, self._inline)
520 except (ValueError, IndexError):
669 except (ValueError, IndexError):
521 raise RevlogError(_("index %s is corrupted") % (self.indexfile))
670 raise RevlogError(_("index %s is corrupted") % (self.indexfile))
522 self.index, nodemap, self._chunkcache = d
671 self.index, nodemap, self._chunkcache = d
523 if nodemap is not None:
672 if nodemap is not None:
524 self.nodemap = self._nodecache = nodemap
673 self.nodemap = self._nodecache = nodemap
525 if not self._chunkcache:
674 if not self._chunkcache:
526 self._chunkclear()
675 self._chunkclear()
527 # revnum -> (chain-length, sum-delta-length)
676 # revnum -> (chain-length, sum-delta-length)
528 self._chaininfocache = {}
677 self._chaininfocache = {}
529 # revlog header -> revlog compressor
678 # revlog header -> revlog compressor
530 self._decompressors = {}
679 self._decompressors = {}
531
680
@util.propertycache
def _compressor(self):
    """Lazily build the compressor object for the configured engine."""
    engine = util.compengines[self._compengine]
    return engine.revlogcompressor()
535
684
def tip(self):
    """Return the node id of the most recent revision."""
    # The index carries one trailing sentinel entry, hence -2.
    tiprev = len(self.index) - 2
    return self.node(tiprev)
def __contains__(self, rev):
    """Report whether ``rev`` is a valid revision number for this revlog."""
    return rev >= 0 and rev < len(self)
def __len__(self):
    """Return the number of revisions stored in this revlog."""
    # The index always keeps one extra sentinel entry at the end.
    nbentries = len(self.index)
    return nbentries - 1
def __iter__(self):
    """Iterate over every revision number, in ascending order."""
    return iter(xrange(len(self)))
def revs(self, start=0, stop=None):
    """iterate over all rev in this revlog (from start to stop)"""
    if stop is None:
        # No explicit bound: walk forward to the end of the revlog.
        return xrange(start, len(self), 1)
    # Walk backwards when the bounds are reversed; the stop bound is
    # inclusive, so nudge it one step past in the walk direction.
    step = -1 if start > stop else 1
    return xrange(start, stop + step, step)
554
703
@util.propertycache
def nodemap(self):
    """Mapping of node -> rev, materialized lazily on first access."""
    # Resolving any node forces the node cache to be populated.
    self.rev(self.node(0))
    return self._nodecache
559
708
def hasnode(self, node):
    """Tell whether ``node`` is known to this revlog."""
    try:
        self.rev(node)
    except KeyError:
        # LookupError subclasses KeyError in Mercurial's error module.
        return False
    return True
566
715
def clearcaches(self):
    """Drop every in-memory cache kept by this revlog."""
    self._pcache = {}
    self._chunkcache = (0, '')
    self._chainbasecache.clear()
    self._cache = None

    try:
        # The C-implemented node cache exposes clearcaches().
        self._nodecache.clearcaches()
    except AttributeError:
        # Pure-python fallback: rebuild the minimal node mapping.
        self._nodecache = {nullid: nullrev}
        self._nodepos = None
578
727
def rev(self, node):
    """Map ``node`` to its revision number.

    Raises LookupError when the node is unknown, and WdirUnsupported
    for the working-directory pseudo-node.
    """
    try:
        return self._nodecache[node]
    except TypeError:
        raise
    except RevlogError:
        # The parsers.c radix tree lookup failed.
        if node == wdirid:
            raise error.WdirUnsupported
        raise LookupError(node, self.indexfile, _('no node'))
    except KeyError:
        # Pure-python cache miss: scan the index backwards from the last
        # unexamined position, filling the cache as we go.
        cache = self._nodecache
        index = self.index
        startpos = self._nodepos
        if startpos is None:
            startpos = len(index) - 2
        for r in xrange(startpos, -1, -1):
            candidate = index[r][7]
            cache[candidate] = r
            if candidate == node:
                # Remember where to resume the scan next time.
                self._nodepos = r - 1
                return r
        if node == wdirid:
            raise error.WdirUnsupported
        raise LookupError(node, self.indexfile, _('no node'))
605
754
606 # Accessors for index entries.
755 # Accessors for index entries.
607
756
608 # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
757 # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
609 # are flags.
758 # are flags.
def start(self, rev):
    """Return the byte offset in the data file where ``rev`` begins."""
    # The first index field packs the offset (upper bits) with the
    # 16 flag bits in the low word.
    offset_flags = self.index[rev][0]
    return int(offset_flags >> 16)
612
761
def flags(self, rev):
    """Return the 16 storage flag bits recorded for ``rev``."""
    # Flags live in the low 16 bits of the packed first index field.
    offset_flags = self.index[rev][0]
    return offset_flags & 0xFFFF
615
764
def length(self, rev):
    """Return the on-disk (compressed) length of ``rev``'s data."""
    entry = self.index[rev]
    return entry[1]
618
767
def rawsize(self, rev):
    """return the length of the uncompressed text for a given revision"""
    cached = self.index[rev][2]
    if cached >= 0:
        return cached

    # A negative stored size means "unknown": materialize the raw
    # revision text and measure it.
    return len(self.revision(rev, raw=True))
627
776
def size(self, rev):
    """length of non-raw text (processed by a "read" flag processor)"""
    # fast path: if no "read" flag processor could change the content,
    # size is rawsize. note: ELLIPSIS is known to not change the content.
    revflags = self.flags(rev)
    if not revflags & (REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS):
        return self.rawsize(rev)

    return len(self.revision(rev, raw=False))
637
786
def chainbase(self, rev):
    """Return the revision on which ``rev``'s delta chain ultimately rests."""
    cached = self._chainbasecache.get(rev)
    if cached is not None:
        return cached

    index = self.index
    base = index[rev][3]
    # Follow delta bases until we reach a self-based (full) revision.
    while base != rev:
        rev = base
        base = index[rev][3]

    # NOTE: keyed on the final rev reached (historical behavior).
    self._chainbasecache[rev] = base
    return base
651
800
def linkrev(self, rev):
    """Return the changelog revision that ``rev`` links to."""
    entry = self.index[rev]
    return entry[4]
654
803
def parentrevs(self, rev):
    """Return the pair of parent revisions of ``rev``.

    Raises WdirUnsupported for the working-directory pseudo-revision.
    """
    try:
        entry = self.index[rev]
    except IndexError:
        if rev == wdirrev:
            raise error.WdirUnsupported
        raise

    p1, p2 = entry[5], entry[6]
    return p1, p2
664
813
def node(self, rev):
    """Return the node id (hash) of revision ``rev``.

    Raises WdirUnsupported for the working-directory pseudo-revision.
    """
    try:
        entry = self.index[rev]
    except IndexError:
        if rev == wdirrev:
            raise error.WdirUnsupported
        raise

    return entry[7]
672
821
673 # Derived from index values.
822 # Derived from index values.
674
823
def end(self, rev):
    """Return the byte offset one past the end of ``rev``'s data."""
    offset = self.start(rev)
    return offset + self.length(rev)
677
826
def parents(self, node):
    """Return the two parent nodes of ``node``."""
    index = self.index
    entry = index[self.rev(node)]
    # Parents are stored as revision numbers; map them back to nodes.
    return index[entry[5]][7], index[entry[6]][7]
682
831
def chainlen(self, rev):
    """Return the length of ``rev``'s delta chain."""
    clen, _deltalen = self._chaininfo(rev)
    return clen
685
834
def _chaininfo(self, rev):
    """Return ``(chain length, cumulative compressed delta size)`` for ``rev``."""
    cache = self._chaininfocache
    if rev in cache:
        return cache[rev]

    index = self.index
    generaldelta = self._generaldelta
    iterrev = rev
    entry = index[iterrev]
    clen = 0
    deltalen = 0
    while iterrev != entry[3]:
        clen += 1
        deltalen += entry[1]
        # With generaldelta the base is explicit; otherwise it is
        # simply the previous revision.
        iterrev = entry[3] if generaldelta else iterrev - 1
        if iterrev in cache:
            # Splice in the previously computed tail of the chain.
            cachedlen, cacheddeltalen = cache[iterrev]
            clen += cachedlen
            deltalen += cacheddeltalen
            break
        entry = index[iterrev]
    else:
        # Add text length of base since decompressing that also takes
        # work. For cache hits the length is already included.
        deltalen += entry[1]

    result = (clen, deltalen)
    cache[rev] = result
    return result
716
865
def _deltachain(self, rev, stoprev=None):
    """Obtain the delta chain for a revision.

    ``stoprev`` specifies a revision to stop at. If not specified, we
    stop at the base of the chain.

    Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
    revs in ascending order and ``stopped`` is a bool indicating whether
    ``stoprev`` was hit.
    """
    # Try C implementation.
    try:
        return self.index.deltachain(rev, stoprev, self._generaldelta)
    except AttributeError:
        pass

    # Aliases to prevent attribute lookups in the tight loop below.
    index = self.index
    generaldelta = self._generaldelta

    chain = []
    iterrev = rev
    entry = index[iterrev]
    while iterrev != entry[3] and iterrev != stoprev:
        chain.append(iterrev)
        iterrev = entry[3] if generaldelta else iterrev - 1
        entry = index[iterrev]

    stopped = iterrev == stoprev
    if not stopped:
        # We reached the base of the chain; it is part of the result.
        chain.append(iterrev)

    chain.reverse()
    return chain, stopped
757
906
def ancestors(self, revs, stoprev=0, inclusive=False):
    """Generate the ancestors of 'revs' in reverse topological order.
    Does not generate revs lower than stoprev.

    See the documentation for ancestor.lazyancestors for more details."""
    lazy = ancestor.lazyancestors(self.parentrevs, revs, stoprev=stoprev,
                                  inclusive=inclusive)
    return lazy
766
915
def descendants(self, revs):
    """Generate the descendants of 'revs' in revision order.

    Yield a sequence of revision numbers starting with a child of
    some rev in revs, i.e., each revision is *not* considered a
    descendant of itself. Results are ordered by revision number (a
    topological sort)."""
    lowest = min(revs)
    if lowest == nullrev:
        # Everything descends from the null revision.
        for rev in self:
            yield rev
        return

    known = set(revs)
    for candidate in self.revs(start=lowest + 1):
        for p in self.parentrevs(candidate):
            if p != nullrev and p in known:
                known.add(candidate)
                yield candidate
                break
787
936
def findcommonmissing(self, common=None, heads=None):
    """Return a tuple of the ancestors of common and the ancestors of heads
    that are not ancestors of common. In revset terminology, we return the
    tuple:

      ::common, (::heads) - (::common)

    The list is sorted by revision number, meaning it is
    topologically sorted.

    'heads' and 'common' are both lists of node IDs. If heads is
    not supplied, uses all of the revlog's heads. If common is not
    supplied, uses nullid."""
    if common is None:
        common = [nullid]
    if heads is None:
        heads = self.heads()

    common = [self.rev(n) for n in common]
    heads = [self.rev(n) for n in heads]

    # we want the ancestors, but inclusive
    class lazyset(object):
        """Set-like union of an eager set and a lazily-consumed iterable."""
        def __init__(self, lazyvalues):
            self.addedvalues = set()
            self.lazyvalues = lazyvalues

        def __contains__(self, value):
            return value in self.addedvalues or value in self.lazyvalues

        def __iter__(self):
            added = self.addedvalues
            for r in added:
                yield r
            for r in self.lazyvalues:
                if r not in added:
                    yield r

        def add(self, value):
            self.addedvalues.add(value)

        def update(self, values):
            self.addedvalues.update(values)

    has = lazyset(self.ancestors(common))
    has.add(nullrev)
    has.update(common)

    # take all ancestors from heads that aren't in has
    missing = set()
    visit = collections.deque(r for r in heads if r not in has)
    while visit:
        r = visit.popleft()
        if r in missing:
            continue
        missing.add(r)
        for p in self.parentrevs(r):
            if p not in has:
                visit.append(p)

    missing = sorted(missing)
    return has, [self.node(miss) for miss in missing]
851
1000
def incrementalmissingrevs(self, common=None):
    """Return an object that can be used to incrementally compute the
    revision numbers of the ancestors of arbitrary sets that are not
    ancestors of common. This is an ancestor.incrementalmissingancestors
    object.

    'common' is a list of revision numbers. If common is not supplied, uses
    nullrev.
    """
    if common is None:
        common = [nullrev]

    return ancestor.incrementalmissingancestors(self.parentrevs, common)
865
1014
def findmissingrevs(self, common=None, heads=None):
    """Return the revision numbers of the ancestors of heads that
    are not ancestors of common.

    More specifically, return a list of revision numbers corresponding to
    nodes N such that every N satisfies the following constraints:

      1. N is an ancestor of some node in 'heads'
      2. N is not an ancestor of any node in 'common'

    The list is sorted by revision number, meaning it is
    topologically sorted.

    'heads' and 'common' are both lists of revision numbers. If heads is
    not supplied, uses all of the revlog's heads. If common is not
    supplied, uses nullid."""
    if common is None:
        common = [nullrev]
    if heads is None:
        heads = self.headrevs()

    missingset = self.incrementalmissingrevs(common=common)
    return missingset.missingancestors(heads)
889
1038
def findmissing(self, common=None, heads=None):
    """Return the ancestors of heads that are not ancestors of common.

    More specifically, return a list of nodes N such that every N
    satisfies the following constraints:

      1. N is an ancestor of some node in 'heads'
      2. N is not an ancestor of any node in 'common'

    The list is sorted by revision number, meaning it is
    topologically sorted.

    'heads' and 'common' are both lists of node IDs. If heads is
    not supplied, uses all of the revlog's heads. If common is not
    supplied, uses nullid."""
    if common is None:
        common = [nullid]
    if heads is None:
        heads = self.heads()

    # Translate nodes to revision numbers before the set computation.
    commonrevs = [self.rev(n) for n in common]
    headrevs = [self.rev(n) for n in heads]

    inc = self.incrementalmissingrevs(common=commonrevs)
    return [self.node(r) for r in inc.missingancestors(headrevs)]
915
1064
916 def nodesbetween(self, roots=None, heads=None):
1065 def nodesbetween(self, roots=None, heads=None):
917 """Return a topological path from 'roots' to 'heads'.
1066 """Return a topological path from 'roots' to 'heads'.
918
1067
919 Return a tuple (nodes, outroots, outheads) where 'nodes' is a
1068 Return a tuple (nodes, outroots, outheads) where 'nodes' is a
920 topologically sorted list of all nodes N that satisfy both of
1069 topologically sorted list of all nodes N that satisfy both of
921 these constraints:
1070 these constraints:
922
1071
923 1. N is a descendant of some node in 'roots'
1072 1. N is a descendant of some node in 'roots'
924 2. N is an ancestor of some node in 'heads'
1073 2. N is an ancestor of some node in 'heads'
925
1074
926 Every node is considered to be both a descendant and an ancestor
1075 Every node is considered to be both a descendant and an ancestor
927 of itself, so every reachable node in 'roots' and 'heads' will be
1076 of itself, so every reachable node in 'roots' and 'heads' will be
928 included in 'nodes'.
1077 included in 'nodes'.
929
1078
930 'outroots' is the list of reachable nodes in 'roots', i.e., the
1079 'outroots' is the list of reachable nodes in 'roots', i.e., the
931 subset of 'roots' that is returned in 'nodes'. Likewise,
1080 subset of 'roots' that is returned in 'nodes'. Likewise,
932 'outheads' is the subset of 'heads' that is also in 'nodes'.
1081 'outheads' is the subset of 'heads' that is also in 'nodes'.
933
1082
934 'roots' and 'heads' are both lists of node IDs. If 'roots' is
1083 'roots' and 'heads' are both lists of node IDs. If 'roots' is
935 unspecified, uses nullid as the only root. If 'heads' is
1084 unspecified, uses nullid as the only root. If 'heads' is
936 unspecified, uses list of all of the revlog's heads."""
1085 unspecified, uses list of all of the revlog's heads."""
937 nonodes = ([], [], [])
1086 nonodes = ([], [], [])
938 if roots is not None:
1087 if roots is not None:
939 roots = list(roots)
1088 roots = list(roots)
940 if not roots:
1089 if not roots:
941 return nonodes
1090 return nonodes
942 lowestrev = min([self.rev(n) for n in roots])
1091 lowestrev = min([self.rev(n) for n in roots])
943 else:
1092 else:
944 roots = [nullid] # Everybody's a descendant of nullid
1093 roots = [nullid] # Everybody's a descendant of nullid
945 lowestrev = nullrev
1094 lowestrev = nullrev
946 if (lowestrev == nullrev) and (heads is None):
1095 if (lowestrev == nullrev) and (heads is None):
947 # We want _all_ the nodes!
1096 # We want _all_ the nodes!
948 return ([self.node(r) for r in self], [nullid], list(self.heads()))
1097 return ([self.node(r) for r in self], [nullid], list(self.heads()))
949 if heads is None:
1098 if heads is None:
950 # All nodes are ancestors, so the latest ancestor is the last
1099 # All nodes are ancestors, so the latest ancestor is the last
951 # node.
1100 # node.
952 highestrev = len(self) - 1
1101 highestrev = len(self) - 1
953 # Set ancestors to None to signal that every node is an ancestor.
1102 # Set ancestors to None to signal that every node is an ancestor.
954 ancestors = None
1103 ancestors = None
955 # Set heads to an empty dictionary for later discovery of heads
1104 # Set heads to an empty dictionary for later discovery of heads
956 heads = {}
1105 heads = {}
957 else:
1106 else:
958 heads = list(heads)
1107 heads = list(heads)
959 if not heads:
1108 if not heads:
960 return nonodes
1109 return nonodes
961 ancestors = set()
1110 ancestors = set()
962 # Turn heads into a dictionary so we can remove 'fake' heads.
1111 # Turn heads into a dictionary so we can remove 'fake' heads.
963 # Also, later we will be using it to filter out the heads we can't
1112 # Also, later we will be using it to filter out the heads we can't
964 # find from roots.
1113 # find from roots.
965 heads = dict.fromkeys(heads, False)
1114 heads = dict.fromkeys(heads, False)
966 # Start at the top and keep marking parents until we're done.
1115 # Start at the top and keep marking parents until we're done.
967 nodestotag = set(heads)
1116 nodestotag = set(heads)
968 # Remember where the top was so we can use it as a limit later.
1117 # Remember where the top was so we can use it as a limit later.
969 highestrev = max([self.rev(n) for n in nodestotag])
1118 highestrev = max([self.rev(n) for n in nodestotag])
970 while nodestotag:
1119 while nodestotag:
971 # grab a node to tag
1120 # grab a node to tag
972 n = nodestotag.pop()
1121 n = nodestotag.pop()
973 # Never tag nullid
1122 # Never tag nullid
974 if n == nullid:
1123 if n == nullid:
975 continue
1124 continue
976 # A node's revision number represents its place in a
1125 # A node's revision number represents its place in a
977 # topologically sorted list of nodes.
1126 # topologically sorted list of nodes.
978 r = self.rev(n)
1127 r = self.rev(n)
979 if r >= lowestrev:
1128 if r >= lowestrev:
980 if n not in ancestors:
1129 if n not in ancestors:
981 # If we are possibly a descendant of one of the roots
1130 # If we are possibly a descendant of one of the roots
982 # and we haven't already been marked as an ancestor
1131 # and we haven't already been marked as an ancestor
983 ancestors.add(n) # Mark as ancestor
1132 ancestors.add(n) # Mark as ancestor
984 # Add non-nullid parents to list of nodes to tag.
1133 # Add non-nullid parents to list of nodes to tag.
985 nodestotag.update([p for p in self.parents(n) if
1134 nodestotag.update([p for p in self.parents(n) if
986 p != nullid])
1135 p != nullid])
987 elif n in heads: # We've seen it before, is it a fake head?
1136 elif n in heads: # We've seen it before, is it a fake head?
988 # So it is, real heads should not be the ancestors of
1137 # So it is, real heads should not be the ancestors of
989 # any other heads.
1138 # any other heads.
990 heads.pop(n)
1139 heads.pop(n)
991 if not ancestors:
1140 if not ancestors:
992 return nonodes
1141 return nonodes
993 # Now that we have our set of ancestors, we want to remove any
1142 # Now that we have our set of ancestors, we want to remove any
994 # roots that are not ancestors.
1143 # roots that are not ancestors.
995
1144
996 # If one of the roots was nullid, everything is included anyway.
1145 # If one of the roots was nullid, everything is included anyway.
997 if lowestrev > nullrev:
1146 if lowestrev > nullrev:
998 # But, since we weren't, let's recompute the lowest rev to not
1147 # But, since we weren't, let's recompute the lowest rev to not
999 # include roots that aren't ancestors.
1148 # include roots that aren't ancestors.
1000
1149
1001 # Filter out roots that aren't ancestors of heads
1150 # Filter out roots that aren't ancestors of heads
1002 roots = [root for root in roots if root in ancestors]
1151 roots = [root for root in roots if root in ancestors]
1003 # Recompute the lowest revision
1152 # Recompute the lowest revision
1004 if roots:
1153 if roots:
1005 lowestrev = min([self.rev(root) for root in roots])
1154 lowestrev = min([self.rev(root) for root in roots])
1006 else:
1155 else:
1007 # No more roots? Return empty list
1156 # No more roots? Return empty list
1008 return nonodes
1157 return nonodes
1009 else:
1158 else:
1010 # We are descending from nullid, and don't need to care about
1159 # We are descending from nullid, and don't need to care about
1011 # any other roots.
1160 # any other roots.
1012 lowestrev = nullrev
1161 lowestrev = nullrev
1013 roots = [nullid]
1162 roots = [nullid]
1014 # Transform our roots list into a set.
1163 # Transform our roots list into a set.
1015 descendants = set(roots)
1164 descendants = set(roots)
1016 # Also, keep the original roots so we can filter out roots that aren't
1165 # Also, keep the original roots so we can filter out roots that aren't
1017 # 'real' roots (i.e. are descended from other roots).
1166 # 'real' roots (i.e. are descended from other roots).
1018 roots = descendants.copy()
1167 roots = descendants.copy()
1019 # Our topologically sorted list of output nodes.
1168 # Our topologically sorted list of output nodes.
1020 orderedout = []
1169 orderedout = []
1021 # Don't start at nullid since we don't want nullid in our output list,
1170 # Don't start at nullid since we don't want nullid in our output list,
1022 # and if nullid shows up in descendants, empty parents will look like
1171 # and if nullid shows up in descendants, empty parents will look like
1023 # they're descendants.
1172 # they're descendants.
1024 for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
1173 for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
1025 n = self.node(r)
1174 n = self.node(r)
1026 isdescendant = False
1175 isdescendant = False
1027 if lowestrev == nullrev: # Everybody is a descendant of nullid
1176 if lowestrev == nullrev: # Everybody is a descendant of nullid
1028 isdescendant = True
1177 isdescendant = True
1029 elif n in descendants:
1178 elif n in descendants:
1030 # n is already a descendant
1179 # n is already a descendant
1031 isdescendant = True
1180 isdescendant = True
1032 # This check only needs to be done here because all the roots
1181 # This check only needs to be done here because all the roots
1033 # will start being marked is descendants before the loop.
1182 # will start being marked is descendants before the loop.
1034 if n in roots:
1183 if n in roots:
1035 # If n was a root, check if it's a 'real' root.
1184 # If n was a root, check if it's a 'real' root.
1036 p = tuple(self.parents(n))
1185 p = tuple(self.parents(n))
1037 # If any of its parents are descendants, it's not a root.
1186 # If any of its parents are descendants, it's not a root.
1038 if (p[0] in descendants) or (p[1] in descendants):
1187 if (p[0] in descendants) or (p[1] in descendants):
1039 roots.remove(n)
1188 roots.remove(n)
1040 else:
1189 else:
1041 p = tuple(self.parents(n))
1190 p = tuple(self.parents(n))
1042 # A node is a descendant if either of its parents are
1191 # A node is a descendant if either of its parents are
1043 # descendants. (We seeded the dependents list with the roots
1192 # descendants. (We seeded the dependents list with the roots
1044 # up there, remember?)
1193 # up there, remember?)
1045 if (p[0] in descendants) or (p[1] in descendants):
1194 if (p[0] in descendants) or (p[1] in descendants):
1046 descendants.add(n)
1195 descendants.add(n)
1047 isdescendant = True
1196 isdescendant = True
1048 if isdescendant and ((ancestors is None) or (n in ancestors)):
1197 if isdescendant and ((ancestors is None) or (n in ancestors)):
1049 # Only include nodes that are both descendants and ancestors.
1198 # Only include nodes that are both descendants and ancestors.
1050 orderedout.append(n)
1199 orderedout.append(n)
1051 if (ancestors is not None) and (n in heads):
1200 if (ancestors is not None) and (n in heads):
1052 # We're trying to figure out which heads are reachable
1201 # We're trying to figure out which heads are reachable
1053 # from roots.
1202 # from roots.
1054 # Mark this head as having been reached
1203 # Mark this head as having been reached
1055 heads[n] = True
1204 heads[n] = True
1056 elif ancestors is None:
1205 elif ancestors is None:
1057 # Otherwise, we're trying to discover the heads.
1206 # Otherwise, we're trying to discover the heads.
1058 # Assume this is a head because if it isn't, the next step
1207 # Assume this is a head because if it isn't, the next step
1059 # will eventually remove it.
1208 # will eventually remove it.
1060 heads[n] = True
1209 heads[n] = True
1061 # But, obviously its parents aren't.
1210 # But, obviously its parents aren't.
1062 for p in self.parents(n):
1211 for p in self.parents(n):
1063 heads.pop(p, None)
1212 heads.pop(p, None)
1064 heads = [head for head, flag in heads.iteritems() if flag]
1213 heads = [head for head, flag in heads.iteritems() if flag]
1065 roots = list(roots)
1214 roots = list(roots)
1066 assert orderedout
1215 assert orderedout
1067 assert roots
1216 assert roots
1068 assert heads
1217 assert heads
1069 return (orderedout, roots, heads)
1218 return (orderedout, roots, heads)
1070
1219
1071 def headrevs(self):
1220 def headrevs(self):
1072 try:
1221 try:
1073 return self.index.headrevs()
1222 return self.index.headrevs()
1074 except AttributeError:
1223 except AttributeError:
1075 return self._headrevs()
1224 return self._headrevs()
1076
1225
1077 def computephases(self, roots):
1226 def computephases(self, roots):
1078 return self.index.computephasesmapsets(roots)
1227 return self.index.computephasesmapsets(roots)
1079
1228
1080 def _headrevs(self):
1229 def _headrevs(self):
1081 count = len(self)
1230 count = len(self)
1082 if not count:
1231 if not count:
1083 return [nullrev]
1232 return [nullrev]
1084 # we won't iter over filtered rev so nobody is a head at start
1233 # we won't iter over filtered rev so nobody is a head at start
1085 ishead = [0] * (count + 1)
1234 ishead = [0] * (count + 1)
1086 index = self.index
1235 index = self.index
1087 for r in self:
1236 for r in self:
1088 ishead[r] = 1 # I may be an head
1237 ishead[r] = 1 # I may be an head
1089 e = index[r]
1238 e = index[r]
1090 ishead[e[5]] = ishead[e[6]] = 0 # my parent are not
1239 ishead[e[5]] = ishead[e[6]] = 0 # my parent are not
1091 return [r for r, val in enumerate(ishead) if val]
1240 return [r for r, val in enumerate(ishead) if val]
1092
1241
    def heads(self, start=None, stop=None):
        """return the list of all nodes that have no children

        if start is specified, only heads that are descendants of
        start will be returned
        if stop is specified, it will consider all the revs from stop
        as if they had no children
        """
        if start is None and stop is None:
            # unconstrained query: delegate to the (possibly C
            # accelerated) headrevs computation
            if not len(self):
                return [nullid]
            return [self.node(r) for r in self.headrevs()]

        if start is None:
            start = nullid
        if stop is None:
            stop = []
        stoprevs = set([self.rev(n) for n in stop])
        startrev = self.rev(start)
        # walk forward from startrev, tracking which revisions remain
        # reachable and which of those currently have no reachable child
        reachable = {startrev}
        heads = {startrev}

        parentrevs = self.parentrevs
        for r in self.revs(start=startrev + 1):
            for p in parentrevs(r):
                if p in reachable:
                    if r not in stoprevs:
                        reachable.add(r)
                        heads.add(r)
                    # a parent with a reachable child is no longer a head,
                    # unless it was explicitly listed in ``stop``
                    if p in heads and p not in stoprevs:
                        heads.remove(p)

        return [self.node(r) for r in heads]
1126
1275
1127 def children(self, node):
1276 def children(self, node):
1128 """find the children of a given node"""
1277 """find the children of a given node"""
1129 c = []
1278 c = []
1130 p = self.rev(node)
1279 p = self.rev(node)
1131 for r in self.revs(start=p + 1):
1280 for r in self.revs(start=p + 1):
1132 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
1281 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
1133 if prevs:
1282 if prevs:
1134 for pr in prevs:
1283 for pr in prevs:
1135 if pr == p:
1284 if pr == p:
1136 c.append(self.node(r))
1285 c.append(self.node(r))
1137 elif p == nullrev:
1286 elif p == nullrev:
1138 c.append(self.node(r))
1287 c.append(self.node(r))
1139 return c
1288 return c
1140
1289
1141 def descendant(self, start, end):
1290 def descendant(self, start, end):
1142 if start == nullrev:
1291 if start == nullrev:
1143 return True
1292 return True
1144 for i in self.descendants([start]):
1293 for i in self.descendants([start]):
1145 if i == end:
1294 if i == end:
1146 return True
1295 return True
1147 elif i > end:
1296 elif i > end:
1148 break
1297 break
1149 return False
1298 return False
1150
1299
    def commonancestorsheads(self, a, b):
        """calculate all the heads of the common ancestors of nodes a and b"""
        # work on revision numbers; convert back to nodes at the end
        a, b = self.rev(a), self.rev(b)
        try:
            ancs = self.index.commonancestorsheads(a, b)
        except (AttributeError, OverflowError): # C implementation failed
            # pure-Python fallback over the parentrevs function
            ancs = ancestor.commonancestorsheads(self.parentrevs, a, b)
        return pycompat.maplist(self.node, ancs)
1159
1308
1160 def isancestor(self, a, b):
1309 def isancestor(self, a, b):
1161 """return True if node a is an ancestor of node b
1310 """return True if node a is an ancestor of node b
1162
1311
1163 The implementation of this is trivial but the use of
1312 The implementation of this is trivial but the use of
1164 commonancestorsheads is not."""
1313 commonancestorsheads is not."""
1165 return a in self.commonancestorsheads(a, b)
1314 return a in self.commonancestorsheads(a, b)
1166
1315
    def ancestor(self, a, b):
        """calculate the "best" common ancestor of nodes a and b"""

        # work on revision numbers; convert back to a node at the end
        a, b = self.rev(a), self.rev(b)
        try:
            ancs = self.index.ancestors(a, b)
        except (AttributeError, OverflowError):
            # pure Python, or the C index could not handle the input
            ancs = ancestor.ancestors(self.parentrevs, a, b)
        if ancs:
            # choose a consistent winner when there's a tie
            return min(map(self.node, ancs))
        return nullid
1179
1328
    def _match(self, id):
        """Try to resolve ``id`` to a binary node by exact means only.

        ``id`` may be a revision number, a 20-byte binary node, the
        string form of a (possibly negative) revision number, or a
        40-character hex nodeid.  Returns the binary node, or falls
        through (returning None) when nothing matches exactly.
        """
        if isinstance(id, int):
            # rev
            return self.node(id)
        if len(id) == 20:
            # possibly a binary node
            # odds of a binary node being all hex in ASCII are 1 in 10**25
            try:
                node = id
                self.rev(node) # quick search the index
                return node
            except LookupError:
                pass # may be partial hex id
        try:
            # str(rev)
            rev = int(id)
            if str(rev) != id:
                # reject forms like '0x1' or '01' that int() would accept
                raise ValueError
            if rev < 0:
                # negative revs count backwards from the tip
                rev = len(self) + rev
            if rev < 0 or rev >= len(self):
                raise ValueError
            return self.node(rev)
        except (ValueError, OverflowError):
            pass
        if len(id) == 40:
            try:
                # a full hex nodeid?
                node = bin(id)
                self.rev(node)
                return node
            except (TypeError, LookupError):
                pass
1213
1362
    def _partialmatch(self, id):
        """Resolve a (possibly abbreviated) hex nodeid prefix.

        Returns the matching binary node, or None when nothing matches.
        Raises LookupError on an ambiguous prefix, and
        error.WdirUnsupported when ``id`` can only denote the working
        directory pseudo-node ('ff...' prefix).
        """
        # the all-'f' wdir pseudo-node shares a prefix with real hashes
        maybewdir = wdirhex.startswith(id)
        try:
            # fast path: C radix tree lookup
            partial = self.index.partialmatch(id)
            if partial and self.hasnode(partial):
                if maybewdir:
                    # single 'ff...' match in radix tree, ambiguous with wdir
                    raise RevlogError
                return partial
            if maybewdir:
                # no 'ff...' match in radix tree, wdir identified
                raise error.WdirUnsupported
            return None
        except RevlogError:
            # parsers.c radix tree lookup gave multiple matches
            # fast path: for unfiltered changelog, radix tree is accurate
            if not getattr(self, 'filteredrevs', None):
                raise LookupError(id, self.indexfile,
                                  _('ambiguous identifier'))
            # fall through to slow path that filters hidden revisions
        except (AttributeError, ValueError):
            # we are pure python, or key was too short to search radix tree
            pass

        # cache of previously resolved prefixes
        if id in self._pcache:
            return self._pcache[id]

        if len(id) < 40:
            try:
                # hex(node)[:...]
                l = len(id) // 2  # grab an even number of digits
                prefix = bin(id[:l * 2])
                # linear scan of the index for nodes sharing the binary
                # prefix, then narrow by the full (odd-length aware) hex id
                nl = [e[7] for e in self.index if e[7].startswith(prefix)]
                nl = [n for n in nl if hex(n).startswith(id) and
                      self.hasnode(n)]
                if len(nl) > 0:
                    if len(nl) == 1 and not maybewdir:
                        self._pcache[id] = nl[0]
                        return nl[0]
                    raise LookupError(id, self.indexfile,
                                      _('ambiguous identifier'))
                if maybewdir:
                    raise error.WdirUnsupported
                return None
            except (TypeError, binascii.Error):
                # non-hex input cannot be a node prefix
                pass
1260
1409
1261 def lookup(self, id):
1410 def lookup(self, id):
1262 """locate a node based on:
1411 """locate a node based on:
1263 - revision number or str(revision number)
1412 - revision number or str(revision number)
1264 - nodeid or subset of hex nodeid
1413 - nodeid or subset of hex nodeid
1265 """
1414 """
1266 n = self._match(id)
1415 n = self._match(id)
1267 if n is not None:
1416 if n is not None:
1268 return n
1417 return n
1269 n = self._partialmatch(id)
1418 n = self._partialmatch(id)
1270 if n:
1419 if n:
1271 return n
1420 return n
1272
1421
1273 raise LookupError(id, self.indexfile, _('no match found'))
1422 raise LookupError(id, self.indexfile, _('no match found'))
1274
1423
    def shortest(self, hexnode, minlength=1):
        """Find the shortest unambiguous prefix that matches hexnode."""
        def isvalid(test):
            # a prefix is "valid" if it resolves unambiguously to hexnode
            # and cannot be mistaken for a plain revision number
            try:
                if self._partialmatch(test) is None:
                    return False

                try:
                    i = int(test)
                    # if we are a pure int, then starting with zero will not be
                    # confused as a rev; or, obviously, if the int is larger
                    # than the value of the tip rev
                    if test[0] == '0' or i > len(self):
                        return True
                    return False
                except ValueError:
                    # not numeric at all, so no rev-number confusion
                    return True
            except error.RevlogError:
                # ambiguous prefix
                return False
            except error.WdirUnsupported:
                # single 'ff...' match
                return True

        shortest = hexnode
        startlength = max(6, minlength)
        length = startlength
        # Probe downward from startlength while prefixes stay valid; if
        # the initial guess is already ambiguous, grow the prefix until a
        # valid one is found (or the full hexnode is reached).
        while True:
            test = hexnode[:length]
            if isvalid(test):
                shortest = test
                if length == minlength or length > startlength:
                    return shortest
                length -= 1
            else:
                length += 1
                if len(shortest) <= length:
                    return shortest
1312
1461
    def cmp(self, node, text):
        """compare text with a given file revision

        returns True if text is different than what is stored.
        """
        p1, p2 = self.parents(node)
        # ``hash`` here is the module-level three-argument revlog hash
        # helper (nodeid of text plus parents), not the builtin
        return hash(text, p1, p2) != node
1320
1469
1321 def _cachesegment(self, offset, data):
1470 def _cachesegment(self, offset, data):
1322 """Add a segment to the revlog cache.
1471 """Add a segment to the revlog cache.
1323
1472
1324 Accepts an absolute offset and the data that is at that location.
1473 Accepts an absolute offset and the data that is at that location.
1325 """
1474 """
1326 o, d = self._chunkcache
1475 o, d = self._chunkcache
1327 # try to add to existing cache
1476 # try to add to existing cache
1328 if o + len(d) == offset and len(d) + len(data) < _chunksize:
1477 if o + len(d) == offset and len(d) + len(data) < _chunksize:
1329 self._chunkcache = o, d + data
1478 self._chunkcache = o, d + data
1330 else:
1479 else:
1331 self._chunkcache = offset, data
1480 self._chunkcache = offset, data
1332
1481
    def _readsegment(self, offset, length, df=None):
        """Load a segment of raw data from the revlog.

        Accepts an absolute offset, length to read, and an optional existing
        file handle to read from.

        If an existing file handle is passed, it will be seeked and the
        original seek position will NOT be restored.

        Returns a str or buffer of raw byte data.
        """
        if df is not None:
            # caller-provided handle: caller keeps ownership, do not close
            closehandle = False
        else:
            # inline revlogs interleave data with the index file
            if self._inline:
                df = self.opener(self.indexfile)
            else:
                df = self.opener(self.datafile)
            closehandle = True

        # Cache data both forward and backward around the requested
        # data, in a fixed size window. This helps speed up operations
        # involving reading the revlog backwards.
        cachesize = self._chunkcachesize
        # round down/up to cachesize boundaries; this mask arithmetic
        # relies on _chunkcachesize being a power of two
        realoffset = offset & ~(cachesize - 1)
        reallength = (((offset + length + cachesize) & ~(cachesize - 1))
                      - realoffset)
        df.seek(realoffset)
        d = df.read(reallength)
        if closehandle:
            df.close()
        self._cachesegment(realoffset, d)
        if offset != realoffset or reallength != length:
            # hand back only the requested window of the over-read data
            return util.buffer(d, offset - realoffset, length)
        return d
1368
1517
1369 def _getsegment(self, offset, length, df=None):
1518 def _getsegment(self, offset, length, df=None):
1370 """Obtain a segment of raw data from the revlog.
1519 """Obtain a segment of raw data from the revlog.
1371
1520
1372 Accepts an absolute offset, length of bytes to obtain, and an
1521 Accepts an absolute offset, length of bytes to obtain, and an
1373 optional file handle to the already-opened revlog. If the file
1522 optional file handle to the already-opened revlog. If the file
1374 handle is used, it's original seek position will not be preserved.
1523 handle is used, it's original seek position will not be preserved.
1375
1524
1376 Requests for data may be returned from a cache.
1525 Requests for data may be returned from a cache.
1377
1526
1378 Returns a str or a buffer instance of raw byte data.
1527 Returns a str or a buffer instance of raw byte data.
1379 """
1528 """
1380 o, d = self._chunkcache
1529 o, d = self._chunkcache
1381 l = len(d)
1530 l = len(d)
1382
1531
1383 # is it in the cache?
1532 # is it in the cache?
1384 cachestart = offset - o
1533 cachestart = offset - o
1385 cacheend = cachestart + length
1534 cacheend = cachestart + length
1386 if cachestart >= 0 and cacheend <= l:
1535 if cachestart >= 0 and cacheend <= l:
1387 if cachestart == 0 and cacheend == l:
1536 if cachestart == 0 and cacheend == l:
1388 return d # avoid a copy
1537 return d # avoid a copy
1389 return util.buffer(d, cachestart, cacheend - cachestart)
1538 return util.buffer(d, cachestart, cacheend - cachestart)
1390
1539
1391 return self._readsegment(offset, length, df=df)
1540 return self._readsegment(offset, length, df=df)
1392
1541
    def _getsegmentforrevs(self, startrev, endrev, df=None):
        """Obtain a segment of raw data corresponding to a range of revisions.

        Accepts the start and end revisions and an optional already-open
        file handle to be used for reading. If the file handle is read, its
        seek position will not be preserved.

        Requests for data may be satisfied by a cache.

        Returns a 2-tuple of (offset, data) for the requested range of
        revisions. Offset is the integer offset from the beginning of the
        revlog and data is a str or buffer of the raw byte data.

        Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
        to determine where each revision's data begins and ends.
        """
        # Inlined self.start(startrev) & self.end(endrev) for perf reasons
        # (functions are expensive).
        index = self.index
        istart = index[startrev]
        # index entry field 0 packs the data-file offset in its high bits
        start = int(istart[0] >> 16)
        if startrev == endrev:
            end = start + istart[1]
        else:
            iend = index[endrev]
            end = int(iend[0] >> 16) + iend[1]

        if self._inline:
            # inline revlogs interleave one index entry before each data
            # chunk, so account for the index records within the range
            start += (startrev + 1) * self._io.size
            end += (endrev + 1) * self._io.size
        length = end - start

        return start, self._getsegment(start, length, df=df)
1426
1575
1427 def _chunk(self, rev, df=None):
1576 def _chunk(self, rev, df=None):
1428 """Obtain a single decompressed chunk for a revision.
1577 """Obtain a single decompressed chunk for a revision.
1429
1578
1430 Accepts an integer revision and an optional already-open file handle
1579 Accepts an integer revision and an optional already-open file handle
1431 to be used for reading. If used, the seek position of the file will not
1580 to be used for reading. If used, the seek position of the file will not
1432 be preserved.
1581 be preserved.
1433
1582
1434 Returns a str holding uncompressed data for the requested revision.
1583 Returns a str holding uncompressed data for the requested revision.
1435 """
1584 """
1436 return self.decompress(self._getsegmentforrevs(rev, rev, df=df)[1])
1585 return self.decompress(self._getsegmentforrevs(rev, rev, df=df)[1])
1437
1586
    def _chunks(self, revs, df=None):
        """Obtain decompressed chunks for the specified revisions.

        Accepts an iterable of numeric revisions that are assumed to be in
        ascending order. Also accepts an optional already-open file handle
        to be used for reading. If used, the seek position of the file will
        not be preserved.

        This function is similar to calling ``self._chunk()`` multiple times,
        but is faster.

        Returns a list with decompressed data for each requested revision.
        """
        if not revs:
            return []
        # hoist attribute/method lookups out of the per-revision loop below
        start = self.start
        length = self.length
        inline = self._inline
        iosize = self._io.size
        buffer = util.buffer

        l = []
        ladd = l.append

        # with sparse reads enabled, split the revisions into denser spans;
        # otherwise read everything as one contiguous segment
        if not self._withsparseread:
            slicedchunks = (revs,)
        else:
            slicedchunks = _slicechunk(self, revs)

        for revschunk in slicedchunks:
            firstrev = revschunk[0]
            # Skip trailing revisions with empty diff
            for lastrev in revschunk[::-1]:
                if length(lastrev) != 0:
                    break

            try:
                offset, data = self._getsegmentforrevs(firstrev, lastrev, df=df)
            except OverflowError:
                # issue4215 - we can't cache a run of chunks greater than
                # 2G on Windows
                return [self._chunk(rev, df=df) for rev in revschunk]

            decomp = self.decompress
            for rev in revschunk:
                chunkstart = start(rev)
                if inline:
                    # inline revlogs interleave index entries with data;
                    # skip past the index entries preceding this revision
                    chunkstart += (rev + 1) * iosize
                chunklength = length(rev)
                # zero-copy slice into the already-read segment
                ladd(decomp(buffer(data, chunkstart - offset, chunklength)))

        return l
1490
1639
1491 def _chunkclear(self):
1640 def _chunkclear(self):
1492 """Clear the raw chunk cache."""
1641 """Clear the raw chunk cache."""
1493 self._chunkcache = (0, '')
1642 self._chunkcache = (0, '')
1494
1643
1495 def deltaparent(self, rev):
1644 def deltaparent(self, rev):
1496 """return deltaparent of the given revision"""
1645 """return deltaparent of the given revision"""
1497 base = self.index[rev][3]
1646 base = self.index[rev][3]
1498 if base == rev:
1647 if base == rev:
1499 return nullrev
1648 return nullrev
1500 elif self._generaldelta:
1649 elif self._generaldelta:
1501 return base
1650 return base
1502 else:
1651 else:
1503 return rev - 1
1652 return rev - 1
1504
1653
    def revdiff(self, rev1, rev2):
        """return or calculate a delta between two revisions

        The delta calculated is in binary form and is intended to be written to
        revlog data directly. So this function needs raw revision data.
        """
        # fast path: if rev2 is already stored as a delta against rev1, the
        # stored chunk can be returned directly without recomputing anything
        if rev1 != nullrev and self.deltaparent(rev2) == rev1:
            return bytes(self._chunk(rev2))

        # otherwise compute a fresh delta from the two raw fulltexts
        return mdiff.textdiff(self.revision(rev1, raw=True),
                              self.revision(rev2, raw=True))
1516
1665
    def revision(self, nodeorrev, _df=None, raw=False):
        """return an uncompressed revision of a given node or revision
        number.

        _df - an existing file handle to read from. (internal-only)
        raw - an optional argument specifying if the revision data is to be
        treated as raw data when applying flag transforms. 'raw' should be set
        to True when generating changegroups or in debug commands.
        """
        if isinstance(nodeorrev, int):
            rev = nodeorrev
            node = self.node(rev)
        else:
            node = nodeorrev
            rev = None

        cachedrev = None
        flags = None
        rawtext = None
        if node == nullid:
            # the null revision is always empty
            return ""
        if self._cache:
            if self._cache[0] == node:
                # _cache only stores rawtext
                if raw:
                    return self._cache[2]
                # duplicated, but good for perf
                if rev is None:
                    rev = self.rev(node)
                if flags is None:
                    flags = self.flags(rev)
                # no extra flags set, no flag processor runs, text = rawtext
                if flags == REVIDX_DEFAULT_FLAGS:
                    return self._cache[2]
                # rawtext is reusable. need to run flag processor
                rawtext = self._cache[2]

            cachedrev = self._cache[1]

        # look up what we need to read
        if rawtext is None:
            if rev is None:
                rev = self.rev(node)

            # walk the delta chain, stopping early if the cached revision is
            # part of it (its rawtext then seeds the patching below)
            chain, stopped = self._deltachain(rev, stoprev=cachedrev)
            if stopped:
                rawtext = self._cache[2]

            # drop cache to save memory
            self._cache = None

            bins = self._chunks(chain, df=_df)
            if rawtext is None:
                # the chain starts with a full snapshot; use it as base text
                rawtext = bytes(bins[0])
                bins = bins[1:]

            # apply the remaining deltas on top of the base text
            rawtext = mdiff.patches(rawtext, bins)
            self._cache = (node, rev, rawtext)

        if flags is None:
            if rev is None:
                rev = self.rev(node)
            flags = self.flags(rev)

        text, validatehash = self._processflags(rawtext, flags, 'read', raw=raw)
        if validatehash:
            self.checkhash(text, node, rev=rev)

        return text
1586
1735
    def hash(self, text, p1, p2):
        """Compute a node hash.

        Available as a function so that subclasses can replace the hash
        as needed.
        """
        # resolves to the module-level hash() helper, not the one-argument
        # builtin (three arguments are passed here)
        return hash(text, p1, p2)
1594
1743
1595 def _processflags(self, text, flags, operation, raw=False):
1744 def _processflags(self, text, flags, operation, raw=False):
1596 """Inspect revision data flags and applies transforms defined by
1745 """Inspect revision data flags and applies transforms defined by
1597 registered flag processors.
1746 registered flag processors.
1598
1747
1599 ``text`` - the revision data to process
1748 ``text`` - the revision data to process
1600 ``flags`` - the revision flags
1749 ``flags`` - the revision flags
1601 ``operation`` - the operation being performed (read or write)
1750 ``operation`` - the operation being performed (read or write)
1602 ``raw`` - an optional argument describing if the raw transform should be
1751 ``raw`` - an optional argument describing if the raw transform should be
1603 applied.
1752 applied.
1604
1753
1605 This method processes the flags in the order (or reverse order if
1754 This method processes the flags in the order (or reverse order if
1606 ``operation`` is 'write') defined by REVIDX_FLAGS_ORDER, applying the
1755 ``operation`` is 'write') defined by REVIDX_FLAGS_ORDER, applying the
1607 flag processors registered for present flags. The order of flags defined
1756 flag processors registered for present flags. The order of flags defined
1608 in REVIDX_FLAGS_ORDER needs to be stable to allow non-commutativity.
1757 in REVIDX_FLAGS_ORDER needs to be stable to allow non-commutativity.
1609
1758
1610 Returns a 2-tuple of ``(text, validatehash)`` where ``text`` is the
1759 Returns a 2-tuple of ``(text, validatehash)`` where ``text`` is the
1611 processed text and ``validatehash`` is a bool indicating whether the
1760 processed text and ``validatehash`` is a bool indicating whether the
1612 returned text should be checked for hash integrity.
1761 returned text should be checked for hash integrity.
1613
1762
1614 Note: If the ``raw`` argument is set, it has precedence over the
1763 Note: If the ``raw`` argument is set, it has precedence over the
1615 operation and will only update the value of ``validatehash``.
1764 operation and will only update the value of ``validatehash``.
1616 """
1765 """
1617 # fast path: no flag processors will run
1766 # fast path: no flag processors will run
1618 if flags == 0:
1767 if flags == 0:
1619 return text, True
1768 return text, True
1620 if not operation in ('read', 'write'):
1769 if not operation in ('read', 'write'):
1621 raise ProgrammingError(_("invalid '%s' operation ") % (operation))
1770 raise ProgrammingError(_("invalid '%s' operation ") % (operation))
1622 # Check all flags are known.
1771 # Check all flags are known.
1623 if flags & ~REVIDX_KNOWN_FLAGS:
1772 if flags & ~REVIDX_KNOWN_FLAGS:
1624 raise RevlogError(_("incompatible revision flag '%#x'") %
1773 raise RevlogError(_("incompatible revision flag '%#x'") %
1625 (flags & ~REVIDX_KNOWN_FLAGS))
1774 (flags & ~REVIDX_KNOWN_FLAGS))
1626 validatehash = True
1775 validatehash = True
1627 # Depending on the operation (read or write), the order might be
1776 # Depending on the operation (read or write), the order might be
1628 # reversed due to non-commutative transforms.
1777 # reversed due to non-commutative transforms.
1629 orderedflags = REVIDX_FLAGS_ORDER
1778 orderedflags = REVIDX_FLAGS_ORDER
1630 if operation == 'write':
1779 if operation == 'write':
1631 orderedflags = reversed(orderedflags)
1780 orderedflags = reversed(orderedflags)
1632
1781
1633 for flag in orderedflags:
1782 for flag in orderedflags:
1634 # If a flagprocessor has been registered for a known flag, apply the
1783 # If a flagprocessor has been registered for a known flag, apply the
1635 # related operation transform and update result tuple.
1784 # related operation transform and update result tuple.
1636 if flag & flags:
1785 if flag & flags:
1637 vhash = True
1786 vhash = True
1638
1787
1639 if flag not in _flagprocessors:
1788 if flag not in _flagprocessors:
1640 message = _("missing processor for flag '%#x'") % (flag)
1789 message = _("missing processor for flag '%#x'") % (flag)
1641 raise RevlogError(message)
1790 raise RevlogError(message)
1642
1791
1643 processor = _flagprocessors[flag]
1792 processor = _flagprocessors[flag]
1644 if processor is not None:
1793 if processor is not None:
1645 readtransform, writetransform, rawtransform = processor
1794 readtransform, writetransform, rawtransform = processor
1646
1795
1647 if raw:
1796 if raw:
1648 vhash = rawtransform(self, text)
1797 vhash = rawtransform(self, text)
1649 elif operation == 'read':
1798 elif operation == 'read':
1650 text, vhash = readtransform(self, text)
1799 text, vhash = readtransform(self, text)
1651 else: # write operation
1800 else: # write operation
1652 text, vhash = writetransform(self, text)
1801 text, vhash = writetransform(self, text)
1653 validatehash = validatehash and vhash
1802 validatehash = validatehash and vhash
1654
1803
1655 return text, validatehash
1804 return text, validatehash
1656
1805
    def checkhash(self, text, node, p1=None, p2=None, rev=None):
        """Check node hash integrity.

        Available as a function so that subclasses can extend hash mismatch
        behaviors as needed.

        Raises RevlogError when ``node`` does not match the hash computed
        from ``text`` and its parents.
        """
        if p1 is None and p2 is None:
            p1, p2 = self.parents(node)
        if node != self.hash(text, p1, p2):
            # prefer reporting the revision number; fall back to a short form
            # of the node hash when no revision number is known
            revornode = rev
            if revornode is None:
                revornode = templatefilters.short(hex(node))
            raise RevlogError(_("integrity check failed on %s:%s")
                % (self.indexfile, pycompat.bytestr(revornode)))
1671
1820
    def checkinlinesize(self, tr, fp=None):
        """Check if the revlog is too big for inline and convert if so.

        This should be called after revisions are added to the revlog. If the
        revlog has grown too large to be an inline revlog, it will convert it
        to use multiple index and data files.
        """
        if not self._inline or (self.start(-2) + self.length(-2)) < _maxinline:
            return

        trinfo = tr.find(self.indexfile)
        if trinfo is None:
            raise RevlogError(_("%s not found in the transaction")
                              % self.indexfile)

        trindex = trinfo[2]
        if trindex is not None:
            dataoff = self.start(trindex)
        else:
            # revlog was stripped at start of transaction, use all leftover data
            trindex = len(self) - 1
            dataoff = self.end(-2)

        tr.add(self.datafile, dataoff)

        if fp:
            fp.flush()
            fp.close()

        # copy every revision's data segment into the new standalone data file
        df = self.opener(self.datafile, 'w')
        try:
            for r in self:
                df.write(self._getsegmentforrevs(r, r)[1])
        finally:
            df.close()

        # rewrite the index without inline data and with the inline flag
        # cleared from the version field
        fp = self.opener(self.indexfile, 'w', atomictemp=True,
                         checkambig=self._checkambig)
        self.version &= ~FLAG_INLINE_DATA
        self._inline = False
        for i in self:
            e = self._io.packentry(self.index[i], self.node, self.version, i)
            fp.write(e)

        # if we don't call close, the temp file will never replace the
        # real index
        fp.close()

        tr.replace(self.indexfile, trindex * self._io.size)
        self._chunkclear()
1722
1871
    def addrevision(self, text, transaction, link, p1, p2, cachedelta=None,
                    node=None, flags=REVIDX_DEFAULT_FLAGS, deltacomputer=None):
        """add a revision to the log

        text - the revision data to add
        transaction - the transaction object used for rollback
        link - the linkrev data to add
        p1, p2 - the parent nodeids of the revision
        cachedelta - an optional precomputed delta
        node - nodeid of revision; typically node is not specified, and it is
        computed by default as hash(text, p1, p2), however subclasses might
        use different hashing method (and override checkhash() in such case)
        flags - the known flags to set on the revision
        deltacomputer - an optional _deltacomputer instance shared between
        multiple calls
        """
        if link == nullrev:
            raise RevlogError(_("attempted to add linkrev -1 to %s")
                              % self.indexfile)

        if flags:
            # flag processors may need the node to run; compute it up front
            node = node or self.hash(text, p1, p2)

        rawtext, validatehash = self._processflags(text, flags, 'write')

        # If the flag processor modifies the revision data, ignore any provided
        # cachedelta.
        if rawtext != text:
            cachedelta = None

        if len(rawtext) > _maxentrysize:
            raise RevlogError(
                _("%s: size of %d bytes exceeds maximum revlog storage of 2GiB")
                % (self.indexfile, len(rawtext)))

        node = node or self.hash(rawtext, p1, p2)
        if node in self.nodemap:
            # this revision is already stored; nothing to add
            return node

        if validatehash:
            self.checkhash(rawtext, node, p1=p1, p2=p2)

        return self.addrawrevision(rawtext, transaction, link, p1, p2, node,
                                   flags, cachedelta=cachedelta,
                                   deltacomputer=deltacomputer)
1765
1917
    def addrawrevision(self, rawtext, transaction, link, p1, p2, node, flags,
                       cachedelta=None, deltacomputer=None):
        """add a raw revision with known flags, node and parents
        useful when reusing a revision not stored in this revlog (ex: received
        over wire, or read from an external bundle).
        """
        dfh = None
        if not self._inline:
            # non-inline revlogs keep their data in a separate .d file
            dfh = self.opener(self.datafile, "a+")
        ifh = self.opener(self.indexfile, "a+", checkambig=self._checkambig)
        try:
            return self._addrevision(node, rawtext, transaction, link, p1, p2,
                                     flags, cachedelta, ifh, dfh,
                                     deltacomputer=deltacomputer)
        finally:
            # always release the file handles, even if _addrevision raises
            if dfh:
                dfh.close()
            ifh.close()
1783
1936
1784 def compress(self, data):
1937 def compress(self, data):
1785 """Generate a possibly-compressed representation of data."""
1938 """Generate a possibly-compressed representation of data."""
1786 if not data:
1939 if not data:
1787 return '', data
1940 return '', data
1788
1941
1789 compressed = self._compressor.compress(data)
1942 compressed = self._compressor.compress(data)
1790
1943
1791 if compressed:
1944 if compressed:
1792 # The revlog compressor added the header in the returned data.
1945 # The revlog compressor added the header in the returned data.
1793 return '', compressed
1946 return '', compressed
1794
1947
1795 if data[0:1] == '\0':
1948 if data[0:1] == '\0':
1796 return '', data
1949 return '', data
1797 return 'u', data
1950 return 'u', data
1798
1951
    def decompress(self, data):
        """Decompress a revlog chunk.

        The chunk is expected to begin with a header identifying the
        format type so it can be routed to an appropriate decompressor.
        """
        if not data:
            return data

        # Revlogs are read much more frequently than they are written and many
        # chunks only take microseconds to decompress, so performance is
        # important here.
        #
        # We can make a few assumptions about revlogs:
        #
        # 1) the majority of chunks will be compressed (as opposed to inline
        #    raw data).
        # 2) decompressing *any* data will likely by at least 10x slower than
        #    returning raw inline data.
        # 3) we want to prioritize common and officially supported compression
        #    engines
        #
        # It follows that we want to optimize for "decompress compressed data
        # when encoded with common and officially supported compression engines"
        # case over "raw data" and "data encoded by less common or non-official
        # compression engines." That is why we have the inline lookup first
        # followed by the compengines lookup.
        #
        # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
        # compressed chunks. And this matters for changelog and manifest reads.
        t = data[0:1]

        if t == 'x':
            # 'x' is the first byte of a zlib stream
            try:
                return _zlibdecompress(data)
            except zlib.error as e:
                raise RevlogError(_('revlog decompress error: %s') % str(e))
        # '\0' is more common than 'u' so it goes first.
        elif t == '\0':
            # raw chunk stored without any marker byte
            return data
        elif t == 'u':
            # 'u' marks literal data; strip the marker without copying
            return util.buffer(data, 1)

        # unknown header: look up (and cache) a registered decompressor
        try:
            compressor = self._decompressors[t]
        except KeyError:
            try:
                engine = util.compengines.forrevlogheader(t)
                compressor = engine.revlogcompressor()
                self._decompressors[t] = compressor
            except KeyError:
                raise RevlogError(_('unknown compression type %r') % t)

        return compressor.decompress(data)
1853
2006
1854 def _isgooddeltainfo(self, d, textlen):
2007 def _isgooddeltainfo(self, d, textlen):
1855 """Returns True if the given delta is good. Good means that it is within
2008 """Returns True if the given delta is good. Good means that it is within
1856 the disk span, disk size, and chain length bounds that we know to be
2009 the disk span, disk size, and chain length bounds that we know to be
1857 performant."""
2010 performant."""
1858 if d is None:
2011 if d is None:
1859 return False
2012 return False
1860
2013
1861 # - 'd.distance' is the distance from the base revision -- bounding it
2014 # - 'd.distance' is the distance from the base revision -- bounding it
1862 # limits the amount of I/O we need to do.
2015 # limits the amount of I/O we need to do.
1863 # - 'd.compresseddeltalen' is the sum of the total size of deltas we
2016 # - 'd.compresseddeltalen' is the sum of the total size of deltas we
1864 # need to apply -- bounding it limits the amount of CPU we consume.
2017 # need to apply -- bounding it limits the amount of CPU we consume.
1865
2018
1866 defaultmax = textlen * 4
2019 defaultmax = textlen * 4
1867 maxdist = self._maxdeltachainspan
2020 maxdist = self._maxdeltachainspan
1868 if not maxdist:
2021 if not maxdist:
1869 maxdist = d.distance # ensure the conditional pass
2022 maxdist = d.distance # ensure the conditional pass
1870 maxdist = max(maxdist, defaultmax)
2023 maxdist = max(maxdist, defaultmax)
1871 if (d.distance > maxdist or d.deltalen > textlen or
2024 if (d.distance > maxdist or d.deltalen > textlen or
1872 d.compresseddeltalen > textlen * 2 or
2025 d.compresseddeltalen > textlen * 2 or
1873 (self._maxchainlen and d.chainlen > self._maxchainlen)):
2026 (self._maxchainlen and d.chainlen > self._maxchainlen)):
1874 return False
2027 return False
1875
2028
1876 return True
2029 return True
1877
2030
    def _getcandidaterevs(self, p1, p2, cachedelta):
        """
        Provides revisions that present an interest to be diffed against,
        grouped by level of easiness.

        This is a generator: each yielded value is a group of candidate
        base revisions, cheapest group first.
        """
        curr = len(self)
        prev = curr - 1
        p1r, p2r = self.rev(p1), self.rev(p2)

        # should we try to build a delta?
        if prev != nullrev and self.storedeltachains:
            # remember which bases were already offered so later groups do
            # not repeat them
            tested = set()
            # This condition is true most of the time when processing
            # changegroup data into a generaldelta repo. The only time it
            # isn't true is if this is the first revision in a delta chain
            # or if ``format.generaldelta=true`` disabled ``lazydeltabase``.
            if cachedelta and self._generaldelta and self._lazydeltabase:
                # Assume what we received from the server is a good choice
                # build delta will reuse the cache
                yield (cachedelta[0],)
                tested.add(cachedelta[0])

            if self._generaldelta:
                # exclude already lazy tested base if any
                parents = [p for p in (p1r, p2r)
                           if p != nullrev and p not in tested]
                if parents and not self._aggressivemergedeltas:
                    # Pick whichever parent is closer to us (to minimize the
                    # chance of having to build a fulltext).
                    parents = [max(parents)]
                tested.update(parents)
                yield parents

            if prev not in tested:
                # other approach failed try against prev to hopefully save us a
                # fulltext.
                yield (prev,)
1915
    def _buildtext(self, revinfo, fh):
        """Builds a fulltext version of a revision

        revinfo: _revisioninfo instance that contains all needed info
        fh: file handle to either the .i or the .d revlog file,
        depending on whether it is inlined or not

        The result is memoized in ``revinfo.btext[0]`` so repeated calls do
        not recompute the fulltext.
        """
        btext = revinfo.btext
        if btext[0] is not None:
            return btext[0]

        cachedelta = revinfo.cachedelta
        flags = revinfo.flags
        node = revinfo.node

        baserev = cachedelta[0]
        delta = cachedelta[1]
        # special case deltas which replace entire base; no need to decode
        # base revision. this neatly avoids censored bases, which throw when
        # they're decoded.
        hlen = struct.calcsize(">lll")
        if delta[:hlen] == mdiff.replacediffheader(self.rawsize(baserev),
                                                   len(delta) - hlen):
            btext[0] = delta[hlen:]
        else:
            # regular case: patch the base fulltext with the delta
            basetext = self.revision(baserev, _df=fh, raw=True)
            btext[0] = mdiff.patch(basetext, delta)

        try:
            res = self._processflags(btext[0], flags, 'read', raw=True)
            btext[0], validatehash = res
            if validatehash:
                self.checkhash(btext[0], node, p1=revinfo.p1, p2=revinfo.p2)
            if flags & REVIDX_ISCENSORED:
                raise RevlogError(_('node %s is not censored') % node)
        except CensoredNodeError:
            # must pass the censored index flag to add censored revisions
            if not flags & REVIDX_ISCENSORED:
                raise
        return btext[0]
1956
1957 def _builddeltadiff(self, base, revinfo, fh):
1958 t = self._buildtext(revinfo, fh)
1959 if self.iscensored(base):
1960 # deltas based on a censored revision must replace the
1961 # full content in one patch, so delta works everywhere
1962 header = mdiff.replacediffheader(self.rawsize(base), len(t))
1963 delta = header + t
1964 else:
1965 ptext = self.revision(base, _df=fh, raw=True)
1966 delta = mdiff.textdiff(ptext, t)
1967
1968 return delta
1969
    def _builddeltainfo(self, revinfo, base, fh):
        """Build a _deltainfo for a delta of ``revinfo`` against revision
        ``base``, including the cost metrics (distance, delta length, chain
        length, cumulative compressed delta size) used by the quality check.
        """
        # can we use the cached delta?
        if revinfo.cachedelta and revinfo.cachedelta[0] == base:
            delta = revinfo.cachedelta[1]
        else:
            delta = self._builddeltadiff(base, revinfo, fh)
        header, data = self.compress(delta)
        deltalen = len(header) + len(data)
        chainbase = self.chainbase(base)
        # distance spans from the chain's first byte to the end of this delta
        offset = self.end(len(self) - 1)
        dist = deltalen + offset - self.start(chainbase)
        if self._generaldelta:
            deltabase = base
        else:
            # without generaldelta, the stored base is the chain base
            deltabase = chainbase
        chainlen, compresseddeltalen = self._chaininfo(base)
        chainlen += 1
        compresseddeltalen += deltalen
        return _deltainfo(dist, deltalen, (header, data), deltabase,
                          chainbase, chainlen, compresseddeltalen)
1990
1991 def _finddeltainfo(self, revinfo, fh):
1992 """Find an acceptable delta against a candidate revision
1993
1994 revinfo: information about the revision (instance of _revisioninfo)
1995 fh: file handle to either the .i or the .d revlog file,
1996 depending on whether it is inlined or not
1997
1998 Returns the first acceptable candidate revision, as ordered by
1999 _getcandidaterevs
2000 """
2001 cachedelta = revinfo.cachedelta
2002 p1 = revinfo.p1
2003 p2 = revinfo.p2
2004
2005 deltainfo = None
2006 for candidaterevs in self._getcandidaterevs(p1, p2, cachedelta):
2007 nominateddeltas = []
2008 for candidaterev in candidaterevs:
2009 candidatedelta = self._builddeltainfo(revinfo, candidaterev, fh)
2010 if self._isgooddeltainfo(candidatedelta, revinfo.textlen):
2011 nominateddeltas.append(candidatedelta)
2012 if nominateddeltas:
2013 deltainfo = min(nominateddeltas, key=lambda x: x.deltalen)
2014 break
2015
2016 return deltainfo
2017
2018 def _addrevision(self, node, rawtext, transaction, link, p1, p2, flags,
2031 def _addrevision(self, node, rawtext, transaction, link, p1, p2, flags,
2019 cachedelta, ifh, dfh, alwayscache=False):
2032 cachedelta, ifh, dfh, alwayscache=False,
2033 deltacomputer=None):
2020 """internal function to add revisions to the log
2034 """internal function to add revisions to the log
2021
2035
2022 see addrevision for argument descriptions.
2036 see addrevision for argument descriptions.
2023
2037
2024 note: "addrevision" takes non-raw text, "_addrevision" takes raw text.
2038 note: "addrevision" takes non-raw text, "_addrevision" takes raw text.
2025
2039
2040 if "deltacomputer" is not provided or None, a defaultdeltacomputer will
2041 be used.
2042
2026 invariants:
2043 invariants:
2027 - rawtext is optional (can be None); if not set, cachedelta must be set.
2044 - rawtext is optional (can be None); if not set, cachedelta must be set.
2028 if both are set, they must correspond to each other.
2045 if both are set, they must correspond to each other.
2029 """
2046 """
2030 if node == nullid:
2047 if node == nullid:
2031 raise RevlogError(_("%s: attempt to add null revision") %
2048 raise RevlogError(_("%s: attempt to add null revision") %
2032 (self.indexfile))
2049 (self.indexfile))
2033 if node == wdirid:
2050 if node == wdirid:
2034 raise RevlogError(_("%s: attempt to add wdir revision") %
2051 raise RevlogError(_("%s: attempt to add wdir revision") %
2035 (self.indexfile))
2052 (self.indexfile))
2036
2053
2037 if self._inline:
2054 if self._inline:
2038 fh = ifh
2055 fh = ifh
2039 else:
2056 else:
2040 fh = dfh
2057 fh = dfh
2041
2058
2042 btext = [rawtext]
2059 btext = [rawtext]
2043
2060
2044 curr = len(self)
2061 curr = len(self)
2045 prev = curr - 1
2062 prev = curr - 1
2046 offset = self.end(prev)
2063 offset = self.end(prev)
2047 p1r, p2r = self.rev(p1), self.rev(p2)
2064 p1r, p2r = self.rev(p1), self.rev(p2)
2048
2065
2049 # full versions are inserted when the needed deltas
2066 # full versions are inserted when the needed deltas
2050 # become comparable to the uncompressed text
2067 # become comparable to the uncompressed text
2051 if rawtext is None:
2068 if rawtext is None:
2052 textlen = mdiff.patchedsize(self.rawsize(cachedelta[0]),
2069 textlen = mdiff.patchedsize(self.rawsize(cachedelta[0]),
2053 cachedelta[1])
2070 cachedelta[1])
2054 else:
2071 else:
2055 textlen = len(rawtext)
2072 textlen = len(rawtext)
2056
2073
2074 if deltacomputer is None:
2075 deltacomputer = _deltacomputer(self)
2076
2057 revinfo = _revisioninfo(node, p1, p2, btext, textlen, cachedelta, flags)
2077 revinfo = _revisioninfo(node, p1, p2, btext, textlen, cachedelta, flags)
2058 deltainfo = self._finddeltainfo(revinfo, fh)
2078 deltainfo = deltacomputer.finddeltainfo(revinfo, fh)
2059
2079
2060 if deltainfo is not None:
2080 if deltainfo is not None:
2061 base = deltainfo.base
2081 base = deltainfo.base
2062 chainbase = deltainfo.chainbase
2082 chainbase = deltainfo.chainbase
2063 data = deltainfo.data
2083 data = deltainfo.data
2064 l = deltainfo.deltalen
2084 l = deltainfo.deltalen
2065 else:
2085 else:
2066 rawtext = self._buildtext(revinfo, fh)
2086 rawtext = deltacomputer.buildtext(revinfo, fh)
2067 data = self.compress(rawtext)
2087 data = self.compress(rawtext)
2068 l = len(data[1]) + len(data[0])
2088 l = len(data[1]) + len(data[0])
2069 base = chainbase = curr
2089 base = chainbase = curr
2070
2090
2071 e = (offset_type(offset, flags), l, textlen,
2091 e = (offset_type(offset, flags), l, textlen,
2072 base, link, p1r, p2r, node)
2092 base, link, p1r, p2r, node)
2073 self.index.insert(-1, e)
2093 self.index.insert(-1, e)
2074 self.nodemap[node] = curr
2094 self.nodemap[node] = curr
2075
2095
2076 entry = self._io.packentry(e, self.node, self.version, curr)
2096 entry = self._io.packentry(e, self.node, self.version, curr)
2077 self._writeentry(transaction, ifh, dfh, entry, data, link, offset)
2097 self._writeentry(transaction, ifh, dfh, entry, data, link, offset)
2078
2098
2079 if alwayscache and rawtext is None:
2099 if alwayscache and rawtext is None:
2080 rawtext = self._buildtext(revinfo, fh)
2100 rawtext = deltacomputer._buildtext(revinfo, fh)
2081
2101
2082 if type(rawtext) == str: # only accept immutable objects
2102 if type(rawtext) == str: # only accept immutable objects
2083 self._cache = (node, curr, rawtext)
2103 self._cache = (node, curr, rawtext)
2084 self._chainbasecache[curr] = chainbase
2104 self._chainbasecache[curr] = chainbase
2085 return node
2105 return node
2086
2106
2087 def _writeentry(self, transaction, ifh, dfh, entry, data, link, offset):
2107 def _writeentry(self, transaction, ifh, dfh, entry, data, link, offset):
2088 # Files opened in a+ mode have inconsistent behavior on various
2108 # Files opened in a+ mode have inconsistent behavior on various
2089 # platforms. Windows requires that a file positioning call be made
2109 # platforms. Windows requires that a file positioning call be made
2090 # when the file handle transitions between reads and writes. See
2110 # when the file handle transitions between reads and writes. See
2091 # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
2111 # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
2092 # platforms, Python or the platform itself can be buggy. Some versions
2112 # platforms, Python or the platform itself can be buggy. Some versions
2093 # of Solaris have been observed to not append at the end of the file
2113 # of Solaris have been observed to not append at the end of the file
2094 # if the file was seeked to before the end. See issue4943 for more.
2114 # if the file was seeked to before the end. See issue4943 for more.
2095 #
2115 #
2096 # We work around this issue by inserting a seek() before writing.
2116 # We work around this issue by inserting a seek() before writing.
2097 # Note: This is likely not necessary on Python 3.
2117 # Note: This is likely not necessary on Python 3.
2098 ifh.seek(0, os.SEEK_END)
2118 ifh.seek(0, os.SEEK_END)
2099 if dfh:
2119 if dfh:
2100 dfh.seek(0, os.SEEK_END)
2120 dfh.seek(0, os.SEEK_END)
2101
2121
2102 curr = len(self) - 1
2122 curr = len(self) - 1
2103 if not self._inline:
2123 if not self._inline:
2104 transaction.add(self.datafile, offset)
2124 transaction.add(self.datafile, offset)
2105 transaction.add(self.indexfile, curr * len(entry))
2125 transaction.add(self.indexfile, curr * len(entry))
2106 if data[0]:
2126 if data[0]:
2107 dfh.write(data[0])
2127 dfh.write(data[0])
2108 dfh.write(data[1])
2128 dfh.write(data[1])
2109 ifh.write(entry)
2129 ifh.write(entry)
2110 else:
2130 else:
2111 offset += curr * self._io.size
2131 offset += curr * self._io.size
2112 transaction.add(self.indexfile, offset, curr)
2132 transaction.add(self.indexfile, offset, curr)
2113 ifh.write(entry)
2133 ifh.write(entry)
2114 ifh.write(data[0])
2134 ifh.write(data[0])
2115 ifh.write(data[1])
2135 ifh.write(data[1])
2116 self.checkinlinesize(transaction, ifh)
2136 self.checkinlinesize(transaction, ifh)
2117
2137
2118 def addgroup(self, deltas, linkmapper, transaction, addrevisioncb=None):
2138 def addgroup(self, deltas, linkmapper, transaction, addrevisioncb=None):
2119 """
2139 """
2120 add a delta group
2140 add a delta group
2121
2141
2122 given a set of deltas, add them to the revision log. the
2142 given a set of deltas, add them to the revision log. the
2123 first delta is against its parent, which should be in our
2143 first delta is against its parent, which should be in our
2124 log, the rest are against the previous delta.
2144 log, the rest are against the previous delta.
2125
2145
2126 If ``addrevisioncb`` is defined, it will be called with arguments of
2146 If ``addrevisioncb`` is defined, it will be called with arguments of
2127 this revlog and the node that was added.
2147 this revlog and the node that was added.
2128 """
2148 """
2129
2149
2130 nodes = []
2150 nodes = []
2131
2151
2132 r = len(self)
2152 r = len(self)
2133 end = 0
2153 end = 0
2134 if r:
2154 if r:
2135 end = self.end(r - 1)
2155 end = self.end(r - 1)
2136 ifh = self.opener(self.indexfile, "a+", checkambig=self._checkambig)
2156 ifh = self.opener(self.indexfile, "a+", checkambig=self._checkambig)
2137 isize = r * self._io.size
2157 isize = r * self._io.size
2138 if self._inline:
2158 if self._inline:
2139 transaction.add(self.indexfile, end + isize, r)
2159 transaction.add(self.indexfile, end + isize, r)
2140 dfh = None
2160 dfh = None
2141 else:
2161 else:
2142 transaction.add(self.indexfile, isize, r)
2162 transaction.add(self.indexfile, isize, r)
2143 transaction.add(self.datafile, end)
2163 transaction.add(self.datafile, end)
2144 dfh = self.opener(self.datafile, "a+")
2164 dfh = self.opener(self.datafile, "a+")
2145 def flush():
2165 def flush():
2146 if dfh:
2166 if dfh:
2147 dfh.flush()
2167 dfh.flush()
2148 ifh.flush()
2168 ifh.flush()
2149 try:
2169 try:
2170 deltacomputer = _deltacomputer(self)
2150 # loop through our set of deltas
2171 # loop through our set of deltas
2151 for data in deltas:
2172 for data in deltas:
2152 node, p1, p2, linknode, deltabase, delta, flags = data
2173 node, p1, p2, linknode, deltabase, delta, flags = data
2153 link = linkmapper(linknode)
2174 link = linkmapper(linknode)
2154 flags = flags or REVIDX_DEFAULT_FLAGS
2175 flags = flags or REVIDX_DEFAULT_FLAGS
2155
2176
2156 nodes.append(node)
2177 nodes.append(node)
2157
2178
2158 if node in self.nodemap:
2179 if node in self.nodemap:
2159 # this can happen if two branches make the same change
2180 # this can happen if two branches make the same change
2160 continue
2181 continue
2161
2182
2162 for p in (p1, p2):
2183 for p in (p1, p2):
2163 if p not in self.nodemap:
2184 if p not in self.nodemap:
2164 raise LookupError(p, self.indexfile,
2185 raise LookupError(p, self.indexfile,
2165 _('unknown parent'))
2186 _('unknown parent'))
2166
2187
2167 if deltabase not in self.nodemap:
2188 if deltabase not in self.nodemap:
2168 raise LookupError(deltabase, self.indexfile,
2189 raise LookupError(deltabase, self.indexfile,
2169 _('unknown delta base'))
2190 _('unknown delta base'))
2170
2191
2171 baserev = self.rev(deltabase)
2192 baserev = self.rev(deltabase)
2172
2193
2173 if baserev != nullrev and self.iscensored(baserev):
2194 if baserev != nullrev and self.iscensored(baserev):
2174 # if base is censored, delta must be full replacement in a
2195 # if base is censored, delta must be full replacement in a
2175 # single patch operation
2196 # single patch operation
2176 hlen = struct.calcsize(">lll")
2197 hlen = struct.calcsize(">lll")
2177 oldlen = self.rawsize(baserev)
2198 oldlen = self.rawsize(baserev)
2178 newlen = len(delta) - hlen
2199 newlen = len(delta) - hlen
2179 if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
2200 if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
2180 raise error.CensoredBaseError(self.indexfile,
2201 raise error.CensoredBaseError(self.indexfile,
2181 self.node(baserev))
2202 self.node(baserev))
2182
2203
2183 if not flags and self._peek_iscensored(baserev, delta, flush):
2204 if not flags and self._peek_iscensored(baserev, delta, flush):
2184 flags |= REVIDX_ISCENSORED
2205 flags |= REVIDX_ISCENSORED
2185
2206
2186 # We assume consumers of addrevisioncb will want to retrieve
2207 # We assume consumers of addrevisioncb will want to retrieve
2187 # the added revision, which will require a call to
2208 # the added revision, which will require a call to
2188 # revision(). revision() will fast path if there is a cache
2209 # revision(). revision() will fast path if there is a cache
2189 # hit. So, we tell _addrevision() to always cache in this case.
2210 # hit. So, we tell _addrevision() to always cache in this case.
2190 # We're only using addgroup() in the context of changegroup
2211 # We're only using addgroup() in the context of changegroup
2191 # generation so the revision data can always be handled as raw
2212 # generation so the revision data can always be handled as raw
2192 # by the flagprocessor.
2213 # by the flagprocessor.
2193 self._addrevision(node, None, transaction, link,
2214 self._addrevision(node, None, transaction, link,
2194 p1, p2, flags, (baserev, delta),
2215 p1, p2, flags, (baserev, delta),
2195 ifh, dfh,
2216 ifh, dfh,
2196 alwayscache=bool(addrevisioncb))
2217 alwayscache=bool(addrevisioncb),
2218 deltacomputer=deltacomputer)
2197
2219
2198 if addrevisioncb:
2220 if addrevisioncb:
2199 addrevisioncb(self, node)
2221 addrevisioncb(self, node)
2200
2222
2201 if not dfh and not self._inline:
2223 if not dfh and not self._inline:
2202 # addrevision switched from inline to conventional
2224 # addrevision switched from inline to conventional
2203 # reopen the index
2225 # reopen the index
2204 ifh.close()
2226 ifh.close()
2205 dfh = self.opener(self.datafile, "a+")
2227 dfh = self.opener(self.datafile, "a+")
2206 ifh = self.opener(self.indexfile, "a+",
2228 ifh = self.opener(self.indexfile, "a+",
2207 checkambig=self._checkambig)
2229 checkambig=self._checkambig)
2208 finally:
2230 finally:
2209 if dfh:
2231 if dfh:
2210 dfh.close()
2232 dfh.close()
2211 ifh.close()
2233 ifh.close()
2212
2234
2213 return nodes
2235 return nodes
2214
2236
2215 def iscensored(self, rev):
2237 def iscensored(self, rev):
2216 """Check if a file revision is censored."""
2238 """Check if a file revision is censored."""
2217 return False
2239 return False
2218
2240
2219 def _peek_iscensored(self, baserev, delta, flush):
2241 def _peek_iscensored(self, baserev, delta, flush):
2220 """Quickly check if a delta produces a censored revision."""
2242 """Quickly check if a delta produces a censored revision."""
2221 return False
2243 return False
2222
2244
2223 def getstrippoint(self, minlink):
2245 def getstrippoint(self, minlink):
2224 """find the minimum rev that must be stripped to strip the linkrev
2246 """find the minimum rev that must be stripped to strip the linkrev
2225
2247
2226 Returns a tuple containing the minimum rev and a set of all revs that
2248 Returns a tuple containing the minimum rev and a set of all revs that
2227 have linkrevs that will be broken by this strip.
2249 have linkrevs that will be broken by this strip.
2228 """
2250 """
2229 brokenrevs = set()
2251 brokenrevs = set()
2230 strippoint = len(self)
2252 strippoint = len(self)
2231
2253
2232 heads = {}
2254 heads = {}
2233 futurelargelinkrevs = set()
2255 futurelargelinkrevs = set()
2234 for head in self.headrevs():
2256 for head in self.headrevs():
2235 headlinkrev = self.linkrev(head)
2257 headlinkrev = self.linkrev(head)
2236 heads[head] = headlinkrev
2258 heads[head] = headlinkrev
2237 if headlinkrev >= minlink:
2259 if headlinkrev >= minlink:
2238 futurelargelinkrevs.add(headlinkrev)
2260 futurelargelinkrevs.add(headlinkrev)
2239
2261
2240 # This algorithm involves walking down the rev graph, starting at the
2262 # This algorithm involves walking down the rev graph, starting at the
2241 # heads. Since the revs are topologically sorted according to linkrev,
2263 # heads. Since the revs are topologically sorted according to linkrev,
2242 # once all head linkrevs are below the minlink, we know there are
2264 # once all head linkrevs are below the minlink, we know there are
2243 # no more revs that could have a linkrev greater than minlink.
2265 # no more revs that could have a linkrev greater than minlink.
2244 # So we can stop walking.
2266 # So we can stop walking.
2245 while futurelargelinkrevs:
2267 while futurelargelinkrevs:
2246 strippoint -= 1
2268 strippoint -= 1
2247 linkrev = heads.pop(strippoint)
2269 linkrev = heads.pop(strippoint)
2248
2270
2249 if linkrev < minlink:
2271 if linkrev < minlink:
2250 brokenrevs.add(strippoint)
2272 brokenrevs.add(strippoint)
2251 else:
2273 else:
2252 futurelargelinkrevs.remove(linkrev)
2274 futurelargelinkrevs.remove(linkrev)
2253
2275
2254 for p in self.parentrevs(strippoint):
2276 for p in self.parentrevs(strippoint):
2255 if p != nullrev:
2277 if p != nullrev:
2256 plinkrev = self.linkrev(p)
2278 plinkrev = self.linkrev(p)
2257 heads[p] = plinkrev
2279 heads[p] = plinkrev
2258 if plinkrev >= minlink:
2280 if plinkrev >= minlink:
2259 futurelargelinkrevs.add(plinkrev)
2281 futurelargelinkrevs.add(plinkrev)
2260
2282
2261 return strippoint, brokenrevs
2283 return strippoint, brokenrevs
2262
2284
2263 def strip(self, minlink, transaction):
2285 def strip(self, minlink, transaction):
2264 """truncate the revlog on the first revision with a linkrev >= minlink
2286 """truncate the revlog on the first revision with a linkrev >= minlink
2265
2287
2266 This function is called when we're stripping revision minlink and
2288 This function is called when we're stripping revision minlink and
2267 its descendants from the repository.
2289 its descendants from the repository.
2268
2290
2269 We have to remove all revisions with linkrev >= minlink, because
2291 We have to remove all revisions with linkrev >= minlink, because
2270 the equivalent changelog revisions will be renumbered after the
2292 the equivalent changelog revisions will be renumbered after the
2271 strip.
2293 strip.
2272
2294
2273 So we truncate the revlog on the first of these revisions, and
2295 So we truncate the revlog on the first of these revisions, and
2274 trust that the caller has saved the revisions that shouldn't be
2296 trust that the caller has saved the revisions that shouldn't be
2275 removed and that it'll re-add them after this truncation.
2297 removed and that it'll re-add them after this truncation.
2276 """
2298 """
2277 if len(self) == 0:
2299 if len(self) == 0:
2278 return
2300 return
2279
2301
2280 rev, _ = self.getstrippoint(minlink)
2302 rev, _ = self.getstrippoint(minlink)
2281 if rev == len(self):
2303 if rev == len(self):
2282 return
2304 return
2283
2305
2284 # first truncate the files on disk
2306 # first truncate the files on disk
2285 end = self.start(rev)
2307 end = self.start(rev)
2286 if not self._inline:
2308 if not self._inline:
2287 transaction.add(self.datafile, end)
2309 transaction.add(self.datafile, end)
2288 end = rev * self._io.size
2310 end = rev * self._io.size
2289 else:
2311 else:
2290 end += rev * self._io.size
2312 end += rev * self._io.size
2291
2313
2292 transaction.add(self.indexfile, end)
2314 transaction.add(self.indexfile, end)
2293
2315
2294 # then reset internal state in memory to forget those revisions
2316 # then reset internal state in memory to forget those revisions
2295 self._cache = None
2317 self._cache = None
2296 self._chaininfocache = {}
2318 self._chaininfocache = {}
2297 self._chunkclear()
2319 self._chunkclear()
2298 for x in xrange(rev, len(self)):
2320 for x in xrange(rev, len(self)):
2299 del self.nodemap[self.node(x)]
2321 del self.nodemap[self.node(x)]
2300
2322
2301 del self.index[rev:-1]
2323 del self.index[rev:-1]
2302
2324
2303 def checksize(self):
2325 def checksize(self):
2304 expected = 0
2326 expected = 0
2305 if len(self):
2327 if len(self):
2306 expected = max(0, self.end(len(self) - 1))
2328 expected = max(0, self.end(len(self) - 1))
2307
2329
2308 try:
2330 try:
2309 f = self.opener(self.datafile)
2331 f = self.opener(self.datafile)
2310 f.seek(0, 2)
2332 f.seek(0, 2)
2311 actual = f.tell()
2333 actual = f.tell()
2312 f.close()
2334 f.close()
2313 dd = actual - expected
2335 dd = actual - expected
2314 except IOError as inst:
2336 except IOError as inst:
2315 if inst.errno != errno.ENOENT:
2337 if inst.errno != errno.ENOENT:
2316 raise
2338 raise
2317 dd = 0
2339 dd = 0
2318
2340
2319 try:
2341 try:
2320 f = self.opener(self.indexfile)
2342 f = self.opener(self.indexfile)
2321 f.seek(0, 2)
2343 f.seek(0, 2)
2322 actual = f.tell()
2344 actual = f.tell()
2323 f.close()
2345 f.close()
2324 s = self._io.size
2346 s = self._io.size
2325 i = max(0, actual // s)
2347 i = max(0, actual // s)
2326 di = actual - (i * s)
2348 di = actual - (i * s)
2327 if self._inline:
2349 if self._inline:
2328 databytes = 0
2350 databytes = 0
2329 for r in self:
2351 for r in self:
2330 databytes += max(0, self.length(r))
2352 databytes += max(0, self.length(r))
2331 dd = 0
2353 dd = 0
2332 di = actual - len(self) * s - databytes
2354 di = actual - len(self) * s - databytes
2333 except IOError as inst:
2355 except IOError as inst:
2334 if inst.errno != errno.ENOENT:
2356 if inst.errno != errno.ENOENT:
2335 raise
2357 raise
2336 di = 0
2358 di = 0
2337
2359
2338 return (dd, di)
2360 return (dd, di)
2339
2361
2340 def files(self):
2362 def files(self):
2341 res = [self.indexfile]
2363 res = [self.indexfile]
2342 if not self._inline:
2364 if not self._inline:
2343 res.append(self.datafile)
2365 res.append(self.datafile)
2344 return res
2366 return res
2345
2367
2346 DELTAREUSEALWAYS = 'always'
2368 DELTAREUSEALWAYS = 'always'
2347 DELTAREUSESAMEREVS = 'samerevs'
2369 DELTAREUSESAMEREVS = 'samerevs'
2348 DELTAREUSENEVER = 'never'
2370 DELTAREUSENEVER = 'never'
2349
2371
2350 DELTAREUSEFULLADD = 'fulladd'
2372 DELTAREUSEFULLADD = 'fulladd'
2351
2373
2352 DELTAREUSEALL = {'always', 'samerevs', 'never', 'fulladd'}
2374 DELTAREUSEALL = {'always', 'samerevs', 'never', 'fulladd'}
2353
2375
2354 def clone(self, tr, destrevlog, addrevisioncb=None,
2376 def clone(self, tr, destrevlog, addrevisioncb=None,
2355 deltareuse=DELTAREUSESAMEREVS, aggressivemergedeltas=None):
2377 deltareuse=DELTAREUSESAMEREVS, aggressivemergedeltas=None):
2356 """Copy this revlog to another, possibly with format changes.
2378 """Copy this revlog to another, possibly with format changes.
2357
2379
2358 The destination revlog will contain the same revisions and nodes.
2380 The destination revlog will contain the same revisions and nodes.
2359 However, it may not be bit-for-bit identical due to e.g. delta encoding
2381 However, it may not be bit-for-bit identical due to e.g. delta encoding
2360 differences.
2382 differences.
2361
2383
2362 The ``deltareuse`` argument control how deltas from the existing revlog
2384 The ``deltareuse`` argument control how deltas from the existing revlog
2363 are preserved in the destination revlog. The argument can have the
2385 are preserved in the destination revlog. The argument can have the
2364 following values:
2386 following values:
2365
2387
2366 DELTAREUSEALWAYS
2388 DELTAREUSEALWAYS
2367 Deltas will always be reused (if possible), even if the destination
2389 Deltas will always be reused (if possible), even if the destination
2368 revlog would not select the same revisions for the delta. This is the
2390 revlog would not select the same revisions for the delta. This is the
2369 fastest mode of operation.
2391 fastest mode of operation.
2370 DELTAREUSESAMEREVS
2392 DELTAREUSESAMEREVS
2371 Deltas will be reused if the destination revlog would pick the same
2393 Deltas will be reused if the destination revlog would pick the same
2372 revisions for the delta. This mode strikes a balance between speed
2394 revisions for the delta. This mode strikes a balance between speed
2373 and optimization.
2395 and optimization.
2374 DELTAREUSENEVER
2396 DELTAREUSENEVER
2375 Deltas will never be reused. This is the slowest mode of execution.
2397 Deltas will never be reused. This is the slowest mode of execution.
2376 This mode can be used to recompute deltas (e.g. if the diff/delta
2398 This mode can be used to recompute deltas (e.g. if the diff/delta
2377 algorithm changes).
2399 algorithm changes).
2378
2400
2379 Delta computation can be slow, so the choice of delta reuse policy can
2401 Delta computation can be slow, so the choice of delta reuse policy can
2380 significantly affect run time.
2402 significantly affect run time.
2381
2403
2382 The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
2404 The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
2383 two extremes. Deltas will be reused if they are appropriate. But if the
2405 two extremes. Deltas will be reused if they are appropriate. But if the
2384 delta could choose a better revision, it will do so. This means if you
2406 delta could choose a better revision, it will do so. This means if you
2385 are converting a non-generaldelta revlog to a generaldelta revlog,
2407 are converting a non-generaldelta revlog to a generaldelta revlog,
2386 deltas will be recomputed if the delta's parent isn't a parent of the
2408 deltas will be recomputed if the delta's parent isn't a parent of the
2387 revision.
2409 revision.
2388
2410
2389 In addition to the delta policy, the ``aggressivemergedeltas`` argument
2411 In addition to the delta policy, the ``aggressivemergedeltas`` argument
2390 controls whether to compute deltas against both parents for merges.
2412 controls whether to compute deltas against both parents for merges.
2391 By default, the current default is used.
2413 By default, the current default is used.
2392 """
2414 """
2393 if deltareuse not in self.DELTAREUSEALL:
2415 if deltareuse not in self.DELTAREUSEALL:
2394 raise ValueError(_('value for deltareuse invalid: %s') % deltareuse)
2416 raise ValueError(_('value for deltareuse invalid: %s') % deltareuse)
2395
2417
2396 if len(destrevlog):
2418 if len(destrevlog):
2397 raise ValueError(_('destination revlog is not empty'))
2419 raise ValueError(_('destination revlog is not empty'))
2398
2420
2399 if getattr(self, 'filteredrevs', None):
2421 if getattr(self, 'filteredrevs', None):
2400 raise ValueError(_('source revlog has filtered revisions'))
2422 raise ValueError(_('source revlog has filtered revisions'))
2401 if getattr(destrevlog, 'filteredrevs', None):
2423 if getattr(destrevlog, 'filteredrevs', None):
2402 raise ValueError(_('destination revlog has filtered revisions'))
2424 raise ValueError(_('destination revlog has filtered revisions'))
2403
2425
2404 # lazydeltabase controls whether to reuse a cached delta, if possible.
2426 # lazydeltabase controls whether to reuse a cached delta, if possible.
2405 oldlazydeltabase = destrevlog._lazydeltabase
2427 oldlazydeltabase = destrevlog._lazydeltabase
2406 oldamd = destrevlog._aggressivemergedeltas
2428 oldamd = destrevlog._aggressivemergedeltas
2407
2429
2408 try:
2430 try:
2409 if deltareuse == self.DELTAREUSEALWAYS:
2431 if deltareuse == self.DELTAREUSEALWAYS:
2410 destrevlog._lazydeltabase = True
2432 destrevlog._lazydeltabase = True
2411 elif deltareuse == self.DELTAREUSESAMEREVS:
2433 elif deltareuse == self.DELTAREUSESAMEREVS:
2412 destrevlog._lazydeltabase = False
2434 destrevlog._lazydeltabase = False
2413
2435
2414 destrevlog._aggressivemergedeltas = aggressivemergedeltas or oldamd
2436 destrevlog._aggressivemergedeltas = aggressivemergedeltas or oldamd
2415
2437
2416 populatecachedelta = deltareuse in (self.DELTAREUSEALWAYS,
2438 populatecachedelta = deltareuse in (self.DELTAREUSEALWAYS,
2417 self.DELTAREUSESAMEREVS)
2439 self.DELTAREUSESAMEREVS)
2418
2440
2441 deltacomputer = _deltacomputer(destrevlog)
2419 index = self.index
2442 index = self.index
2420 for rev in self:
2443 for rev in self:
2421 entry = index[rev]
2444 entry = index[rev]
2422
2445
2423 # Some classes override linkrev to take filtered revs into
2446 # Some classes override linkrev to take filtered revs into
2424 # account. Use raw entry from index.
2447 # account. Use raw entry from index.
2425 flags = entry[0] & 0xffff
2448 flags = entry[0] & 0xffff
2426 linkrev = entry[4]
2449 linkrev = entry[4]
2427 p1 = index[entry[5]][7]
2450 p1 = index[entry[5]][7]
2428 p2 = index[entry[6]][7]
2451 p2 = index[entry[6]][7]
2429 node = entry[7]
2452 node = entry[7]
2430
2453
2431 # (Possibly) reuse the delta from the revlog if allowed and
2454 # (Possibly) reuse the delta from the revlog if allowed and
2432 # the revlog chunk is a delta.
2455 # the revlog chunk is a delta.
2433 cachedelta = None
2456 cachedelta = None
2434 rawtext = None
2457 rawtext = None
2435 if populatecachedelta:
2458 if populatecachedelta:
2436 dp = self.deltaparent(rev)
2459 dp = self.deltaparent(rev)
2437 if dp != nullrev:
2460 if dp != nullrev:
2438 cachedelta = (dp, str(self._chunk(rev)))
2461 cachedelta = (dp, str(self._chunk(rev)))
2439
2462
2440 if not cachedelta:
2463 if not cachedelta:
2441 rawtext = self.revision(rev, raw=True)
2464 rawtext = self.revision(rev, raw=True)
2442
2465
2443
2466
2444 if deltareuse == self.DELTAREUSEFULLADD:
2467 if deltareuse == self.DELTAREUSEFULLADD:
2445 destrevlog.addrevision(rawtext, tr, linkrev, p1, p2,
2468 destrevlog.addrevision(rawtext, tr, linkrev, p1, p2,
2446 cachedelta=cachedelta,
2469 cachedelta=cachedelta,
2447 node=node, flags=flags)
2470 node=node, flags=flags,
2471 deltacomputer=deltacomputer)
2448 else:
2472 else:
2449 ifh = destrevlog.opener(destrevlog.indexfile, 'a+',
2473 ifh = destrevlog.opener(destrevlog.indexfile, 'a+',
2450 checkambig=False)
2474 checkambig=False)
2451 dfh = None
2475 dfh = None
2452 if not destrevlog._inline:
2476 if not destrevlog._inline:
2453 dfh = destrevlog.opener(destrevlog.datafile, 'a+')
2477 dfh = destrevlog.opener(destrevlog.datafile, 'a+')
2454 try:
2478 try:
2455 destrevlog._addrevision(node, rawtext, tr, linkrev, p1,
2479 destrevlog._addrevision(node, rawtext, tr, linkrev, p1,
2456 p2, flags, cachedelta, ifh, dfh)
2480 p2, flags, cachedelta, ifh, dfh,
2481 deltacomputer=deltacomputer)
2457 finally:
2482 finally:
2458 if dfh:
2483 if dfh:
2459 dfh.close()
2484 dfh.close()
2460 ifh.close()
2485 ifh.close()
2461
2486
2462 if addrevisioncb:
2487 if addrevisioncb:
2463 addrevisioncb(self, rev, node)
2488 addrevisioncb(self, rev, node)
2464 finally:
2489 finally:
2465 destrevlog._lazydeltabase = oldlazydeltabase
2490 destrevlog._lazydeltabase = oldlazydeltabase
2466 destrevlog._aggressivemergedeltas = oldamd
2491 destrevlog._aggressivemergedeltas = oldamd
General Comments 0
You need to be logged in to leave comments. Login now