upstream/mercurial-mirror Commit - r39366:655b5b46

revlog: split functionality related to deltas computation in a new module...

Boris Feld -

r39366:655b5b46 default

parent child

mercurial/debugcommands.py

0 +5 -1

                  stringutil,
              )
+             from .revlogutils import (
+                 deltas as deltautil
+             )
              release = lockmod.release
              command = registrar.command()
                          largestblock = 0
                          srchunks = 0
-                         for revschunk in revlog._slicechunk(r, chain):
+                         for revschunk in deltautil.slicechunk(r, chain):
                              srchunks += 1
                              blkend = start(revschunk[-1]) + length(revschunk[-1])
                              blksize = blkend - start(revschunk[0])

mercurial/revlog.py

0 +9 -700

This diff has been collapsed as it changes many lines, (709 lines changed) Show them Hide them
			@@ -17,7 +17,6 b' import collections'
	17	17	import contextlib
	18	18	import errno
	19	19	import hashlib
	20		import heapq
	21	20	import os
	22	21	import re
	23	22	import struct
			@@ -39,7 +38,6 b' from .i18n import _'
	39	38	from .revlogutils.constants import (
	40	39	FLAG_GENERALDELTA,
	41	40	FLAG_INLINE_DATA,
	42		LIMIT_DELTA2TEXT,
	43	41	REVIDX_DEFAULT_FLAGS,
	44	42	REVIDX_ELLIPSIS,
	45	43	REVIDX_EXTSTORED,
			@@ -69,6 +67,9 b' from . import ('
	69	67	templatefilters,
	70	68	util,
	71	69	)
		70	from .revlogutils import (
		71	deltas as deltautil,
		72	)
	72	73	from .utils import (
	73	74	interfaceutil,
	74	75	stringutil,
			@@ -211,610 +212,6 b' def hash(text, p1, p2):'
	211	212	s.update(text)
	212	213	return s.digest()
	213	214
	214		class _testrevlog(object):
	215		"""minimalist fake revlog to use in doctests"""
	216
	217		def __init__(self, data, density=0.5, mingap=0):
	218		"""data is an list of revision payload boundaries"""
	219		self._data = data
	220		self._srdensitythreshold = density
	221		self._srmingapsize = mingap
	222
	223		def start(self, rev):
	224		if rev == 0:
	225		return 0
	226		return self._data[rev - 1]
	227
	228		def end(self, rev):
	229		return self._data[rev]
	230
	231		def length(self, rev):
	232		return self.end(rev) - self.start(rev)
	233
	234		def __len__(self):
	235		return len(self._data)
	236
	237		def _trimchunk(revlog, revs, startidx, endidx=None):
	238		"""returns revs[startidx:endidx] without empty trailing revs
	239
	240		Doctest Setup
	241		>>> revlog = _testrevlog([
	242		... 5, #0
	243		... 10, #1
	244		... 12, #2
	245		... 12, #3 (empty)
	246		... 17, #4
	247		... 21, #5
	248		... 21, #6 (empty)
	249		... ])
	250
	251		Contiguous cases:
	252		>>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0)
	253		[0, 1, 2, 3, 4, 5]
	254		>>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 5)
	255		[0, 1, 2, 3, 4]
	256		>>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 4)
	257		[0, 1, 2]
	258		>>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 2, 4)
	259		[2]
	260		>>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3)
	261		[3, 4, 5]
	262		>>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3, 5)
	263		[3, 4]
	264
	265		Discontiguous cases:
	266		>>> _trimchunk(revlog, [1, 3, 5, 6], 0)
	267		[1, 3, 5]
	268		>>> _trimchunk(revlog, [1, 3, 5, 6], 0, 2)
	269		[1]
	270		>>> _trimchunk(revlog, [1, 3, 5, 6], 1, 3)
	271		[3, 5]
	272		>>> _trimchunk(revlog, [1, 3, 5, 6], 1)
	273		[3, 5]
	274		"""
	275		length = revlog.length
	276
	277		if endidx is None:
	278		endidx = len(revs)
	279
	280		# If we have a non-emtpy delta candidate, there are nothing to trim
	281		if revs[endidx - 1] < len(revlog):
	282		# Trim empty revs at the end, except the very first revision of a chain
	283		while (endidx > 1
	284		and endidx > startidx
	285		and length(revs[endidx - 1]) == 0):
	286		endidx -= 1
	287
	288		return revs[startidx:endidx]
	289
	290		def _segmentspan(revlog, revs, deltainfo=None):
	291		"""Get the byte span of a segment of revisions
	292
	293		revs is a sorted array of revision numbers
	294
	295		>>> revlog = _testrevlog([
	296		... 5, #0
	297		... 10, #1
	298		... 12, #2
	299		... 12, #3 (empty)
	300		... 17, #4
	301		... ])
	302
	303		>>> _segmentspan(revlog, [0, 1, 2, 3, 4])
	304		17
	305		>>> _segmentspan(revlog, [0, 4])
	306		17
	307		>>> _segmentspan(revlog, [3, 4])
	308		5
	309		>>> _segmentspan(revlog, [1, 2, 3,])
	310		7
	311		>>> _segmentspan(revlog, [1, 3])
	312		7
	313		"""
	314		if not revs:
	315		return 0
	316		if deltainfo is not None and len(revlog) <= revs[-1]:
	317		if len(revs) == 1:
	318		return deltainfo.deltalen
	319		offset = revlog.end(len(revlog) - 1)
	320		end = deltainfo.deltalen + offset
	321		else:
	322		end = revlog.end(revs[-1])
	323		return end - revlog.start(revs[0])
	324
	325		def _slicechunk(revlog, revs, deltainfo=None, targetsize=None):
	326		"""slice revs to reduce the amount of unrelated data to be read from disk.
	327
	328		``revs`` is sliced into groups that should be read in one time.
	329		Assume that revs are sorted.
	330
	331		The initial chunk is sliced until the overall density (payload/chunks-span
	332		ratio) is above `revlog._srdensitythreshold`. No gap smaller than
	333		`revlog._srmingapsize` is skipped.
	334
	335		If `targetsize` is set, no chunk larger than `targetsize` will be yield.
	336		For consistency with other slicing choice, this limit won't go lower than
	337		`revlog._srmingapsize`.
	338
	339		If individual revisions chunk are larger than this limit, they will still
	340		be raised individually.
	341
	342		>>> revlog = _testrevlog([
	343		... 5, #00 (5)
	344		... 10, #01 (5)
	345		... 12, #02 (2)
	346		... 12, #03 (empty)
	347		... 27, #04 (15)
	348		... 31, #05 (4)
	349		... 31, #06 (empty)
	350		... 42, #07 (11)
	351		... 47, #08 (5)
	352		... 47, #09 (empty)
	353		... 48, #10 (1)
	354		... 51, #11 (3)
	355		... 74, #12 (23)
	356		... 85, #13 (11)
	357		... 86, #14 (1)
	358		... 91, #15 (5)
	359		... ])
	360
	361		>>> list(_slicechunk(revlog, list(range(16))))
	362		[[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]]
	363		>>> list(_slicechunk(revlog, [0, 15]))
	364		[[0], [15]]
	365		>>> list(_slicechunk(revlog, [0, 11, 15]))
	366		[[0], [11], [15]]
	367		>>> list(_slicechunk(revlog, [0, 11, 13, 15]))
	368		[[0], [11, 13, 15]]
	369		>>> list(_slicechunk(revlog, [1, 2, 3, 5, 8, 10, 11, 14]))
	370		[[1, 2], [5, 8, 10, 11], [14]]
	371
	372		Slicing with a maximum chunk size
	373		>>> list(_slicechunk(revlog, [0, 11, 13, 15], targetsize=15))
	374		[[0], [11], [13], [15]]
	375		>>> list(_slicechunk(revlog, [0, 11, 13, 15], targetsize=20))
	376		[[0], [11], [13, 15]]
	377		"""
	378		if targetsize is not None:
	379		targetsize = max(targetsize, revlog._srmingapsize)
	380		# targetsize should not be specified when evaluating delta candidates:
	381		# * targetsize is used to ensure we stay within specification when reading,
	382		# * deltainfo is used to pick are good delta chain when writing.
	383		if not (deltainfo is None or targetsize is None):
	384		msg = 'cannot use `targetsize` with a `deltainfo`'
	385		raise error.ProgrammingError(msg)
	386		for chunk in _slicechunktodensity(revlog, revs,
	387		deltainfo,
	388		revlog._srdensitythreshold,
	389		revlog._srmingapsize):
	390		for subchunk in _slicechunktosize(revlog, chunk, targetsize):
	391		yield subchunk
	392
	393		def _slicechunktosize(revlog, revs, targetsize=None):
	394		"""slice revs to match the target size
	395
	396		This is intended to be used on chunk that density slicing selected by that
	397		are still too large compared to the read garantee of revlog. This might
	398		happens when "minimal gap size" interrupted the slicing or when chain are
	399		built in a way that create large blocks next to each other.
	400
	401		>>> revlog = _testrevlog([
	402		... 3, #0 (3)
	403		... 5, #1 (2)
	404		... 6, #2 (1)
	405		... 8, #3 (2)
	406		... 8, #4 (empty)
	407		... 11, #5 (3)
	408		... 12, #6 (1)
	409		... 13, #7 (1)
	410		... 14, #8 (1)
	411		... ])
	412
	413		Cases where chunk is already small enough
	414		>>> list(_slicechunktosize(revlog, [0], 3))
	415		[[0]]
	416		>>> list(_slicechunktosize(revlog, [6, 7], 3))
	417		[[6, 7]]
	418		>>> list(_slicechunktosize(revlog, [0], None))
	419		[[0]]
	420		>>> list(_slicechunktosize(revlog, [6, 7], None))
	421		[[6, 7]]
	422
	423		cases where we need actual slicing
	424		>>> list(_slicechunktosize(revlog, [0, 1], 3))
	425		[[0], [1]]
	426		>>> list(_slicechunktosize(revlog, [1, 3], 3))
	427		[[1], [3]]
	428		>>> list(_slicechunktosize(revlog, [1, 2, 3], 3))
	429		[[1, 2], [3]]
	430		>>> list(_slicechunktosize(revlog, [3, 5], 3))
	431		[[3], [5]]
	432		>>> list(_slicechunktosize(revlog, [3, 4, 5], 3))
	433		[[3], [5]]
	434		>>> list(_slicechunktosize(revlog, [5, 6, 7, 8], 3))
	435		[[5], [6, 7, 8]]
	436		>>> list(_slicechunktosize(revlog, [0, 1, 2, 3, 4, 5, 6, 7, 8], 3))
	437		[[0], [1, 2], [3], [5], [6, 7, 8]]
	438
	439		Case with too large individual chunk (must return valid chunk)
	440		>>> list(_slicechunktosize(revlog, [0, 1], 2))
	441		[[0], [1]]
	442		>>> list(_slicechunktosize(revlog, [1, 3], 1))
	443		[[1], [3]]
	444		>>> list(_slicechunktosize(revlog, [3, 4, 5], 2))
	445		[[3], [5]]
	446		"""
	447		assert targetsize is None or 0 <= targetsize
	448		if targetsize is None or _segmentspan(revlog, revs) <= targetsize:
	449		yield revs
	450		return
	451
	452		startrevidx = 0
	453		startdata = revlog.start(revs[0])
	454		endrevidx = 0
	455		iterrevs = enumerate(revs)
	456		next(iterrevs) # skip first rev.
	457		for idx, r in iterrevs:
	458		span = revlog.end(r) - startdata
	459		if span <= targetsize:
	460		endrevidx = idx
	461		else:
	462		chunk = _trimchunk(revlog, revs, startrevidx, endrevidx + 1)
	463		if chunk:
	464		yield chunk
	465		startrevidx = idx
	466		startdata = revlog.start(r)
	467		endrevidx = idx
	468		yield _trimchunk(revlog, revs, startrevidx)
	469
	470		def _slicechunktodensity(revlog, revs, deltainfo=None, targetdensity=0.5,
	471		mingapsize=0):
	472		"""slice revs to reduce the amount of unrelated data to be read from disk.
	473
	474		``revs`` is sliced into groups that should be read in one time.
	475		Assume that revs are sorted.
	476
	477		``deltainfo`` is a _deltainfo instance of a revision that we would append
	478		to the top of the revlog.
	479
	480		The initial chunk is sliced until the overall density (payload/chunks-span
	481		ratio) is above `targetdensity`. No gap smaller than `mingapsize` is
	482		skipped.
	483
	484		>>> revlog = _testrevlog([
	485		... 5, #00 (5)
	486		... 10, #01 (5)
	487		... 12, #02 (2)
	488		... 12, #03 (empty)
	489		... 27, #04 (15)
	490		... 31, #05 (4)
	491		... 31, #06 (empty)
	492		... 42, #07 (11)
	493		... 47, #08 (5)
	494		... 47, #09 (empty)
	495		... 48, #10 (1)
	496		... 51, #11 (3)
	497		... 74, #12 (23)
	498		... 85, #13 (11)
	499		... 86, #14 (1)
	500		... 91, #15 (5)
	501		... ])
	502
	503		>>> list(_slicechunktodensity(revlog, list(range(16))))
	504		[[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]]
	505		>>> list(_slicechunktodensity(revlog, [0, 15]))
	506		[[0], [15]]
	507		>>> list(_slicechunktodensity(revlog, [0, 11, 15]))
	508		[[0], [11], [15]]
	509		>>> list(_slicechunktodensity(revlog, [0, 11, 13, 15]))
	510		[[0], [11, 13, 15]]
	511		>>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14]))
	512		[[1, 2], [5, 8, 10, 11], [14]]
	513		>>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
	514		... mingapsize=20))
	515		[[1, 2, 3, 5, 8, 10, 11], [14]]
	516		>>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
	517		... targetdensity=0.95))
	518		[[1, 2], [5], [8, 10, 11], [14]]
	519		>>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
	520		... targetdensity=0.95, mingapsize=12))
	521		[[1, 2], [5, 8, 10, 11], [14]]
	522		"""
	523		start = revlog.start
	524		length = revlog.length
	525
	526		if len(revs) <= 1:
	527		yield revs
	528		return
	529
	530		nextrev = len(revlog)
	531		nextoffset = revlog.end(nextrev - 1)
	532
	533		if deltainfo is None:
	534		deltachainspan = _segmentspan(revlog, revs)
	535		chainpayload = sum(length(r) for r in revs)
	536		else:
	537		deltachainspan = deltainfo.distance
	538		chainpayload = deltainfo.compresseddeltalen
	539
	540		if deltachainspan < mingapsize:
	541		yield revs
	542		return
	543
	544		readdata = deltachainspan
	545
	546		if deltachainspan:
	547		density = chainpayload / float(deltachainspan)
	548		else:
	549		density = 1.0
	550
	551		if density >= targetdensity:
	552		yield revs
	553		return
	554
	555		if deltainfo is not None and deltainfo.deltalen:
	556		revs = list(revs)
	557		revs.append(nextrev)
	558
	559		# Store the gaps in a heap to have them sorted by decreasing size
	560		gapsheap = []
	561		heapq.heapify(gapsheap)
	562		prevend = None
	563		for i, rev in enumerate(revs):
	564		if rev < nextrev:
	565		revstart = start(rev)
	566		revlen = length(rev)
	567		else:
	568		revstart = nextoffset
	569		revlen = deltainfo.deltalen
	570
	571		# Skip empty revisions to form larger holes
	572		if revlen == 0:
	573		continue
	574
	575		if prevend is not None:
	576		gapsize = revstart - prevend
	577		# only consider holes that are large enough
	578		if gapsize > mingapsize:
	579		heapq.heappush(gapsheap, (-gapsize, i))
	580
	581		prevend = revstart + revlen
	582
	583		# Collect the indices of the largest holes until the density is acceptable
	584		indicesheap = []
	585		heapq.heapify(indicesheap)
	586		while gapsheap and density < targetdensity:
	587		oppgapsize, gapidx = heapq.heappop(gapsheap)
	588
	589		heapq.heappush(indicesheap, gapidx)
	590
	591		# the gap sizes are stored as negatives to be sorted decreasingly
	592		# by the heap
	593		readdata -= (-oppgapsize)
	594		if readdata > 0:
	595		density = chainpayload / float(readdata)
	596		else:
	597		density = 1.0
	598
	599		# Cut the revs at collected indices
	600		previdx = 0
	601		while indicesheap:
	602		idx = heapq.heappop(indicesheap)
	603
	604		chunk = _trimchunk(revlog, revs, previdx, idx)
	605		if chunk:
	606		yield chunk
	607
	608		previdx = idx
	609
	610		chunk = _trimchunk(revlog, revs, previdx)
	611		if chunk:
	612		yield chunk
	613
	614		@attr.s(slots=True, frozen=True)
	615		class _deltainfo(object):
	616		distance = attr.ib()
	617		deltalen = attr.ib()
	618		data = attr.ib()
	619		base = attr.ib()
	620		chainbase = attr.ib()
	621		chainlen = attr.ib()
	622		compresseddeltalen = attr.ib()
	623		snapshotdepth = attr.ib()
	624
	625		class _deltacomputer(object):
	626		def __init__(self, revlog):
	627		self.revlog = revlog
	628
	629		def _getcandidaterevs(self, p1, p2, cachedelta):
	630		"""
	631		Provides revisions that present an interest to be diffed against,
	632		grouped by level of easiness.
	633		"""
	634		revlog = self.revlog
	635		gdelta = revlog._generaldelta
	636		curr = len(revlog)
	637		prev = curr - 1
	638		p1r, p2r = revlog.rev(p1), revlog.rev(p2)
	639
	640		# should we try to build a delta?
	641		if prev != nullrev and revlog._storedeltachains:
	642		tested = set()
	643		# This condition is true most of the time when processing
	644		# changegroup data into a generaldelta repo. The only time it
	645		# isn't true is if this is the first revision in a delta chain
	646		# or if ``format.generaldelta=true`` disabled ``lazydeltabase``.
	647		if cachedelta and gdelta and revlog._lazydeltabase:
	648		# Assume what we received from the server is a good choice
	649		# build delta will reuse the cache
	650		yield (cachedelta[0],)
	651		tested.add(cachedelta[0])
	652
	653		if gdelta:
	654		# exclude already lazy tested base if any
	655		parents = [p for p in (p1r, p2r)
	656		if p != nullrev and p not in tested]
	657
	658		if not revlog._deltabothparents and len(parents) == 2:
	659		parents.sort()
	660		# To minimize the chance of having to build a fulltext,
	661		# pick first whichever parent is closest to us (max rev)
	662		yield (parents[1],)
	663		# then the other one (min rev) if the first did not fit
	664		yield (parents[0],)
	665		tested.update(parents)
	666		elif len(parents) > 0:
	667		# Test all parents (1 or 2), and keep the best candidate
	668		yield parents
	669		tested.update(parents)
	670
	671		if prev not in tested:
	672		# other approach failed try against prev to hopefully save us a
	673		# fulltext.
	674		yield (prev,)
	675		tested.add(prev)
	676
	677		def buildtext(self, revinfo, fh):
	678		"""Builds a fulltext version of a revision
	679
	680		revinfo: _revisioninfo instance that contains all needed info
	681		fh: file handle to either the .i or the .d revlog file,
	682		depending on whether it is inlined or not
	683		"""
	684		btext = revinfo.btext
	685		if btext[0] is not None:
	686		return btext[0]
	687
	688		revlog = self.revlog
	689		cachedelta = revinfo.cachedelta
	690		flags = revinfo.flags
	691		node = revinfo.node
	692
	693		baserev = cachedelta[0]
	694		delta = cachedelta[1]
	695		# special case deltas which replace entire base; no need to decode
	696		# base revision. this neatly avoids censored bases, which throw when
	697		# they're decoded.
	698		hlen = struct.calcsize(">lll")
	699		if delta[:hlen] == mdiff.replacediffheader(revlog.rawsize(baserev),
	700		len(delta) - hlen):
	701		btext[0] = delta[hlen:]
	702		else:
	703		# deltabase is rawtext before changed by flag processors, which is
	704		# equivalent to non-raw text
	705		basetext = revlog.revision(baserev, _df=fh, raw=False)
	706		btext[0] = mdiff.patch(basetext, delta)
	707
	708		try:
	709		res = revlog._processflags(btext[0], flags, 'read', raw=True)
	710		btext[0], validatehash = res
	711		if validatehash:
	712		revlog.checkhash(btext[0], node, p1=revinfo.p1, p2=revinfo.p2)
	713		if flags & REVIDX_ISCENSORED:
	714		raise RevlogError(_('node %s is not censored') % node)
	715		except CensoredNodeError:
	716		# must pass the censored index flag to add censored revisions
	717		if not flags & REVIDX_ISCENSORED:
	718		raise
	719		return btext[0]
	720
	721		def _builddeltadiff(self, base, revinfo, fh):
	722		revlog = self.revlog
	723		t = self.buildtext(revinfo, fh)
	724		if revlog.iscensored(base):
	725		# deltas based on a censored revision must replace the
	726		# full content in one patch, so delta works everywhere
	727		header = mdiff.replacediffheader(revlog.rawsize(base), len(t))
	728		delta = header + t
	729		else:
	730		ptext = revlog.revision(base, _df=fh, raw=True)
	731		delta = mdiff.textdiff(ptext, t)
	732
	733		return delta
	734
	735		def _builddeltainfo(self, revinfo, base, fh):
	736		# can we use the cached delta?
	737		if revinfo.cachedelta and revinfo.cachedelta[0] == base:
	738		delta = revinfo.cachedelta[1]
	739		else:
	740		delta = self._builddeltadiff(base, revinfo, fh)
	741		revlog = self.revlog
	742		header, data = revlog.compress(delta)
	743		deltalen = len(header) + len(data)
	744		chainbase = revlog.chainbase(base)
	745		offset = revlog.end(len(revlog) - 1)
	746		dist = deltalen + offset - revlog.start(chainbase)
	747		if revlog._generaldelta:
	748		deltabase = base
	749		else:
	750		deltabase = chainbase
	751		chainlen, compresseddeltalen = revlog._chaininfo(base)
	752		chainlen += 1
	753		compresseddeltalen += deltalen
	754
	755		revlog = self.revlog
	756		snapshotdepth = None
	757		if deltabase == nullrev:
	758		snapshotdepth = 0
	759		elif revlog._sparserevlog and revlog.issnapshot(deltabase):
	760		# A delta chain should always be one full snapshot,
	761		# zero or more semi-snapshots, and zero or more deltas
	762		p1, p2 = revlog.rev(revinfo.p1), revlog.rev(revinfo.p2)
	763		if deltabase not in (p1, p2) and revlog.issnapshot(deltabase):
	764		snapshotdepth = len(revlog._deltachain(deltabase)[0])
	765
	766		return _deltainfo(dist, deltalen, (header, data), deltabase,
	767		chainbase, chainlen, compresseddeltalen,
	768		snapshotdepth)
	769
	770		def finddeltainfo(self, revinfo, fh):
	771		"""Find an acceptable delta against a candidate revision
	772
	773		revinfo: information about the revision (instance of _revisioninfo)
	774		fh: file handle to either the .i or the .d revlog file,
	775		depending on whether it is inlined or not
	776
	777		Returns the first acceptable candidate revision, as ordered by
	778		_getcandidaterevs
	779		"""
	780		if not revinfo.textlen:
	781		return None # empty file do not need delta
	782
	783		cachedelta = revinfo.cachedelta
	784		p1 = revinfo.p1
	785		p2 = revinfo.p2
	786		revlog = self.revlog
	787
	788		deltalength = self.revlog.length
	789		deltaparent = self.revlog.deltaparent
	790
	791		deltainfo = None
	792		deltas_limit = revinfo.textlen * LIMIT_DELTA2TEXT
	793		for candidaterevs in self._getcandidaterevs(p1, p2, cachedelta):
	794		# filter out delta base that will never produce good delta
	795		candidaterevs = [r for r in candidaterevs
	796		if self.revlog.length(r) <= deltas_limit]
	797		nominateddeltas = []
	798		for candidaterev in candidaterevs:
	799		# skip over empty delta (no need to include them in a chain)
	800		while candidaterev != nullrev and not deltalength(candidaterev):
	801		candidaterev = deltaparent(candidaterev)
	802		# no need to try a delta against nullid, this will be handled
	803		# by fulltext later.
	804		if candidaterev == nullrev:
	805		continue
	806		# no delta for rawtext-changing revs (see "candelta" for why)
	807		if revlog.flags(candidaterev) & REVIDX_RAWTEXT_CHANGING_FLAGS:
	808		continue
	809		candidatedelta = self._builddeltainfo(revinfo, candidaterev, fh)
	810		if revlog._isgooddeltainfo(candidatedelta, revinfo):
	811		nominateddeltas.append(candidatedelta)
	812		if nominateddeltas:
	813		deltainfo = min(nominateddeltas, key=lambda x: x.deltalen)
	814		break
	815
	816		return deltainfo
	817
	818	215	@attr.s(slots=True, frozen=True)
	819	216	class _revisioninfo(object):
	820	217	"""Information about a revision that allows building its fulltext
			@@ -2095,7 +1492,8 b' class revlog(object):'
	2095	1492	if not self._withsparseread:
	2096	1493	slicedchunks = (revs,)
	2097	1494	else:
	2098		slicedchunks = _slicechunk(self, revs, ~~targetsize~~=~~targetsize~~)
		1495	slicedchunks = deltautil.slicechunk(self, revs,
		1496	targetsize=targetsize)
	2099	1497
	2100	1498	for revschunk in slicedchunks:
	2101	1499	firstrev = revschunk[0]
			@@ -2393,7 +1791,7 b' class revlog(object):'
	2393	1791	computed by default as hash(text, p1, p2), however subclasses might
	2394	1792	use different hashing method (and override checkhash() in such case)
	2395	1793	flags - the known flags to set on the revision
	2396		deltacomputer - an optional _deltacomputer instance shared between
		1794	deltacomputer - an optional deltacomputer instance shared between
	2397	1795	multiple calls
	2398	1796	"""
	2399	1797	if link == nullrev:
			@@ -2516,95 +1914,6 b' class revlog(object):'
	2516	1914
	2517	1915	return compressor.decompress(data)
	2518	1916
	2519		def _isgooddeltainfo(self, deltainfo, revinfo):
	2520		"""Returns True if the given delta is good. Good means that it is within
	2521		the disk span, disk size, and chain length bounds that we know to be
	2522		performant."""
	2523		if deltainfo is None:
	2524		return False
	2525
	2526		# - 'deltainfo.distance' is the distance from the base revision --
	2527		# bounding it limits the amount of I/O we need to do.
	2528		# - 'deltainfo.compresseddeltalen' is the sum of the total size of
	2529		# deltas we need to apply -- bounding it limits the amount of CPU
	2530		# we consume.
	2531
	2532		if self._sparserevlog:
	2533		# As sparse-read will be used, we can consider that the distance,
	2534		# instead of being the span of the whole chunk,
	2535		# is the span of the largest read chunk
	2536		base = deltainfo.base
	2537
	2538		if base != nullrev:
	2539		deltachain = self._deltachain(base)[0]
	2540		else:
	2541		deltachain = []
	2542
	2543		# search for the first non-snapshot revision
	2544		for idx, r in enumerate(deltachain):
	2545		if not self.issnapshot(r):
	2546		break
	2547		deltachain = deltachain[idx:]
	2548		chunks = _slicechunk(self, deltachain, deltainfo)
	2549		all_span = [_segmentspan(self, revs, deltainfo) for revs in chunks]
	2550		distance = max(all_span)
	2551		else:
	2552		distance = deltainfo.distance
	2553
	2554		textlen = revinfo.textlen
	2555		defaultmax = textlen * 4
	2556		maxdist = self._maxdeltachainspan
	2557		if not maxdist:
	2558		maxdist = distance # ensure the conditional pass
	2559		maxdist = max(maxdist, defaultmax)
	2560		if self._sparserevlog and maxdist < self._srmingapsize:
	2561		# In multiple place, we are ignoring irrelevant data range below a
	2562		# certain size. Be also apply this tradeoff here and relax span
	2563		# constraint for small enought content.
	2564		maxdist = self._srmingapsize
	2565
	2566		# Bad delta from read span:
	2567		#
	2568		# If the span of data read is larger than the maximum allowed.
	2569		if maxdist < distance:
	2570		return False
	2571
	2572		# Bad delta from new delta size:
	2573		#
	2574		# If the delta size is larger than the target text, storing the
	2575		# delta will be inefficient.
	2576		if textlen < deltainfo.deltalen:
	2577		return False
	2578
	2579		# Bad delta from cumulated payload size:
	2580		#
	2581		# If the sum of delta get larger than K * target text length.
	2582		if textlen * LIMIT_DELTA2TEXT < deltainfo.compresseddeltalen:
	2583		return False
	2584
	2585		# Bad delta from chain length:
	2586		#
	2587		# If the number of delta in the chain gets too high.
	2588		if self._maxchainlen and self._maxchainlen < deltainfo.chainlen:
	2589		return False
	2590
	2591		# bad delta from intermediate snapshot size limit
	2592		#
	2593		# If an intermediate snapshot size is higher than the limit. The
	2594		# limit exist to prevent endless chain of intermediate delta to be
	2595		# created.
	2596		if (deltainfo.snapshotdepth is not None and
	2597		(textlen >> deltainfo.snapshotdepth) < deltainfo.deltalen):
	2598		return False
	2599
	2600		# bad delta if new intermediate snapshot is larger than the previous
	2601		# snapshot
	2602		if (deltainfo.snapshotdepth
	2603		and self.length(deltainfo.base) < deltainfo.deltalen):
	2604		return False
	2605
	2606		return True
	2607
	2608	1917	def _addrevision(self, node, rawtext, transaction, link, p1, p2, flags,
	2609	1918	cachedelta, ifh, dfh, alwayscache=False,
	2610	1919	deltacomputer=None):
			@@ -2652,7 +1961,7 b' class revlog(object):'
	2652	1961	textlen = len(rawtext)
	2653	1962
	2654	1963	if deltacomputer is None:
	2655		deltacomputer = _deltacomputer(self)
		1964	deltacomputer = deltautil.deltacomputer(self)
	2656	1965
	2657	1966	revinfo = _revisioninfo(node, p1, p2, btext, textlen, cachedelta, flags)
	2658	1967
			@@ -2754,7 +2063,7 b' class revlog(object):'
	2754	2063	dfh.flush()
	2755	2064	ifh.flush()
	2756	2065	try:
	2757		deltacomputer = _deltacomputer(self)
		2066	deltacomputer = deltautil.deltacomputer(self)
	2758	2067	# loop through our set of deltas
	2759	2068	for data in deltas:
	2760	2069	node, p1, p2, linknode, deltabase, delta, flags = data
			@@ -3127,7 +2436,7 b' class revlog(object):'
	3127	2436	populatecachedelta = deltareuse in (self.DELTAREUSEALWAYS,
	3128	2437	self.DELTAREUSESAMEREVS)
	3129	2438
	3130		deltacomputer = _deltacomputer(destrevlog)
		2439	deltacomputer = deltautil.deltacomputer(destrevlog)
	3131	2440	index = self.index
	3132	2441	for rev in self:
	3133	2442	entry = index[rev]

mercurial/revlogutils/constants.py

0 0 -3

              REVIDX_KNOWN_FLAGS = util.bitsfrom(REVIDX_FLAGS_ORDER)
              # bitmark for flags that could cause rawdata content change
              REVIDX_RAWTEXT_CHANGING_FLAGS = REVIDX_ISCENSORED | REVIDX_EXTSTORED
-             # maximum <delta-chain-data>/<revision-text-length> ratio
-             LIMIT_DELTA2TEXT = 2

mercurial/revlogutils/deltas.py mercurial/revlog.py

0 copied +204 -2650

This diff has been collapsed as it changes many lines, (2854 lines changed) Show them Hide them
			@@ -1,215 +1,42 b''
	1		# revlog.py - storage back-end for mercurial
		1	# revlogdeltas.py - Logic around delta computation for revlog
	2	2	#
	3	3	# Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
		4	# Copyright 2018 Octobus <contact@octobus.net>
	4	5	#
	5	6	# This software may be used and distributed according to the terms of the
	6	7	# GNU General Public License version 2 or any later version.
	7
	8		"""Storage back-end for Mercurial.
	9
	10		This provides efficient delta storage with O(1) retrieve and append
	11		and O(changes) merge between branches.
	12		"""
		8	"""Helper class to compute deltas stored inside revlogs"""
	13	9
	14	10	from __future__ import absolute_import
	15	11
	16		import collections
	17		import contextlib
	18		import errno
	19		import hashlib
	20	12	import heapq
	21		import os
	22		import re
	23	13	import struct
	24		import zlib
	25	14
	26	15	# import stuff from node for others to import from revlog
	27		from .node import (
	28		bin,
	29		hex,
	30		nullhex,
	31		nullid,
		16	from ..node import (
	32	17	nullrev,
	33		wdirfilenodeids,
	34		wdirhex,
	35		wdirid,
	36		wdirrev,
	37	18	)
	38		from .i18n import _
	39		from .revlogutils.constants import (
	40		FLAG_GENERALDELTA,
	41		FLAG_INLINE_DATA,
	42		LIMIT_DELTA2TEXT,
	43		REVIDX_DEFAULT_FLAGS,
	44		REVIDX_ELLIPSIS,
	45		REVIDX_EXTSTORED,
	46		REVIDX_FLAGS_ORDER,
		19	from ..i18n import _
		20
		21	from .constants import (
	47	22	REVIDX_ISCENSORED,
	48		REVIDX_KNOWN_FLAGS,
	49	23	REVIDX_RAWTEXT_CHANGING_FLAGS,
	50		REVLOGV0,
	51		REVLOGV1,
	52		REVLOGV1_FLAGS,
	53		REVLOGV2,
	54		REVLOGV2_FLAGS,
	55		REVLOG_DEFAULT_FLAGS,
	56		REVLOG_DEFAULT_FORMAT,
	57		REVLOG_DEFAULT_VERSION,
	58	24	)
	59		from .thirdparty import (
		25
		26	from ..thirdparty import (
	60	27	attr,
	61	28	)
	62		from . import (
	63		ancestor,
		29
		30	from .. import (
	64	31	error,
	65	32	mdiff,
	66		policy,
	67		pycompat,
	68		repository,
	69		templatefilters,
	70		util,
	71	33	)
	72		from .utils import (
	73		interfaceutil,
	74		stringutil,
	75		)
	76
	77		# blanked usage of all the name to prevent pyflakes constraints
	78		# We need these name available in the module for extensions.
	79		REVLOGV0
	80		REVLOGV1
	81		REVLOGV2
	82		FLAG_INLINE_DATA
	83		FLAG_GENERALDELTA
	84		REVLOG_DEFAULT_FLAGS
	85		REVLOG_DEFAULT_FORMAT
	86		REVLOG_DEFAULT_VERSION
	87		REVLOGV1_FLAGS
	88		REVLOGV2_FLAGS
	89		REVIDX_ISCENSORED
	90		REVIDX_ELLIPSIS
	91		REVIDX_EXTSTORED
	92		REVIDX_DEFAULT_FLAGS
	93		REVIDX_FLAGS_ORDER
	94		REVIDX_KNOWN_FLAGS
	95		REVIDX_RAWTEXT_CHANGING_FLAGS
	96
	97		parsers = policy.importmod(r'parsers')
	98
	99		# Aliased for performance.
	100		_zlibdecompress = zlib.decompress
	101
	102		# max size of revlog with inline data
	103		_maxinline = 131072
	104		_chunksize = 1048576
	105	34
	106	35	RevlogError = error.RevlogError
	107		LookupError = error.LookupError
	108		AmbiguousPrefixLookupError = error.AmbiguousPrefixLookupError
	109	36	CensoredNodeError = error.CensoredNodeError
	110		ProgrammingError = error.ProgrammingError
	111
	112		# Store flag processors (cf. 'addflagprocessor()' to register)
	113		_flagprocessors = {
	114		REVIDX_ISCENSORED: None,
	115		}
	116
	117		_mdre = re.compile('\1\n')
	118		def parsemeta(text):
	119		"""return (metadatadict, metadatasize)"""
	120		# text can be buffer, so we can't use .startswith or .index
	121		if text[:2] != '\1\n':
	122		return None, None
	123		s = _mdre.search(text, 2).start()
	124		mtext = text[2:s]
	125		meta = {}
	126		for l in mtext.splitlines():
	127		k, v = l.split(": ", 1)
	128		meta[k] = v
	129		return meta, (s + 2)
	130
	131		def packmeta(meta, text):
	132		keys = sorted(meta)
	133		metatext = "".join("%s: %s\n" % (k, meta[k]) for k in keys)
	134		return "\1\n%s\1\n%s" % (metatext, text)
	135
	136		def _censoredtext(text):
	137		m, offs = parsemeta(text)
	138		return m and "censored" in m
	139
	140		def addflagprocessor(flag, processor):
	141		"""Register a flag processor on a revision data flag.
	142	37
	143		Invariant:
	144		- Flags need to be defined in REVIDX_KNOWN_FLAGS and REVIDX_FLAGS_ORDER,
	145		and REVIDX_RAWTEXT_CHANGING_FLAGS if they can alter rawtext.
	146		- Only one flag processor can be registered on a specific flag.
	147		- flagprocessors must be 3-tuples of functions (read, write, raw) with the
	148		following signatures:
	149		- (read) f(self, rawtext) -> text, bool
	150		- (write) f(self, text) -> rawtext, bool
	151		- (raw) f(self, rawtext) -> bool
	152		"text" is presented to the user. "rawtext" is stored in revlog data, not
	153		directly visible to the user.
	154		The boolean returned by these transforms is used to determine whether
	155		the returned text can be used for hash integrity checking. For example,
	156		if "write" returns False, then "text" is used to generate hash. If
	157		"write" returns True, that basically means "rawtext" returned by "write"
	158		should be used to generate hash. Usually, "write" and "read" return
	159		different booleans. And "raw" returns a same boolean as "write".
	160
	161		Note: The 'raw' transform is used for changegroup generation and in some
	162		debug commands. In this case the transform only indicates whether the
	163		contents can be used for hash integrity checks.
	164		"""
	165		if not flag & REVIDX_KNOWN_FLAGS:
	166		msg = _("cannot register processor on unknown flag '%#x'.") % (flag)
	167		raise ProgrammingError(msg)
	168		if flag not in REVIDX_FLAGS_ORDER:
	169		msg = _("flag '%#x' undefined in REVIDX_FLAGS_ORDER.") % (flag)
	170		raise ProgrammingError(msg)
	171		if flag in _flagprocessors:
	172		msg = _("cannot register multiple processors on flag '%#x'.") % (flag)
	173		raise error.Abort(msg)
	174		_flagprocessors[flag] = processor
	175
	176		def getoffset(q):
	177		return int(q >> 16)
	178
	179		def gettype(q):
	180		return int(q & 0xFFFF)
	181
	182		def offset_type(offset, type):
	183		if (type & ~REVIDX_KNOWN_FLAGS) != 0:
	184		raise ValueError('unknown revlog index flags')
	185		return int(int(offset) << 16 \| type)
	186
	187		_nullhash = hashlib.sha1(nullid)
	188
	189		def hash(text, p1, p2):
	190		"""generate a hash from the given text and its parent hashes
	191
	192		This hash combines both the current file contents and its history
	193		in a manner that makes it easy to distinguish nodes with the same
	194		content in the revision graph.
	195		"""
	196		# As of now, if one of the parent node is null, p2 is null
	197		if p2 == nullid:
	198		# deep copy of a hash is faster than creating one
	199		s = _nullhash.copy()
	200		s.update(p1)
	201		else:
	202		# none of the parent nodes are nullid
	203		if p1 < p2:
	204		a = p1
	205		b = p2
	206		else:
	207		a = p2
	208		b = p1
	209		s = hashlib.sha1(a)
	210		s.update(b)
	211		s.update(text)
	212		return s.digest()
		38	# maximum <delta-chain-data>/<revision-text-length> ratio
		39	LIMIT_DELTA2TEXT = 2
	213	40
	214	41	class _testrevlog(object):
	215	42	"""minimalist fake revlog to use in doctests"""
			@@ -234,95 +61,7 b' class _testrevlog(object):'
	234	61	def __len__(self):
	235	62	return len(self._data)
	236	63
	237		def ~~_trim~~chunk(revlog, revs, ~~startidx~~, ~~endidx~~=None):
	238		"""returns revs[startidx:endidx] without empty trailing revs
	239
	240		Doctest Setup
	241		>>> revlog = _testrevlog([
	242		... 5, #0
	243		... 10, #1
	244		... 12, #2
	245		... 12, #3 (empty)
	246		... 17, #4
	247		... 21, #5
	248		... 21, #6 (empty)
	249		... ])
	250
	251		Contiguous cases:
	252		>>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0)
	253		[0, 1, 2, 3, 4, 5]
	254		>>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 5)
	255		[0, 1, 2, 3, 4]
	256		>>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 4)
	257		[0, 1, 2]
	258		>>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 2, 4)
	259		[2]
	260		>>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3)
	261		[3, 4, 5]
	262		>>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3, 5)
	263		[3, 4]
	264
	265		Discontiguous cases:
	266		>>> _trimchunk(revlog, [1, 3, 5, 6], 0)
	267		[1, 3, 5]
	268		>>> _trimchunk(revlog, [1, 3, 5, 6], 0, 2)
	269		[1]
	270		>>> _trimchunk(revlog, [1, 3, 5, 6], 1, 3)
	271		[3, 5]
	272		>>> _trimchunk(revlog, [1, 3, 5, 6], 1)
	273		[3, 5]
	274		"""
	275		length = revlog.length
	276
	277		if endidx is None:
	278		endidx = len(revs)
	279
	280		# If we have a non-emtpy delta candidate, there are nothing to trim
	281		if revs[endidx - 1] < len(revlog):
	282		# Trim empty revs at the end, except the very first revision of a chain
	283		while (endidx > 1
	284		and endidx > startidx
	285		and length(revs[endidx - 1]) == 0):
	286		endidx -= 1
	287
	288		return revs[startidx:endidx]
	289
	290		def _segmentspan(revlog, revs, deltainfo=None):
	291		"""Get the byte span of a segment of revisions
	292
	293		revs is a sorted array of revision numbers
	294
	295		>>> revlog = _testrevlog([
	296		... 5, #0
	297		... 10, #1
	298		... 12, #2
	299		... 12, #3 (empty)
	300		... 17, #4
	301		... ])
	302
	303		>>> _segmentspan(revlog, [0, 1, 2, 3, 4])
	304		17
	305		>>> _segmentspan(revlog, [0, 4])
	306		17
	307		>>> _segmentspan(revlog, [3, 4])
	308		5
	309		>>> _segmentspan(revlog, [1, 2, 3,])
	310		7
	311		>>> _segmentspan(revlog, [1, 3])
	312		7
	313		"""
	314		if not revs:
	315		return 0
	316		if deltainfo is not None and len(revlog) <= revs[-1]:
	317		if len(revs) == 1:
	318		return deltainfo.deltalen
	319		offset = revlog.end(len(revlog) - 1)
	320		end = deltainfo.deltalen + offset
	321		else:
	322		end = revlog.end(revs[-1])
	323		return end - revlog.start(revs[0])
	324
	325		def _slicechunk(revlog, revs, deltainfo=None, targetsize=None):
		64	def slicechunk(revlog, revs, deltainfo=None, targetsize=None):
	326	65	"""slice revs to reduce the amount of unrelated data to be read from disk.
	327	66
	328	67	``revs`` is sliced into groups that should be read in one time.
			@@ -358,21 +97,21 b' def _slicechunk(revlog, revs, deltainfo='
	358	97	... 91, #15 (5)
	359	98	... ])
	360	99
	361		>>> list(_slicechunk(revlog, list(range(16))))
		100	>>> list(slicechunk(revlog, list(range(16))))
	362	101	[[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]]
	363		>>> list(_slicechunk(revlog, [0, 15]))
		102	>>> list(slicechunk(revlog, [0, 15]))
	364	103	[[0], [15]]
	365		>>> list(_slicechunk(revlog, [0, 11, 15]))
		104	>>> list(slicechunk(revlog, [0, 11, 15]))
	366	105	[[0], [11], [15]]
	367		>>> list(_slicechunk(revlog, [0, 11, 13, 15]))
		106	>>> list(slicechunk(revlog, [0, 11, 13, 15]))
	368	107	[[0], [11, 13, 15]]
	369		>>> list(_slicechunk(revlog, [1, 2, 3, 5, 8, 10, 11, 14]))
		108	>>> list(slicechunk(revlog, [1, 2, 3, 5, 8, 10, 11, 14]))
	370	109	[[1, 2], [5, 8, 10, 11], [14]]
	371	110
	372	111	Slicing with a maximum chunk size
	373		>>> list(_slicechunk(revlog, [0, 11, 13, 15], targetsize=15))
		112	>>> list(slicechunk(revlog, [0, 11, 13, 15], targetsize=15))
	374	113	[[0], [11], [13], [15]]
	375		>>> list(_slicechunk(revlog, [0, 11, 13, 15], targetsize=20))
		114	>>> list(slicechunk(revlog, [0, 11, 13, 15], targetsize=20))
	376	115	[[0], [11], [13, 15]]
	377	116	"""
	378	117	if targetsize is not None:
			@@ -445,7 +184,7 b' def _slicechunktosize(revlog, revs, targ'
	445	184	[[3], [5]]
	446	185	"""
	447	186	assert targetsize is None or 0 <= targetsize
	448		if targetsize is None or _segmentspan(revlog, revs) <= targetsize:
		187	if targetsize is None or segmentspan(revlog, revs) <= targetsize:
	449	188	yield revs
	450	189	return
	451	190
			@@ -531,7 +270,7 b' def _slicechunktodensity(revlog, revs, d'
	531	270	nextoffset = revlog.end(nextrev - 1)
	532	271
	533	272	if deltainfo is None:
	534		deltachainspan = _segmentspan(revlog, revs)
		273	deltachainspan = segmentspan(revlog, revs)
	535	274	chainpayload = sum(length(r) for r in revs)
	536	275	else:
	537	276	deltachainspan = deltainfo.distance
			@@ -611,6 +350,94 b' def _slicechunktodensity(revlog, revs, d'
	611	350	if chunk:
	612	351	yield chunk
	613	352
		353	def _trimchunk(revlog, revs, startidx, endidx=None):
		354	"""returns revs[startidx:endidx] without empty trailing revs
		355
		356	Doctest Setup
		357	>>> revlog = _testrevlog([
		358	... 5, #0
		359	... 10, #1
		360	... 12, #2
		361	... 12, #3 (empty)
		362	... 17, #4
		363	... 21, #5
		364	... 21, #6 (empty)
		365	... ])
		366
		367	Contiguous cases:
		368	>>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0)
		369	[0, 1, 2, 3, 4, 5]
		370	>>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 5)
		371	[0, 1, 2, 3, 4]
		372	>>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 4)
		373	[0, 1, 2]
		374	>>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 2, 4)
		375	[2]
		376	>>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3)
		377	[3, 4, 5]
		378	>>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3, 5)
		379	[3, 4]
		380
		381	Discontiguous cases:
		382	>>> _trimchunk(revlog, [1, 3, 5, 6], 0)
		383	[1, 3, 5]
		384	>>> _trimchunk(revlog, [1, 3, 5, 6], 0, 2)
		385	[1]
		386	>>> _trimchunk(revlog, [1, 3, 5, 6], 1, 3)
		387	[3, 5]
		388	>>> _trimchunk(revlog, [1, 3, 5, 6], 1)
		389	[3, 5]
		390	"""
		391	length = revlog.length
		392
		393	if endidx is None:
		394	endidx = len(revs)
		395
		396	# If we have a non-emtpy delta candidate, there are nothing to trim
		397	if revs[endidx - 1] < len(revlog):
		398	# Trim empty revs at the end, except the very first revision of a chain
		399	while (endidx > 1
		400	and endidx > startidx
		401	and length(revs[endidx - 1]) == 0):
		402	endidx -= 1
		403
		404	return revs[startidx:endidx]
		405
		406	def segmentspan(revlog, revs, deltainfo=None):
		407	"""Get the byte span of a segment of revisions
		408
		409	revs is a sorted array of revision numbers
		410
		411	>>> revlog = _testrevlog([
		412	... 5, #0
		413	... 10, #1
		414	... 12, #2
		415	... 12, #3 (empty)
		416	... 17, #4
		417	... ])
		418
		419	>>> segmentspan(revlog, [0, 1, 2, 3, 4])
		420	17
		421	>>> segmentspan(revlog, [0, 4])
		422	17
		423	>>> segmentspan(revlog, [3, 4])
		424	5
		425	>>> segmentspan(revlog, [1, 2, 3,])
		426	7
		427	>>> segmentspan(revlog, [1, 3])
		428	7
		429	"""
		430	if not revs:
		431	return 0
		432	if deltainfo is not None and len(revlog) <= revs[-1]:
		433	if len(revs) == 1:
		434	return deltainfo.deltalen
		435	offset = revlog.end(len(revlog) - 1)
		436	end = deltainfo.deltalen + offset
		437	else:
		438	end = revlog.end(revs[-1])
		439	return end - revlog.start(revs[0])
		440
	614	441	@attr.s(slots=True, frozen=True)
	615	442	class _deltainfo(object):
	616	443	distance = attr.ib()
			@@ -622,7 +449,98 b' class _deltainfo(object):'
	622	449	compresseddeltalen = attr.ib()
	623	450	snapshotdepth = attr.ib()
	624	451
	625		class _deltacomputer(object):
		452	def isgooddeltainfo(revlog, deltainfo, revinfo):
		453	"""Returns True if the given delta is good. Good means that it is within
		454	the disk span, disk size, and chain length bounds that we know to be
		455	performant."""
		456	if deltainfo is None:
		457	return False
		458
		459	# - 'deltainfo.distance' is the distance from the base revision --
		460	# bounding it limits the amount of I/O we need to do.
		461	# - 'deltainfo.compresseddeltalen' is the sum of the total size of
		462	# deltas we need to apply -- bounding it limits the amount of CPU
		463	# we consume.
		464
		465	if revlog._sparserevlog:
		466	# As sparse-read will be used, we can consider that the distance,
		467	# instead of being the span of the whole chunk,
		468	# is the span of the largest read chunk
		469	base = deltainfo.base
		470
		471	if base != nullrev:
		472	deltachain = revlog._deltachain(base)[0]
		473	else:
		474	deltachain = []
		475
		476	# search for the first non-snapshot revision
		477	for idx, r in enumerate(deltachain):
		478	if not revlog.issnapshot(r):
		479	break
		480	deltachain = deltachain[idx:]
		481	chunks = slicechunk(revlog, deltachain, deltainfo)
		482	all_span = [segmentspan(revlog, revs, deltainfo)
		483	for revs in chunks]
		484	distance = max(all_span)
		485	else:
		486	distance = deltainfo.distance
		487
		488	textlen = revinfo.textlen
		489	defaultmax = textlen * 4
		490	maxdist = revlog._maxdeltachainspan
		491	if not maxdist:
		492	maxdist = distance # ensure the conditional pass
		493	maxdist = max(maxdist, defaultmax)
		494	if revlog._sparserevlog and maxdist < revlog._srmingapsize:
		495	# In multiple place, we are ignoring irrelevant data range below a
		496	# certain size. Be also apply this tradeoff here and relax span
		497	# constraint for small enought content.
		498	maxdist = revlog._srmingapsize
		499
		500	# Bad delta from read span:
		501	#
		502	# If the span of data read is larger than the maximum allowed.
		503	if maxdist < distance:
		504	return False
		505
		506	# Bad delta from new delta size:
		507	#
		508	# If the delta size is larger than the target text, storing the
		509	# delta will be inefficient.
		510	if textlen < deltainfo.deltalen:
		511	return False
		512
		513	# Bad delta from cumulated payload size:
		514	#
		515	# If the sum of delta get larger than K * target text length.
		516	if textlen * LIMIT_DELTA2TEXT < deltainfo.compresseddeltalen:
		517	return False
		518
		519	# Bad delta from chain length:
		520	#
		521	# If the number of delta in the chain gets too high.
		522	if (revlog._maxchainlen
		523	and revlog._maxchainlen < deltainfo.chainlen):
		524	return False
		525
		526	# bad delta from intermediate snapshot size limit
		527	#
		528	# If an intermediate snapshot size is higher than the limit. The
		529	# limit exist to prevent endless chain of intermediate delta to be
		530	# created.
		531	if (deltainfo.snapshotdepth is not None and
		532	(textlen >> deltainfo.snapshotdepth) < deltainfo.deltalen):
		533	return False
		534
		535	# bad delta if new intermediate snapshot is larger than the previous
		536	# snapshot
		537	if (deltainfo.snapshotdepth
		538	and revlog.length(deltainfo.base) < deltainfo.deltalen):
		539	return False
		540
		541	return True
		542
		543	class deltacomputer(object):
	626	544	def __init__(self, revlog):
	627	545	self.revlog = revlog
	628	546
			@@ -807,2374 +725,10 b' class _deltacomputer(object):'
	807	725	if revlog.flags(candidaterev) & REVIDX_RAWTEXT_CHANGING_FLAGS:
	808	726	continue
	809	727	candidatedelta = self._builddeltainfo(revinfo, candidaterev, fh)
	810		if revlog.~~_isgooddeltainfo~~(candidatedelta, revinfo):
		728	if isgooddeltainfo(self.revlog, candidatedelta, revinfo):
	811	729	nominateddeltas.append(candidatedelta)
	812	730	if nominateddeltas:
	813	731	deltainfo = min(nominateddeltas, key=lambda x: x.deltalen)
	814	732	break
	815	733
	816	734	return deltainfo
	817
	818		@attr.s(slots=True, frozen=True)
	819		class _revisioninfo(object):
	820		"""Information about a revision that allows building its fulltext
	821		node: expected hash of the revision
	822		p1, p2: parent revs of the revision
	823		btext: built text cache consisting of a one-element list
	824		cachedelta: (baserev, uncompressed_delta) or None
	825		flags: flags associated to the revision storage
	826
	827		One of btext[0] or cachedelta must be set.
	828		"""
	829		node = attr.ib()
	830		p1 = attr.ib()
	831		p2 = attr.ib()
	832		btext = attr.ib()
	833		textlen = attr.ib()
	834		cachedelta = attr.ib()
	835		flags = attr.ib()
	836
	837		@interfaceutil.implementer(repository.irevisiondelta)
	838		@attr.s(slots=True, frozen=True)
	839		class revlogrevisiondelta(object):
	840		node = attr.ib()
	841		p1node = attr.ib()
	842		p2node = attr.ib()
	843		basenode = attr.ib()
	844		linknode = attr.ib()
	845		flags = attr.ib()
	846		baserevisionsize = attr.ib()
	847		revision = attr.ib()
	848		delta = attr.ib()
	849
	850		# index v0:
	851		# 4 bytes: offset
	852		# 4 bytes: compressed length
	853		# 4 bytes: base rev
	854		# 4 bytes: link rev
	855		# 20 bytes: parent 1 nodeid
	856		# 20 bytes: parent 2 nodeid
	857		# 20 bytes: nodeid
	858		indexformatv0 = struct.Struct(">4l20s20s20s")
	859		indexformatv0_pack = indexformatv0.pack
	860		indexformatv0_unpack = indexformatv0.unpack
	861
	862		class revlogoldindex(list):
	863		def __getitem__(self, i):
	864		if i == -1:
	865		return (0, 0, 0, -1, -1, -1, -1, nullid)
	866		return list.__getitem__(self, i)
	867
	868		class revlogoldio(object):
	869		def __init__(self):
	870		self.size = indexformatv0.size
	871
	872		def parseindex(self, data, inline):
	873		s = self.size
	874		index = []
	875		nodemap = {nullid: nullrev}
	876		n = off = 0
	877		l = len(data)
	878		while off + s <= l:
	879		cur = data[off:off + s]
	880		off += s
	881		e = indexformatv0_unpack(cur)
	882		# transform to revlogv1 format
	883		e2 = (offset_type(e[0], 0), e[1], -1, e[2], e[3],
	884		nodemap.get(e[4], nullrev), nodemap.get(e[5], nullrev), e[6])
	885		index.append(e2)
	886		nodemap[e[6]] = n
	887		n += 1
	888
	889		return revlogoldindex(index), nodemap, None
	890
	891		def packentry(self, entry, node, version, rev):
	892		if gettype(entry[0]):
	893		raise RevlogError(_('index entry flags need revlog version 1'))
	894		e2 = (getoffset(entry[0]), entry[1], entry[3], entry[4],
	895		node(entry[5]), node(entry[6]), entry[7])
	896		return indexformatv0_pack(*e2)
	897
	898		# index ng:
	899		# 6 bytes: offset
	900		# 2 bytes: flags
	901		# 4 bytes: compressed length
	902		# 4 bytes: uncompressed length
	903		# 4 bytes: base rev
	904		# 4 bytes: link rev
	905		# 4 bytes: parent 1 rev
	906		# 4 bytes: parent 2 rev
	907		# 32 bytes: nodeid
	908		indexformatng = struct.Struct(">Qiiiiii20s12x")
	909		indexformatng_pack = indexformatng.pack
	910		versionformat = struct.Struct(">I")
	911		versionformat_pack = versionformat.pack
	912		versionformat_unpack = versionformat.unpack
	913
	914		# corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
	915		# signed integer)
	916		_maxentrysize = 0x7fffffff
	917
	918		class revlogio(object):
	919		def __init__(self):
	920		self.size = indexformatng.size
	921
	922		def parseindex(self, data, inline):
	923		# call the C implementation to parse the index data
	924		index, cache = parsers.parse_index2(data, inline)
	925		return index, getattr(index, 'nodemap', None), cache
	926
	927		def packentry(self, entry, node, version, rev):
	928		p = indexformatng_pack(*entry)
	929		if rev == 0:
	930		p = versionformat_pack(version) + p[4:]
	931		return p
	932
	933		class revlog(object):
	934		"""
	935		the underlying revision storage object
	936
	937		A revlog consists of two parts, an index and the revision data.
	938
	939		The index is a file with a fixed record size containing
	940		information on each revision, including its nodeid (hash), the
	941		nodeids of its parents, the position and offset of its data within
	942		the data file, and the revision it's based on. Finally, each entry
	943		contains a linkrev entry that can serve as a pointer to external
	944		data.
	945
	946		The revision data itself is a linear collection of data chunks.
	947		Each chunk represents a revision and is usually represented as a
	948		delta against the previous chunk. To bound lookup time, runs of
	949		deltas are limited to about 2 times the length of the original
	950		version data. This makes retrieval of a version proportional to
	951		its size, or O(1) relative to the number of revisions.
	952
	953		Both pieces of the revlog are written to in an append-only
	954		fashion, which means we never need to rewrite a file to insert or
	955		remove data, and can use some simple techniques to avoid the need
	956		for locking while reading.
	957
	958		If checkambig, indexfile is opened with checkambig=True at
	959		writing, to avoid file stat ambiguity.
	960
	961		If mmaplargeindex is True, and an mmapindexthreshold is set, the
	962		index will be mmapped rather than read if it is larger than the
	963		configured threshold.
	964
	965		If censorable is True, the revlog can have censored revisions.
	966		"""
	967		def __init__(self, opener, indexfile, datafile=None, checkambig=False,
	968		mmaplargeindex=False, censorable=False):
	969		"""
	970		create a revlog object
	971
	972		opener is a function that abstracts the file opening operation
	973		and can be used to implement COW semantics or the like.
	974		"""
	975		self.indexfile = indexfile
	976		self.datafile = datafile or (indexfile[:-2] + ".d")
	977		self.opener = opener
	978		# When True, indexfile is opened with checkambig=True at writing, to
	979		# avoid file stat ambiguity.
	980		self._checkambig = checkambig
	981		self._censorable = censorable
	982		# 3-tuple of (node, rev, text) for a raw revision.
	983		self._cache = None
	984		# Maps rev to chain base rev.
	985		self._chainbasecache = util.lrucachedict(100)
	986		# 2-tuple of (offset, data) of raw data from the revlog at an offset.
	987		self._chunkcache = (0, '')
	988		# How much data to read and cache into the raw revlog data cache.
	989		self._chunkcachesize = 65536
	990		self._maxchainlen = None
	991		self._deltabothparents = True
	992		self.index = []
	993		# Mapping of partial identifiers to full nodes.
	994		self._pcache = {}
	995		# Mapping of revision integer to full node.
	996		self._nodecache = {nullid: nullrev}
	997		self._nodepos = None
	998		self._compengine = 'zlib'
	999		self._maxdeltachainspan = -1
	1000		self._withsparseread = False
	1001		self._sparserevlog = False
	1002		self._srdensitythreshold = 0.50
	1003		self._srmingapsize = 262144
	1004
	1005		mmapindexthreshold = None
	1006		v = REVLOG_DEFAULT_VERSION
	1007		opts = getattr(opener, 'options', None)
	1008		if opts is not None:
	1009		if 'revlogv2' in opts:
	1010		# version 2 revlogs always use generaldelta.
	1011		v = REVLOGV2 \| FLAG_GENERALDELTA \| FLAG_INLINE_DATA
	1012		elif 'revlogv1' in opts:
	1013		if 'generaldelta' in opts:
	1014		v \|= FLAG_GENERALDELTA
	1015		else:
	1016		v = 0
	1017		if 'chunkcachesize' in opts:
	1018		self._chunkcachesize = opts['chunkcachesize']
	1019		if 'maxchainlen' in opts:
	1020		self._maxchainlen = opts['maxchainlen']
	1021		if 'deltabothparents' in opts:
	1022		self._deltabothparents = opts['deltabothparents']
	1023		self._lazydeltabase = bool(opts.get('lazydeltabase', False))
	1024		if 'compengine' in opts:
	1025		self._compengine = opts['compengine']
	1026		if 'maxdeltachainspan' in opts:
	1027		self._maxdeltachainspan = opts['maxdeltachainspan']
	1028		if mmaplargeindex and 'mmapindexthreshold' in opts:
	1029		mmapindexthreshold = opts['mmapindexthreshold']
	1030		self._sparserevlog = bool(opts.get('sparse-revlog', False))
	1031		withsparseread = bool(opts.get('with-sparse-read', False))
	1032		# sparse-revlog forces sparse-read
	1033		self._withsparseread = self._sparserevlog or withsparseread
	1034		if 'sparse-read-density-threshold' in opts:
	1035		self._srdensitythreshold = opts['sparse-read-density-threshold']
	1036		if 'sparse-read-min-gap-size' in opts:
	1037		self._srmingapsize = opts['sparse-read-min-gap-size']
	1038
	1039		if self._chunkcachesize <= 0:
	1040		raise RevlogError(_('revlog chunk cache size %r is not greater '
	1041		'than 0') % self._chunkcachesize)
	1042		elif self._chunkcachesize & (self._chunkcachesize - 1):
	1043		raise RevlogError(_('revlog chunk cache size %r is not a power '
	1044		'of 2') % self._chunkcachesize)
	1045
	1046		indexdata = ''
	1047		self._initempty = True
	1048		try:
	1049		with self._indexfp() as f:
	1050		if (mmapindexthreshold is not None and
	1051		self.opener.fstat(f).st_size >= mmapindexthreshold):
	1052		indexdata = util.buffer(util.mmapread(f))
	1053		else:
	1054		indexdata = f.read()
	1055		if len(indexdata) > 0:
	1056		v = versionformat_unpack(indexdata[:4])[0]
	1057		self._initempty = False
	1058		except IOError as inst:
	1059		if inst.errno != errno.ENOENT:
	1060		raise
	1061
	1062		self.version = v
	1063		self._inline = v & FLAG_INLINE_DATA
	1064		self._generaldelta = v & FLAG_GENERALDELTA
	1065		flags = v & ~0xFFFF
	1066		fmt = v & 0xFFFF
	1067		if fmt == REVLOGV0:
	1068		if flags:
	1069		raise RevlogError(_('unknown flags (%#04x) in version %d '
	1070		'revlog %s') %
	1071		(flags >> 16, fmt, self.indexfile))
	1072		elif fmt == REVLOGV1:
	1073		if flags & ~REVLOGV1_FLAGS:
	1074		raise RevlogError(_('unknown flags (%#04x) in version %d '
	1075		'revlog %s') %
	1076		(flags >> 16, fmt, self.indexfile))
	1077		elif fmt == REVLOGV2:
	1078		if flags & ~REVLOGV2_FLAGS:
	1079		raise RevlogError(_('unknown flags (%#04x) in version %d '
	1080		'revlog %s') %
	1081		(flags >> 16, fmt, self.indexfile))
	1082		else:
	1083		raise RevlogError(_('unknown version (%d) in revlog %s') %
	1084		(fmt, self.indexfile))
	1085
	1086		self._storedeltachains = True
	1087
	1088		self._io = revlogio()
	1089		if self.version == REVLOGV0:
	1090		self._io = revlogoldio()
	1091		try:
	1092		d = self._io.parseindex(indexdata, self._inline)
	1093		except (ValueError, IndexError):
	1094		raise RevlogError(_("index %s is corrupted") % (self.indexfile))
	1095		self.index, nodemap, self._chunkcache = d
	1096		if nodemap is not None:
	1097		self.nodemap = self._nodecache = nodemap
	1098		if not self._chunkcache:
	1099		self._chunkclear()
	1100		# revnum -> (chain-length, sum-delta-length)
	1101		self._chaininfocache = {}
	1102		# revlog header -> revlog compressor
	1103		self._decompressors = {}
	1104
	1105		@util.propertycache
	1106		def _compressor(self):
	1107		return util.compengines[self._compengine].revlogcompressor()
	1108
	1109		def _indexfp(self, mode='r'):
	1110		"""file object for the revlog's index file"""
	1111		args = {r'mode': mode}
	1112		if mode != 'r':
	1113		args[r'checkambig'] = self._checkambig
	1114		if mode == 'w':
	1115		args[r'atomictemp'] = True
	1116		return self.opener(self.indexfile, **args)
	1117
	1118		def _datafp(self, mode='r'):
	1119		"""file object for the revlog's data file"""
	1120		return self.opener(self.datafile, mode=mode)
	1121
	1122		@contextlib.contextmanager
	1123		def _datareadfp(self, existingfp=None):
	1124		"""file object suitable to read data"""
	1125		if existingfp is not None:
	1126		yield existingfp
	1127		else:
	1128		if self._inline:
	1129		func = self._indexfp
	1130		else:
	1131		func = self._datafp
	1132		with func() as fp:
	1133		yield fp
	1134
	1135		def tip(self):
	1136		return self.node(len(self.index) - 1)
	1137		def __contains__(self, rev):
	1138		return 0 <= rev < len(self)
	1139		def __len__(self):
	1140		return len(self.index)
	1141		def __iter__(self):
	1142		return iter(pycompat.xrange(len(self)))
	1143		def revs(self, start=0, stop=None):
	1144		"""iterate over all rev in this revlog (from start to stop)"""
	1145		step = 1
	1146		length = len(self)
	1147		if stop is not None:
	1148		if start > stop:
	1149		step = -1
	1150		stop += step
	1151		if stop > length:
	1152		stop = length
	1153		else:
	1154		stop = length
	1155		return pycompat.xrange(start, stop, step)
	1156
	1157		@util.propertycache
	1158		def nodemap(self):
	1159		if self.index:
	1160		# populate mapping down to the initial node
	1161		node0 = self.index[0][7] # get around changelog filtering
	1162		self.rev(node0)
	1163		return self._nodecache
	1164
	1165		def hasnode(self, node):
	1166		try:
	1167		self.rev(node)
	1168		return True
	1169		except KeyError:
	1170		return False
	1171
	1172		def candelta(self, baserev, rev):
	1173		"""whether two revisions (baserev, rev) can be delta-ed or not"""
	1174		# Disable delta if either rev requires a content-changing flag
	1175		# processor (ex. LFS). This is because such flag processor can alter
	1176		# the rawtext content that the delta will be based on, and two clients
	1177		# could have a same revlog node with different flags (i.e. different
	1178		# rawtext contents) and the delta could be incompatible.
	1179		if ((self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS)
	1180		or (self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS)):
	1181		return False
	1182		return True
	1183
	1184		def clearcaches(self):
	1185		self._cache = None
	1186		self._chainbasecache.clear()
	1187		self._chunkcache = (0, '')
	1188		self._pcache = {}
	1189
	1190		try:
	1191		self._nodecache.clearcaches()
	1192		except AttributeError:
	1193		self._nodecache = {nullid: nullrev}
	1194		self._nodepos = None
	1195
	1196		def rev(self, node):
	1197		try:
	1198		return self._nodecache[node]
	1199		except TypeError:
	1200		raise
	1201		except RevlogError:
	1202		# parsers.c radix tree lookup failed
	1203		if node == wdirid or node in wdirfilenodeids:
	1204		raise error.WdirUnsupported
	1205		raise LookupError(node, self.indexfile, _('no node'))
	1206		except KeyError:
	1207		# pure python cache lookup failed
	1208		n = self._nodecache
	1209		i = self.index
	1210		p = self._nodepos
	1211		if p is None:
	1212		p = len(i) - 1
	1213		else:
	1214		assert p < len(i)
	1215		for r in pycompat.xrange(p, -1, -1):
	1216		v = i[r][7]
	1217		n[v] = r
	1218		if v == node:
	1219		self._nodepos = r - 1
	1220		return r
	1221		if node == wdirid or node in wdirfilenodeids:
	1222		raise error.WdirUnsupported
	1223		raise LookupError(node, self.indexfile, _('no node'))
	1224
	1225		# Accessors for index entries.
	1226
	1227		# First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
	1228		# are flags.
	1229		def start(self, rev):
	1230		return int(self.index[rev][0] >> 16)
	1231
	1232		def flags(self, rev):
	1233		return self.index[rev][0] & 0xFFFF
	1234
	1235		def length(self, rev):
	1236		return self.index[rev][1]
	1237
	1238		def rawsize(self, rev):
	1239		"""return the length of the uncompressed text for a given revision"""
	1240		l = self.index[rev][2]
	1241		if l >= 0:
	1242		return l
	1243
	1244		t = self.revision(rev, raw=True)
	1245		return len(t)
	1246
	1247		def size(self, rev):
	1248		"""length of non-raw text (processed by a "read" flag processor)"""
	1249		# fast path: if no "read" flag processor could change the content,
	1250		# size is rawsize. note: ELLIPSIS is known to not change the content.
	1251		flags = self.flags(rev)
	1252		if flags & (REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
	1253		return self.rawsize(rev)
	1254
	1255		return len(self.revision(rev, raw=False))
	1256
	1257		def chainbase(self, rev):
	1258		base = self._chainbasecache.get(rev)
	1259		if base is not None:
	1260		return base
	1261
	1262		index = self.index
	1263		iterrev = rev
	1264		base = index[iterrev][3]
	1265		while base != iterrev:
	1266		iterrev = base
	1267		base = index[iterrev][3]
	1268
	1269		self._chainbasecache[rev] = base
	1270		return base
	1271
	1272		def linkrev(self, rev):
	1273		return self.index[rev][4]
	1274
	1275		def parentrevs(self, rev):
	1276		try:
	1277		entry = self.index[rev]
	1278		except IndexError:
	1279		if rev == wdirrev:
	1280		raise error.WdirUnsupported
	1281		raise
	1282
	1283		return entry[5], entry[6]
	1284
	1285		def node(self, rev):
	1286		try:
	1287		return self.index[rev][7]
	1288		except IndexError:
	1289		if rev == wdirrev:
	1290		raise error.WdirUnsupported
	1291		raise
	1292
	1293		# Derived from index values.
	1294
	1295		def end(self, rev):
	1296		return self.start(rev) + self.length(rev)
	1297
	1298		def parents(self, node):
	1299		i = self.index
	1300		d = i[self.rev(node)]
	1301		return i[d[5]][7], i[d[6]][7] # map revisions to nodes inline
	1302
	1303		def chainlen(self, rev):
	1304		return self._chaininfo(rev)[0]
	1305
	1306		def _chaininfo(self, rev):
	1307		chaininfocache = self._chaininfocache
	1308		if rev in chaininfocache:
	1309		return chaininfocache[rev]
	1310		index = self.index
	1311		generaldelta = self._generaldelta
	1312		iterrev = rev
	1313		e = index[iterrev]
	1314		clen = 0
	1315		compresseddeltalen = 0
	1316		while iterrev != e[3]:
	1317		clen += 1
	1318		compresseddeltalen += e[1]
	1319		if generaldelta:
	1320		iterrev = e[3]
	1321		else:
	1322		iterrev -= 1
	1323		if iterrev in chaininfocache:
	1324		t = chaininfocache[iterrev]
	1325		clen += t[0]
	1326		compresseddeltalen += t[1]
	1327		break
	1328		e = index[iterrev]
	1329		else:
	1330		# Add text length of base since decompressing that also takes
	1331		# work. For cache hits the length is already included.
	1332		compresseddeltalen += e[1]
	1333		r = (clen, compresseddeltalen)
	1334		chaininfocache[rev] = r
	1335		return r
	1336
	1337		def _deltachain(self, rev, stoprev=None):
	1338		"""Obtain the delta chain for a revision.
	1339
	1340		``stoprev`` specifies a revision to stop at. If not specified, we
	1341		stop at the base of the chain.
	1342
	1343		Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
	1344		revs in ascending order and ``stopped`` is a bool indicating whether
	1345		``stoprev`` was hit.
	1346		"""
	1347		# Try C implementation.
	1348		try:
	1349		return self.index.deltachain(rev, stoprev, self._generaldelta)
	1350		except AttributeError:
	1351		pass
	1352
	1353		chain = []
	1354
	1355		# Alias to prevent attribute lookup in tight loop.
	1356		index = self.index
	1357		generaldelta = self._generaldelta
	1358
	1359		iterrev = rev
	1360		e = index[iterrev]
	1361		while iterrev != e[3] and iterrev != stoprev:
	1362		chain.append(iterrev)
	1363		if generaldelta:
	1364		iterrev = e[3]
	1365		else:
	1366		iterrev -= 1
	1367		e = index[iterrev]
	1368
	1369		if iterrev == stoprev:
	1370		stopped = True
	1371		else:
	1372		chain.append(iterrev)
	1373		stopped = False
	1374
	1375		chain.reverse()
	1376		return chain, stopped
	1377
	1378		def ancestors(self, revs, stoprev=0, inclusive=False):
	1379		"""Generate the ancestors of 'revs' in reverse topological order.
	1380		Does not generate revs lower than stoprev.
	1381
	1382		See the documentation for ancestor.lazyancestors for more details."""
	1383
	1384		return ancestor.lazyancestors(self.parentrevs, revs, stoprev=stoprev,
	1385		inclusive=inclusive)
	1386
	1387		def descendants(self, revs):
	1388		"""Generate the descendants of 'revs' in revision order.
	1389
	1390		Yield a sequence of revision numbers starting with a child of
	1391		some rev in revs, i.e., each revision is not considered a
	1392		descendant of itself. Results are ordered by revision number (a
	1393		topological sort)."""
	1394		first = min(revs)
	1395		if first == nullrev:
	1396		for i in self:
	1397		yield i
	1398		return
	1399
	1400		seen = set(revs)
	1401		for i in self.revs(start=first + 1):
	1402		for x in self.parentrevs(i):
	1403		if x != nullrev and x in seen:
	1404		seen.add(i)
	1405		yield i
	1406		break
	1407
	1408		def findcommonmissing(self, common=None, heads=None):
	1409		"""Return a tuple of the ancestors of common and the ancestors of heads
	1410		that are not ancestors of common. In revset terminology, we return the
	1411		tuple:
	1412
	1413		::common, (::heads) - (::common)
	1414
	1415		The list is sorted by revision number, meaning it is
	1416		topologically sorted.
	1417
	1418		'heads' and 'common' are both lists of node IDs. If heads is
	1419		not supplied, uses all of the revlog's heads. If common is not
	1420		supplied, uses nullid."""
	1421		if common is None:
	1422		common = [nullid]
	1423		if heads is None:
	1424		heads = self.heads()
	1425
	1426		common = [self.rev(n) for n in common]
	1427		heads = [self.rev(n) for n in heads]
	1428
	1429		# we want the ancestors, but inclusive
	1430		class lazyset(object):
	1431		def __init__(self, lazyvalues):
	1432		self.addedvalues = set()
	1433		self.lazyvalues = lazyvalues
	1434
	1435		def __contains__(self, value):
	1436		return value in self.addedvalues or value in self.lazyvalues
	1437
	1438		def __iter__(self):
	1439		added = self.addedvalues
	1440		for r in added:
	1441		yield r
	1442		for r in self.lazyvalues:
	1443		if not r in added:
	1444		yield r
	1445
	1446		def add(self, value):
	1447		self.addedvalues.add(value)
	1448
	1449		def update(self, values):
	1450		self.addedvalues.update(values)
	1451
	1452		has = lazyset(self.ancestors(common))
	1453		has.add(nullrev)
	1454		has.update(common)
	1455
	1456		# take all ancestors from heads that aren't in has
	1457		missing = set()
	1458		visit = collections.deque(r for r in heads if r not in has)
	1459		while visit:
	1460		r = visit.popleft()
	1461		if r in missing:
	1462		continue
	1463		else:
	1464		missing.add(r)
	1465		for p in self.parentrevs(r):
	1466		if p not in has:
	1467		visit.append(p)
	1468		missing = list(missing)
	1469		missing.sort()
	1470		return has, [self.node(miss) for miss in missing]
	1471
	1472		def incrementalmissingrevs(self, common=None):
	1473		"""Return an object that can be used to incrementally compute the
	1474		revision numbers of the ancestors of arbitrary sets that are not
	1475		ancestors of common. This is an ancestor.incrementalmissingancestors
	1476		object.
	1477
	1478		'common' is a list of revision numbers. If common is not supplied, uses
	1479		nullrev.
	1480		"""
	1481		if common is None:
	1482		common = [nullrev]
	1483
	1484		return ancestor.incrementalmissingancestors(self.parentrevs, common)
	1485
	1486		def findmissingrevs(self, common=None, heads=None):
	1487		"""Return the revision numbers of the ancestors of heads that
	1488		are not ancestors of common.
	1489
	1490		More specifically, return a list of revision numbers corresponding to
	1491		nodes N such that every N satisfies the following constraints:
	1492
	1493		1. N is an ancestor of some node in 'heads'
	1494		2. N is not an ancestor of any node in 'common'
	1495
	1496		The list is sorted by revision number, meaning it is
	1497		topologically sorted.
	1498
	1499		'heads' and 'common' are both lists of revision numbers. If heads is
	1500		not supplied, uses all of the revlog's heads. If common is not
	1501		supplied, uses nullid."""
	1502		if common is None:
	1503		common = [nullrev]
	1504		if heads is None:
	1505		heads = self.headrevs()
	1506
	1507		inc = self.incrementalmissingrevs(common=common)
	1508		return inc.missingancestors(heads)
	1509
	1510		def findmissing(self, common=None, heads=None):
	1511		"""Return the ancestors of heads that are not ancestors of common.
	1512
	1513		More specifically, return a list of nodes N such that every N
	1514		satisfies the following constraints:
	1515
	1516		1. N is an ancestor of some node in 'heads'
	1517		2. N is not an ancestor of any node in 'common'
	1518
	1519		The list is sorted by revision number, meaning it is
	1520		topologically sorted.
	1521
	1522		'heads' and 'common' are both lists of node IDs. If heads is
	1523		not supplied, uses all of the revlog's heads. If common is not
	1524		supplied, uses nullid."""
	1525		if common is None:
	1526		common = [nullid]
	1527		if heads is None:
	1528		heads = self.heads()
	1529
	1530		common = [self.rev(n) for n in common]
	1531		heads = [self.rev(n) for n in heads]
	1532
	1533		inc = self.incrementalmissingrevs(common=common)
	1534		return [self.node(r) for r in inc.missingancestors(heads)]
	1535
	1536		def nodesbetween(self, roots=None, heads=None):
	1537		"""Return a topological path from 'roots' to 'heads'.
	1538
	1539		Return a tuple (nodes, outroots, outheads) where 'nodes' is a
	1540		topologically sorted list of all nodes N that satisfy both of
	1541		these constraints:
	1542
	1543		1. N is a descendant of some node in 'roots'
	1544		2. N is an ancestor of some node in 'heads'
	1545
	1546		Every node is considered to be both a descendant and an ancestor
	1547		of itself, so every reachable node in 'roots' and 'heads' will be
	1548		included in 'nodes'.
	1549
	1550		'outroots' is the list of reachable nodes in 'roots', i.e., the
	1551		subset of 'roots' that is returned in 'nodes'. Likewise,
	1552		'outheads' is the subset of 'heads' that is also in 'nodes'.
	1553
	1554		'roots' and 'heads' are both lists of node IDs. If 'roots' is
	1555		unspecified, uses nullid as the only root. If 'heads' is
	1556		unspecified, uses list of all of the revlog's heads."""
	1557		nonodes = ([], [], [])
	1558		if roots is not None:
	1559		roots = list(roots)
	1560		if not roots:
	1561		return nonodes
	1562		lowestrev = min([self.rev(n) for n in roots])
	1563		else:
	1564		roots = [nullid] # Everybody's a descendant of nullid
	1565		lowestrev = nullrev
	1566		if (lowestrev == nullrev) and (heads is None):
	1567		# We want _all_ the nodes!
	1568		return ([self.node(r) for r in self], [nullid], list(self.heads()))
	1569		if heads is None:
	1570		# All nodes are ancestors, so the latest ancestor is the last
	1571		# node.
	1572		highestrev = len(self) - 1
	1573		# Set ancestors to None to signal that every node is an ancestor.
	1574		ancestors = None
	1575		# Set heads to an empty dictionary for later discovery of heads
	1576		heads = {}
	1577		else:
	1578		heads = list(heads)
	1579		if not heads:
	1580		return nonodes
	1581		ancestors = set()
	1582		# Turn heads into a dictionary so we can remove 'fake' heads.
	1583		# Also, later we will be using it to filter out the heads we can't
	1584		# find from roots.
	1585		heads = dict.fromkeys(heads, False)
	1586		# Start at the top and keep marking parents until we're done.
	1587		nodestotag = set(heads)
	1588		# Remember where the top was so we can use it as a limit later.
	1589		highestrev = max([self.rev(n) for n in nodestotag])
	1590		while nodestotag:
	1591		# grab a node to tag
	1592		n = nodestotag.pop()
	1593		# Never tag nullid
	1594		if n == nullid:
	1595		continue
	1596		# A node's revision number represents its place in a
	1597		# topologically sorted list of nodes.
	1598		r = self.rev(n)
	1599		if r >= lowestrev:
	1600		if n not in ancestors:
	1601		# If we are possibly a descendant of one of the roots
	1602		# and we haven't already been marked as an ancestor
	1603		ancestors.add(n) # Mark as ancestor
	1604		# Add non-nullid parents to list of nodes to tag.
	1605		nodestotag.update([p for p in self.parents(n) if
	1606		p != nullid])
	1607		elif n in heads: # We've seen it before, is it a fake head?
	1608		# So it is, real heads should not be the ancestors of
	1609		# any other heads.
	1610		heads.pop(n)
	1611		if not ancestors:
	1612		return nonodes
	1613		# Now that we have our set of ancestors, we want to remove any
	1614		# roots that are not ancestors.
	1615
	1616		# If one of the roots was nullid, everything is included anyway.
	1617		if lowestrev > nullrev:
	1618		# But, since we weren't, let's recompute the lowest rev to not
	1619		# include roots that aren't ancestors.
	1620
	1621		# Filter out roots that aren't ancestors of heads
	1622		roots = [root for root in roots if root in ancestors]
	1623		# Recompute the lowest revision
	1624		if roots:
	1625		lowestrev = min([self.rev(root) for root in roots])
	1626		else:
	1627		# No more roots? Return empty list
	1628		return nonodes
	1629		else:
	1630		# We are descending from nullid, and don't need to care about
	1631		# any other roots.
	1632		lowestrev = nullrev
	1633		roots = [nullid]
	1634		# Transform our roots list into a set.
	1635		descendants = set(roots)
	1636		# Also, keep the original roots so we can filter out roots that aren't
	1637		# 'real' roots (i.e. are descended from other roots).
	1638		roots = descendants.copy()
	1639		# Our topologically sorted list of output nodes.
	1640		orderedout = []
	1641		# Don't start at nullid since we don't want nullid in our output list,
	1642		# and if nullid shows up in descendants, empty parents will look like
	1643		# they're descendants.
	1644		for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
	1645		n = self.node(r)
	1646		isdescendant = False
	1647		if lowestrev == nullrev: # Everybody is a descendant of nullid
	1648		isdescendant = True
	1649		elif n in descendants:
	1650		# n is already a descendant
	1651		isdescendant = True
	1652		# This check only needs to be done here because all the roots
	1653		# will start being marked is descendants before the loop.
	1654		if n in roots:
	1655		# If n was a root, check if it's a 'real' root.
	1656		p = tuple(self.parents(n))
	1657		# If any of its parents are descendants, it's not a root.
	1658		if (p[0] in descendants) or (p[1] in descendants):
	1659		roots.remove(n)
	1660		else:
	1661		p = tuple(self.parents(n))
	1662		# A node is a descendant if either of its parents are
	1663		# descendants. (We seeded the dependents list with the roots
	1664		# up there, remember?)
	1665		if (p[0] in descendants) or (p[1] in descendants):
	1666		descendants.add(n)
	1667		isdescendant = True
	1668		if isdescendant and ((ancestors is None) or (n in ancestors)):
	1669		# Only include nodes that are both descendants and ancestors.
	1670		orderedout.append(n)
	1671		if (ancestors is not None) and (n in heads):
	1672		# We're trying to figure out which heads are reachable
	1673		# from roots.
	1674		# Mark this head as having been reached
	1675		heads[n] = True
	1676		elif ancestors is None:
	1677		# Otherwise, we're trying to discover the heads.
	1678		# Assume this is a head because if it isn't, the next step
	1679		# will eventually remove it.
	1680		heads[n] = True
	1681		# But, obviously its parents aren't.
	1682		for p in self.parents(n):
	1683		heads.pop(p, None)
	1684		heads = [head for head, flag in heads.iteritems() if flag]
	1685		roots = list(roots)
	1686		assert orderedout
	1687		assert roots
	1688		assert heads
	1689		return (orderedout, roots, heads)
	1690
	1691		def headrevs(self):
	1692		try:
	1693		return self.index.headrevs()
	1694		except AttributeError:
	1695		return self._headrevs()
	1696
	1697		def computephases(self, roots):
	1698		return self.index.computephasesmapsets(roots)
	1699
	1700		def _headrevs(self):
	1701		count = len(self)
	1702		if not count:
	1703		return [nullrev]
	1704		# we won't iter over filtered rev so nobody is a head at start
	1705		ishead = [0] * (count + 1)
	1706		index = self.index
	1707		for r in self:
	1708		ishead[r] = 1 # I may be an head
	1709		e = index[r]
	1710		ishead[e[5]] = ishead[e[6]] = 0 # my parent are not
	1711		return [r for r, val in enumerate(ishead) if val]
	1712
	1713		def heads(self, start=None, stop=None):
	1714		"""return the list of all nodes that have no children
	1715
	1716		if start is specified, only heads that are descendants of
	1717		start will be returned
	1718		if stop is specified, it will consider all the revs from stop
	1719		as if they had no children
	1720		"""
	1721		if start is None and stop is None:
	1722		if not len(self):
	1723		return [nullid]
	1724		return [self.node(r) for r in self.headrevs()]
	1725
	1726		if start is None:
	1727		start = nullid
	1728		if stop is None:
	1729		stop = []
	1730		stoprevs = set([self.rev(n) for n in stop])
	1731		startrev = self.rev(start)
	1732		reachable = {startrev}
	1733		heads = {startrev}
	1734
	1735		parentrevs = self.parentrevs
	1736		for r in self.revs(start=startrev + 1):
	1737		for p in parentrevs(r):
	1738		if p in reachable:
	1739		if r not in stoprevs:
	1740		reachable.add(r)
	1741		heads.add(r)
	1742		if p in heads and p not in stoprevs:
	1743		heads.remove(p)
	1744
	1745		return [self.node(r) for r in heads]
	1746
	1747		def children(self, node):
	1748		"""find the children of a given node"""
	1749		c = []
	1750		p = self.rev(node)
	1751		for r in self.revs(start=p + 1):
	1752		prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
	1753		if prevs:
	1754		for pr in prevs:
	1755		if pr == p:
	1756		c.append(self.node(r))
	1757		elif p == nullrev:
	1758		c.append(self.node(r))
	1759		return c
	1760
	1761		def commonancestorsheads(self, a, b):
	1762		"""calculate all the heads of the common ancestors of nodes a and b"""
	1763		a, b = self.rev(a), self.rev(b)
	1764		ancs = self._commonancestorsheads(a, b)
	1765		return pycompat.maplist(self.node, ancs)
	1766
	1767		def _commonancestorsheads(self, *revs):
	1768		"""calculate all the heads of the common ancestors of revs"""
	1769		try:
	1770		ancs = self.index.commonancestorsheads(*revs)
	1771		except (AttributeError, OverflowError): # C implementation failed
	1772		ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
	1773		return ancs
	1774
	1775		def isancestor(self, a, b):
	1776		"""return True if node a is an ancestor of node b
	1777
	1778		A revision is considered an ancestor of itself."""
	1779		a, b = self.rev(a), self.rev(b)
	1780		return self.isancestorrev(a, b)
	1781
	1782		def isancestorrev(self, a, b):
	1783		"""return True if revision a is an ancestor of revision b
	1784
	1785		A revision is considered an ancestor of itself.
	1786
	1787		The implementation of this is trivial but the use of
	1788		commonancestorsheads is not."""
	1789		if a == nullrev:
	1790		return True
	1791		elif a == b:
	1792		return True
	1793		elif a > b:
	1794		return False
	1795		return a in self._commonancestorsheads(a, b)
	1796
	1797		def ancestor(self, a, b):
	1798		"""calculate the "best" common ancestor of nodes a and b"""
	1799
	1800		a, b = self.rev(a), self.rev(b)
	1801		try:
	1802		ancs = self.index.ancestors(a, b)
	1803		except (AttributeError, OverflowError):
	1804		ancs = ancestor.ancestors(self.parentrevs, a, b)
	1805		if ancs:
	1806		# choose a consistent winner when there's a tie
	1807		return min(map(self.node, ancs))
	1808		return nullid
	1809
	1810		def _match(self, id):
	1811		if isinstance(id, int):
	1812		# rev
	1813		return self.node(id)
	1814		if len(id) == 20:
	1815		# possibly a binary node
	1816		# odds of a binary node being all hex in ASCII are 1 in 10**25
	1817		try:
	1818		node = id
	1819		self.rev(node) # quick search the index
	1820		return node
	1821		except LookupError:
	1822		pass # may be partial hex id
	1823		try:
	1824		# str(rev)
	1825		rev = int(id)
	1826		if "%d" % rev != id:
	1827		raise ValueError
	1828		if rev < 0:
	1829		rev = len(self) + rev
	1830		if rev < 0 or rev >= len(self):
	1831		raise ValueError
	1832		return self.node(rev)
	1833		except (ValueError, OverflowError):
	1834		pass
	1835		if len(id) == 40:
	1836		try:
	1837		# a full hex nodeid?
	1838		node = bin(id)
	1839		self.rev(node)
	1840		return node
	1841		except (TypeError, LookupError):
	1842		pass
	1843
	1844		def _partialmatch(self, id):
	1845		# we don't care wdirfilenodeids as they should be always full hash
	1846		maybewdir = wdirhex.startswith(id)
	1847		try:
	1848		partial = self.index.partialmatch(id)
	1849		if partial and self.hasnode(partial):
	1850		if maybewdir:
	1851		# single 'ff...' match in radix tree, ambiguous with wdir
	1852		raise RevlogError
	1853		return partial
	1854		if maybewdir:
	1855		# no 'ff...' match in radix tree, wdir identified
	1856		raise error.WdirUnsupported
	1857		return None
	1858		except RevlogError:
	1859		# parsers.c radix tree lookup gave multiple matches
	1860		# fast path: for unfiltered changelog, radix tree is accurate
	1861		if not getattr(self, 'filteredrevs', None):
	1862		raise AmbiguousPrefixLookupError(id, self.indexfile,
	1863		_('ambiguous identifier'))
	1864		# fall through to slow path that filters hidden revisions
	1865		except (AttributeError, ValueError):
	1866		# we are pure python, or key was too short to search radix tree
	1867		pass
	1868
	1869		if id in self._pcache:
	1870		return self._pcache[id]
	1871
	1872		if len(id) <= 40:
	1873		try:
	1874		# hex(node)[:...]
	1875		l = len(id) // 2 # grab an even number of digits
	1876		prefix = bin(id[:l * 2])
	1877		nl = [e[7] for e in self.index if e[7].startswith(prefix)]
	1878		nl = [n for n in nl if hex(n).startswith(id) and
	1879		self.hasnode(n)]
	1880		if nullhex.startswith(id):
	1881		nl.append(nullid)
	1882		if len(nl) > 0:
	1883		if len(nl) == 1 and not maybewdir:
	1884		self._pcache[id] = nl[0]
	1885		return nl[0]
	1886		raise AmbiguousPrefixLookupError(id, self.indexfile,
	1887		_('ambiguous identifier'))
	1888		if maybewdir:
	1889		raise error.WdirUnsupported
	1890		return None
	1891		except TypeError:
	1892		pass
	1893
	1894		def lookup(self, id):
	1895		"""locate a node based on:
	1896		- revision number or str(revision number)
	1897		- nodeid or subset of hex nodeid
	1898		"""
	1899		n = self._match(id)
	1900		if n is not None:
	1901		return n
	1902		n = self._partialmatch(id)
	1903		if n:
	1904		return n
	1905
	1906		raise LookupError(id, self.indexfile, _('no match found'))
	1907
	1908		def shortest(self, node, minlength=1):
	1909		"""Find the shortest unambiguous prefix that matches node."""
	1910		def isvalid(prefix):
	1911		try:
	1912		node = self._partialmatch(prefix)
	1913		except error.RevlogError:
	1914		return False
	1915		except error.WdirUnsupported:
	1916		# single 'ff...' match
	1917		return True
	1918		if node is None:
	1919		raise LookupError(node, self.indexfile, _('no node'))
	1920		return True
	1921
	1922		def maybewdir(prefix):
	1923		return all(c == 'f' for c in prefix)
	1924
	1925		hexnode = hex(node)
	1926
	1927		def disambiguate(hexnode, minlength):
	1928		"""Disambiguate against wdirid."""
	1929		for length in range(minlength, 41):
	1930		prefix = hexnode[:length]
	1931		if not maybewdir(prefix):
	1932		return prefix
	1933
	1934		if not getattr(self, 'filteredrevs', None):
	1935		try:
	1936		length = max(self.index.shortest(node), minlength)
	1937		return disambiguate(hexnode, length)
	1938		except RevlogError:
	1939		if node != wdirid:
	1940		raise LookupError(node, self.indexfile, _('no node'))
	1941		except AttributeError:
	1942		# Fall through to pure code
	1943		pass
	1944
	1945		if node == wdirid:
	1946		for length in range(minlength, 41):
	1947		prefix = hexnode[:length]
	1948		if isvalid(prefix):
	1949		return prefix
	1950
	1951		for length in range(minlength, 41):
	1952		prefix = hexnode[:length]
	1953		if isvalid(prefix):
	1954		return disambiguate(hexnode, length)
	1955
	1956		def cmp(self, node, text):
	1957		"""compare text with a given file revision
	1958
	1959		returns True if text is different than what is stored.
	1960		"""
	1961		p1, p2 = self.parents(node)
	1962		return hash(text, p1, p2) != node
	1963
	1964		def _cachesegment(self, offset, data):
	1965		"""Add a segment to the revlog cache.
	1966
	1967		Accepts an absolute offset and the data that is at that location.
	1968		"""
	1969		o, d = self._chunkcache
	1970		# try to add to existing cache
	1971		if o + len(d) == offset and len(d) + len(data) < _chunksize:
	1972		self._chunkcache = o, d + data
	1973		else:
	1974		self._chunkcache = offset, data
	1975
	1976		def _readsegment(self, offset, length, df=None):
	1977		"""Load a segment of raw data from the revlog.
	1978
	1979		Accepts an absolute offset, length to read, and an optional existing
	1980		file handle to read from.
	1981
	1982		If an existing file handle is passed, it will be seeked and the
	1983		original seek position will NOT be restored.
	1984
	1985		Returns a str or buffer of raw byte data.
	1986		"""
	1987		# Cache data both forward and backward around the requested
	1988		# data, in a fixed size window. This helps speed up operations
	1989		# involving reading the revlog backwards.
	1990		cachesize = self._chunkcachesize
	1991		realoffset = offset & ~(cachesize - 1)
	1992		reallength = (((offset + length + cachesize) & ~(cachesize - 1))
	1993		- realoffset)
	1994		with self._datareadfp(df) as df:
	1995		df.seek(realoffset)
	1996		d = df.read(reallength)
	1997		self._cachesegment(realoffset, d)
	1998		if offset != realoffset or reallength != length:
	1999		return util.buffer(d, offset - realoffset, length)
	2000		return d
	2001
	2002		def _getsegment(self, offset, length, df=None):
	2003		"""Obtain a segment of raw data from the revlog.
	2004
	2005		Accepts an absolute offset, length of bytes to obtain, and an
	2006		optional file handle to the already-opened revlog. If the file
	2007		handle is used, it's original seek position will not be preserved.
	2008
	2009		Requests for data may be returned from a cache.
	2010
	2011		Returns a str or a buffer instance of raw byte data.
	2012		"""
	2013		o, d = self._chunkcache
	2014		l = len(d)
	2015
	2016		# is it in the cache?
	2017		cachestart = offset - o
	2018		cacheend = cachestart + length
	2019		if cachestart >= 0 and cacheend <= l:
	2020		if cachestart == 0 and cacheend == l:
	2021		return d # avoid a copy
	2022		return util.buffer(d, cachestart, cacheend - cachestart)
	2023
	2024		return self._readsegment(offset, length, df=df)
	2025
	2026		def _getsegmentforrevs(self, startrev, endrev, df=None):
	2027		"""Obtain a segment of raw data corresponding to a range of revisions.
	2028
	2029		Accepts the start and end revisions and an optional already-open
	2030		file handle to be used for reading. If the file handle is read, its
	2031		seek position will not be preserved.
	2032
	2033		Requests for data may be satisfied by a cache.
	2034
	2035		Returns a 2-tuple of (offset, data) for the requested range of
	2036		revisions. Offset is the integer offset from the beginning of the
	2037		revlog and data is a str or buffer of the raw byte data.
	2038
	2039		Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
	2040		to determine where each revision's data begins and ends.
	2041		"""
	2042		# Inlined self.start(startrev) & self.end(endrev) for perf reasons
	2043		# (functions are expensive).
	2044		index = self.index
	2045		istart = index[startrev]
	2046		start = int(istart[0] >> 16)
	2047		if startrev == endrev:
	2048		end = start + istart[1]
	2049		else:
	2050		iend = index[endrev]
	2051		end = int(iend[0] >> 16) + iend[1]
	2052
	2053		if self._inline:
	2054		start += (startrev + 1) * self._io.size
	2055		end += (endrev + 1) * self._io.size
	2056		length = end - start
	2057
	2058		return start, self._getsegment(start, length, df=df)
	2059
	2060		def _chunk(self, rev, df=None):
	2061		"""Obtain a single decompressed chunk for a revision.
	2062
	2063		Accepts an integer revision and an optional already-open file handle
	2064		to be used for reading. If used, the seek position of the file will not
	2065		be preserved.
	2066
	2067		Returns a str holding uncompressed data for the requested revision.
	2068		"""
	2069		return self.decompress(self._getsegmentforrevs(rev, rev, df=df)[1])
	2070
	2071		def _chunks(self, revs, df=None, targetsize=None):
	2072		"""Obtain decompressed chunks for the specified revisions.
	2073
	2074		Accepts an iterable of numeric revisions that are assumed to be in
	2075		ascending order. Also accepts an optional already-open file handle
	2076		to be used for reading. If used, the seek position of the file will
	2077		not be preserved.
	2078
	2079		This function is similar to calling ``self._chunk()`` multiple times,
	2080		but is faster.
	2081
	2082		Returns a list with decompressed data for each requested revision.
	2083		"""
	2084		if not revs:
	2085		return []
	2086		start = self.start
	2087		length = self.length
	2088		inline = self._inline
	2089		iosize = self._io.size
	2090		buffer = util.buffer
	2091
	2092		l = []
	2093		ladd = l.append
	2094
	2095		if not self._withsparseread:
	2096		slicedchunks = (revs,)
	2097		else:
	2098		slicedchunks = _slicechunk(self, revs, targetsize=targetsize)
	2099
	2100		for revschunk in slicedchunks:
	2101		firstrev = revschunk[0]
	2102		# Skip trailing revisions with empty diff
	2103		for lastrev in revschunk[::-1]:
	2104		if length(lastrev) != 0:
	2105		break
	2106
	2107		try:
	2108		offset, data = self._getsegmentforrevs(firstrev, lastrev, df=df)
	2109		except OverflowError:
	2110		# issue4215 - we can't cache a run of chunks greater than
	2111		# 2G on Windows
	2112		return [self._chunk(rev, df=df) for rev in revschunk]
	2113
	2114		decomp = self.decompress
	2115		for rev in revschunk:
	2116		chunkstart = start(rev)
	2117		if inline:
	2118		chunkstart += (rev + 1) * iosize
	2119		chunklength = length(rev)
	2120		ladd(decomp(buffer(data, chunkstart - offset, chunklength)))
	2121
	2122		return l
	2123
	2124		def _chunkclear(self):
	2125		"""Clear the raw chunk cache."""
	2126		self._chunkcache = (0, '')
	2127
	2128		def deltaparent(self, rev):
	2129		"""return deltaparent of the given revision"""
	2130		base = self.index[rev][3]
	2131		if base == rev:
	2132		return nullrev
	2133		elif self._generaldelta:
	2134		return base
	2135		else:
	2136		return rev - 1
	2137
	2138		def issnapshot(self, rev):
	2139		"""tells whether rev is a snapshot
	2140		"""
	2141		if rev == nullrev:
	2142		return True
	2143		deltap = self.deltaparent(rev)
	2144		if deltap == nullrev:
	2145		return True
	2146		p1, p2 = self.parentrevs(rev)
	2147		if deltap in (p1, p2):
	2148		return False
	2149		return self.issnapshot(deltap)
	2150
	2151		def snapshotdepth(self, rev):
	2152		"""number of snapshot in the chain before this one"""
	2153		if not self.issnapshot(rev):
	2154		raise ProgrammingError('revision %d not a snapshot')
	2155		return len(self._deltachain(rev)[0]) - 1
	2156
	2157		def revdiff(self, rev1, rev2):
	2158		"""return or calculate a delta between two revisions
	2159
	2160		The delta calculated is in binary form and is intended to be written to
	2161		revlog data directly. So this function needs raw revision data.
	2162		"""
	2163		if rev1 != nullrev and self.deltaparent(rev2) == rev1:
	2164		return bytes(self._chunk(rev2))
	2165
	2166		return mdiff.textdiff(self.revision(rev1, raw=True),
	2167		self.revision(rev2, raw=True))
	2168
	2169		def revision(self, nodeorrev, _df=None, raw=False):
	2170		"""return an uncompressed revision of a given node or revision
	2171		number.
	2172
	2173		_df - an existing file handle to read from. (internal-only)
	2174		raw - an optional argument specifying if the revision data is to be
	2175		treated as raw data when applying flag transforms. 'raw' should be set
	2176		to True when generating changegroups or in debug commands.
	2177		"""
	2178		if isinstance(nodeorrev, int):
	2179		rev = nodeorrev
	2180		node = self.node(rev)
	2181		else:
	2182		node = nodeorrev
	2183		rev = None
	2184
	2185		cachedrev = None
	2186		flags = None
	2187		rawtext = None
	2188		if node == nullid:
	2189		return ""
	2190		if self._cache:
	2191		if self._cache[0] == node:
	2192		# _cache only stores rawtext
	2193		if raw:
	2194		return self._cache[2]
	2195		# duplicated, but good for perf
	2196		if rev is None:
	2197		rev = self.rev(node)
	2198		if flags is None:
	2199		flags = self.flags(rev)
	2200		# no extra flags set, no flag processor runs, text = rawtext
	2201		if flags == REVIDX_DEFAULT_FLAGS:
	2202		return self._cache[2]
	2203		# rawtext is reusable. need to run flag processor
	2204		rawtext = self._cache[2]
	2205
	2206		cachedrev = self._cache[1]
	2207
	2208		# look up what we need to read
	2209		if rawtext is None:
	2210		if rev is None:
	2211		rev = self.rev(node)
	2212
	2213		chain, stopped = self._deltachain(rev, stoprev=cachedrev)
	2214		if stopped:
	2215		rawtext = self._cache[2]
	2216
	2217		# drop cache to save memory
	2218		self._cache = None
	2219
	2220		targetsize = None
	2221		rawsize = self.index[rev][2]
	2222		if 0 <= rawsize:
	2223		targetsize = 4 * rawsize
	2224
	2225		bins = self._chunks(chain, df=_df, targetsize=targetsize)
	2226		if rawtext is None:
	2227		rawtext = bytes(bins[0])
	2228		bins = bins[1:]
	2229
	2230		rawtext = mdiff.patches(rawtext, bins)
	2231		self._cache = (node, rev, rawtext)
	2232
	2233		if flags is None:
	2234		if rev is None:
	2235		rev = self.rev(node)
	2236		flags = self.flags(rev)
	2237
	2238		text, validatehash = self._processflags(rawtext, flags, 'read', raw=raw)
	2239		if validatehash:
	2240		self.checkhash(text, node, rev=rev)
	2241
	2242		return text
	2243
	2244		def hash(self, text, p1, p2):
	2245		"""Compute a node hash.
	2246
	2247		Available as a function so that subclasses can replace the hash
	2248		as needed.
	2249		"""
	2250		return hash(text, p1, p2)
	2251
	2252		def _processflags(self, text, flags, operation, raw=False):
	2253		"""Inspect revision data flags and applies transforms defined by
	2254		registered flag processors.
	2255
	2256		``text`` - the revision data to process
	2257		``flags`` - the revision flags
	2258		``operation`` - the operation being performed (read or write)
	2259		``raw`` - an optional argument describing if the raw transform should be
	2260		applied.
	2261
	2262		This method processes the flags in the order (or reverse order if
	2263		``operation`` is 'write') defined by REVIDX_FLAGS_ORDER, applying the
	2264		flag processors registered for present flags. The order of flags defined
	2265		in REVIDX_FLAGS_ORDER needs to be stable to allow non-commutativity.
	2266
	2267		Returns a 2-tuple of ``(text, validatehash)`` where ``text`` is the
	2268		processed text and ``validatehash`` is a bool indicating whether the
	2269		returned text should be checked for hash integrity.
	2270
	2271		Note: If the ``raw`` argument is set, it has precedence over the
	2272		operation and will only update the value of ``validatehash``.
	2273		"""
	2274		# fast path: no flag processors will run
	2275		if flags == 0:
	2276		return text, True
	2277		if not operation in ('read', 'write'):
	2278		raise ProgrammingError(_("invalid '%s' operation ") % (operation))
	2279		# Check all flags are known.
	2280		if flags & ~REVIDX_KNOWN_FLAGS:
	2281		raise RevlogError(_("incompatible revision flag '%#x'") %
	2282		(flags & ~REVIDX_KNOWN_FLAGS))
	2283		validatehash = True
	2284		# Depending on the operation (read or write), the order might be
	2285		# reversed due to non-commutative transforms.
	2286		orderedflags = REVIDX_FLAGS_ORDER
	2287		if operation == 'write':
	2288		orderedflags = reversed(orderedflags)
	2289
	2290		for flag in orderedflags:
	2291		# If a flagprocessor has been registered for a known flag, apply the
	2292		# related operation transform and update result tuple.
	2293		if flag & flags:
	2294		vhash = True
	2295
	2296		if flag not in _flagprocessors:
	2297		message = _("missing processor for flag '%#x'") % (flag)
	2298		raise RevlogError(message)
	2299
	2300		processor = _flagprocessors[flag]
	2301		if processor is not None:
	2302		readtransform, writetransform, rawtransform = processor
	2303
	2304		if raw:
	2305		vhash = rawtransform(self, text)
	2306		elif operation == 'read':
	2307		text, vhash = readtransform(self, text)
	2308		else: # write operation
	2309		text, vhash = writetransform(self, text)
	2310		validatehash = validatehash and vhash
	2311
	2312		return text, validatehash
	2313
	2314		def checkhash(self, text, node, p1=None, p2=None, rev=None):
	2315		"""Check node hash integrity.
	2316
	2317		Available as a function so that subclasses can extend hash mismatch
	2318		behaviors as needed.
	2319		"""
	2320		try:
	2321		if p1 is None and p2 is None:
	2322		p1, p2 = self.parents(node)
	2323		if node != self.hash(text, p1, p2):
	2324		revornode = rev
	2325		if revornode is None:
	2326		revornode = templatefilters.short(hex(node))
	2327		raise RevlogError(_("integrity check failed on %s:%s")
	2328		% (self.indexfile, pycompat.bytestr(revornode)))
	2329		except RevlogError:
	2330		if self._censorable and _censoredtext(text):
	2331		raise error.CensoredNodeError(self.indexfile, node, text)
	2332		raise
	2333
	2334		def _enforceinlinesize(self, tr, fp=None):
	2335		"""Check if the revlog is too big for inline and convert if so.
	2336
	2337		This should be called after revisions are added to the revlog. If the
	2338		revlog has grown too large to be an inline revlog, it will convert it
	2339		to use multiple index and data files.
	2340		"""
	2341		tiprev = len(self) - 1
	2342		if (not self._inline or
	2343		(self.start(tiprev) + self.length(tiprev)) < _maxinline):
	2344		return
	2345
	2346		trinfo = tr.find(self.indexfile)
	2347		if trinfo is None:
	2348		raise RevlogError(_("%s not found in the transaction")
	2349		% self.indexfile)
	2350
	2351		trindex = trinfo[2]
	2352		if trindex is not None:
	2353		dataoff = self.start(trindex)
	2354		else:
	2355		# revlog was stripped at start of transaction, use all leftover data
	2356		trindex = len(self) - 1
	2357		dataoff = self.end(tiprev)
	2358
	2359		tr.add(self.datafile, dataoff)
	2360
	2361		if fp:
	2362		fp.flush()
	2363		fp.close()
	2364
	2365		with self._datafp('w') as df:
	2366		for r in self:
	2367		df.write(self._getsegmentforrevs(r, r)[1])
	2368
	2369		with self._indexfp('w') as fp:
	2370		self.version &= ~FLAG_INLINE_DATA
	2371		self._inline = False
	2372		io = self._io
	2373		for i in self:
	2374		e = io.packentry(self.index[i], self.node, self.version, i)
	2375		fp.write(e)
	2376
	2377		# the temp file replace the real index when we exit the context
	2378		# manager
	2379
	2380		tr.replace(self.indexfile, trindex * self._io.size)
	2381		self._chunkclear()
	2382
	2383		def addrevision(self, text, transaction, link, p1, p2, cachedelta=None,
	2384		node=None, flags=REVIDX_DEFAULT_FLAGS, deltacomputer=None):
	2385		"""add a revision to the log
	2386
	2387		text - the revision data to add
	2388		transaction - the transaction object used for rollback
	2389		link - the linkrev data to add
	2390		p1, p2 - the parent nodeids of the revision
	2391		cachedelta - an optional precomputed delta
	2392		node - nodeid of revision; typically node is not specified, and it is
	2393		computed by default as hash(text, p1, p2), however subclasses might
	2394		use different hashing method (and override checkhash() in such case)
	2395		flags - the known flags to set on the revision
	2396		deltacomputer - an optional _deltacomputer instance shared between
	2397		multiple calls
	2398		"""
	2399		if link == nullrev:
	2400		raise RevlogError(_("attempted to add linkrev -1 to %s")
	2401		% self.indexfile)
	2402
	2403		if flags:
	2404		node = node or self.hash(text, p1, p2)
	2405
	2406		rawtext, validatehash = self._processflags(text, flags, 'write')
	2407
	2408		# If the flag processor modifies the revision data, ignore any provided
	2409		# cachedelta.
	2410		if rawtext != text:
	2411		cachedelta = None
	2412
	2413		if len(rawtext) > _maxentrysize:
	2414		raise RevlogError(
	2415		_("%s: size of %d bytes exceeds maximum revlog storage of 2GiB")
	2416		% (self.indexfile, len(rawtext)))
	2417
	2418		node = node or self.hash(rawtext, p1, p2)
	2419		if node in self.nodemap:
	2420		return node
	2421
	2422		if validatehash:
	2423		self.checkhash(rawtext, node, p1=p1, p2=p2)
	2424
	2425		return self.addrawrevision(rawtext, transaction, link, p1, p2, node,
	2426		flags, cachedelta=cachedelta,
	2427		deltacomputer=deltacomputer)
	2428
	2429		def addrawrevision(self, rawtext, transaction, link, p1, p2, node, flags,
	2430		cachedelta=None, deltacomputer=None):
	2431		"""add a raw revision with known flags, node and parents
	2432		useful when reusing a revision not stored in this revlog (ex: received
	2433		over wire, or read from an external bundle).
	2434		"""
	2435		dfh = None
	2436		if not self._inline:
	2437		dfh = self._datafp("a+")
	2438		ifh = self._indexfp("a+")
	2439		try:
	2440		return self._addrevision(node, rawtext, transaction, link, p1, p2,
	2441		flags, cachedelta, ifh, dfh,
	2442		deltacomputer=deltacomputer)
	2443		finally:
	2444		if dfh:
	2445		dfh.close()
	2446		ifh.close()
	2447
	2448		def compress(self, data):
	2449		"""Generate a possibly-compressed representation of data."""
	2450		if not data:
	2451		return '', data
	2452
	2453		compressed = self._compressor.compress(data)
	2454
	2455		if compressed:
	2456		# The revlog compressor added the header in the returned data.
	2457		return '', compressed
	2458
	2459		if data[0:1] == '\0':
	2460		return '', data
	2461		return 'u', data
	2462
	2463		def decompress(self, data):
	2464		"""Decompress a revlog chunk.
	2465
	2466		The chunk is expected to begin with a header identifying the
	2467		format type so it can be routed to an appropriate decompressor.
	2468		"""
	2469		if not data:
	2470		return data
	2471
	2472		# Revlogs are read much more frequently than they are written and many
	2473		# chunks only take microseconds to decompress, so performance is
	2474		# important here.
	2475		#
	2476		# We can make a few assumptions about revlogs:
	2477		#
	2478		# 1) the majority of chunks will be compressed (as opposed to inline
	2479		# raw data).
	2480		# 2) decompressing any data will likely by at least 10x slower than
	2481		# returning raw inline data.
	2482		# 3) we want to prioritize common and officially supported compression
	2483		# engines
	2484		#
	2485		# It follows that we want to optimize for "decompress compressed data
	2486		# when encoded with common and officially supported compression engines"
	2487		# case over "raw data" and "data encoded by less common or non-official
	2488		# compression engines." That is why we have the inline lookup first
	2489		# followed by the compengines lookup.
	2490		#
	2491		# According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
	2492		# compressed chunks. And this matters for changelog and manifest reads.
	2493		t = data[0:1]
	2494
	2495		if t == 'x':
	2496		try:
	2497		return _zlibdecompress(data)
	2498		except zlib.error as e:
	2499		raise RevlogError(_('revlog decompress error: %s') %
	2500		stringutil.forcebytestr(e))
	2501		# '\0' is more common than 'u' so it goes first.
	2502		elif t == '\0':
	2503		return data
	2504		elif t == 'u':
	2505		return util.buffer(data, 1)
	2506
	2507		try:
	2508		compressor = self._decompressors[t]
	2509		except KeyError:
	2510		try:
	2511		engine = util.compengines.forrevlogheader(t)
	2512		compressor = engine.revlogcompressor()
	2513		self._decompressors[t] = compressor
	2514		except KeyError:
	2515		raise RevlogError(_('unknown compression type %r') % t)
	2516
	2517		return compressor.decompress(data)
	2518
	2519		def _isgooddeltainfo(self, deltainfo, revinfo):
	2520		"""Returns True if the given delta is good. Good means that it is within
	2521		the disk span, disk size, and chain length bounds that we know to be
	2522		performant."""
	2523		if deltainfo is None:
	2524		return False
	2525
	2526		# - 'deltainfo.distance' is the distance from the base revision --
	2527		# bounding it limits the amount of I/O we need to do.
	2528		# - 'deltainfo.compresseddeltalen' is the sum of the total size of
	2529		# deltas we need to apply -- bounding it limits the amount of CPU
	2530		# we consume.
	2531
	2532		if self._sparserevlog:
	2533		# As sparse-read will be used, we can consider that the distance,
	2534		# instead of being the span of the whole chunk,
	2535		# is the span of the largest read chunk
	2536		base = deltainfo.base
	2537
	2538		if base != nullrev:
	2539		deltachain = self._deltachain(base)[0]
	2540		else:
	2541		deltachain = []
	2542
	2543		# search for the first non-snapshot revision
	2544		for idx, r in enumerate(deltachain):
	2545		if not self.issnapshot(r):
	2546		break
	2547		deltachain = deltachain[idx:]
	2548		chunks = _slicechunk(self, deltachain, deltainfo)
	2549		all_span = [_segmentspan(self, revs, deltainfo) for revs in chunks]
	2550		distance = max(all_span)
	2551		else:
	2552		distance = deltainfo.distance
	2553
	2554		textlen = revinfo.textlen
	2555		defaultmax = textlen * 4
	2556		maxdist = self._maxdeltachainspan
	2557		if not maxdist:
	2558		maxdist = distance # ensure the conditional pass
	2559		maxdist = max(maxdist, defaultmax)
	2560		if self._sparserevlog and maxdist < self._srmingapsize:
	2561		# In multiple place, we are ignoring irrelevant data range below a
	2562		# certain size. Be also apply this tradeoff here and relax span
	2563		# constraint for small enought content.
	2564		maxdist = self._srmingapsize
	2565
	2566		# Bad delta from read span:
	2567		#
	2568		# If the span of data read is larger than the maximum allowed.
	2569		if maxdist < distance:
	2570		return False
	2571
	2572		# Bad delta from new delta size:
	2573		#
	2574		# If the delta size is larger than the target text, storing the
	2575		# delta will be inefficient.
	2576		if textlen < deltainfo.deltalen:
	2577		return False
	2578
	2579		# Bad delta from cumulated payload size:
	2580		#
	2581		# If the sum of delta get larger than K * target text length.
	2582		if textlen * LIMIT_DELTA2TEXT < deltainfo.compresseddeltalen:
	2583		return False
	2584
	2585		# Bad delta from chain length:
	2586		#
	2587		# If the number of delta in the chain gets too high.
	2588		if self._maxchainlen and self._maxchainlen < deltainfo.chainlen:
	2589		return False
	2590
	2591		# bad delta from intermediate snapshot size limit
	2592		#
	2593		# If an intermediate snapshot size is higher than the limit. The
	2594		# limit exist to prevent endless chain of intermediate delta to be
	2595		# created.
	2596		if (deltainfo.snapshotdepth is not None and
	2597		(textlen >> deltainfo.snapshotdepth) < deltainfo.deltalen):
	2598		return False
	2599
	2600		# bad delta if new intermediate snapshot is larger than the previous
	2601		# snapshot
	2602		if (deltainfo.snapshotdepth
	2603		and self.length(deltainfo.base) < deltainfo.deltalen):
	2604		return False
	2605
	2606		return True
	2607
	2608		def _addrevision(self, node, rawtext, transaction, link, p1, p2, flags,
	2609		cachedelta, ifh, dfh, alwayscache=False,
	2610		deltacomputer=None):
	2611		"""internal function to add revisions to the log
	2612
	2613		see addrevision for argument descriptions.
	2614
	2615		note: "addrevision" takes non-raw text, "_addrevision" takes raw text.
	2616
	2617		if "deltacomputer" is not provided or None, a defaultdeltacomputer will
	2618		be used.
	2619
	2620		invariants:
	2621		- rawtext is optional (can be None); if not set, cachedelta must be set.
	2622		if both are set, they must correspond to each other.
	2623		"""
	2624		if node == nullid:
	2625		raise RevlogError(_("%s: attempt to add null revision") %
	2626		(self.indexfile))
	2627		if node == wdirid or node in wdirfilenodeids:
	2628		raise RevlogError(_("%s: attempt to add wdir revision") %
	2629		(self.indexfile))
	2630
	2631		if self._inline:
	2632		fh = ifh
	2633		else:
	2634		fh = dfh
	2635
	2636		btext = [rawtext]
	2637
	2638		curr = len(self)
	2639		prev = curr - 1
	2640		offset = self.end(prev)
	2641		p1r, p2r = self.rev(p1), self.rev(p2)
	2642
	2643		# full versions are inserted when the needed deltas
	2644		# become comparable to the uncompressed text
	2645		if rawtext is None:
	2646		# need rawtext size, before changed by flag processors, which is
	2647		# the non-raw size. use revlog explicitly to avoid filelog's extra
	2648		# logic that might remove metadata size.
	2649		textlen = mdiff.patchedsize(revlog.size(self, cachedelta[0]),
	2650		cachedelta[1])
	2651		else:
	2652		textlen = len(rawtext)
	2653
	2654		if deltacomputer is None:
	2655		deltacomputer = _deltacomputer(self)
	2656
	2657		revinfo = _revisioninfo(node, p1, p2, btext, textlen, cachedelta, flags)
	2658
	2659		# no delta for flag processor revision (see "candelta" for why)
	2660		# not calling candelta since only one revision needs test, also to
	2661		# avoid overhead fetching flags again.
	2662		if flags & REVIDX_RAWTEXT_CHANGING_FLAGS:
	2663		deltainfo = None
	2664		else:
	2665		deltainfo = deltacomputer.finddeltainfo(revinfo, fh)
	2666
	2667		if deltainfo is not None:
	2668		base = deltainfo.base
	2669		chainbase = deltainfo.chainbase
	2670		data = deltainfo.data
	2671		l = deltainfo.deltalen
	2672		else:
	2673		rawtext = deltacomputer.buildtext(revinfo, fh)
	2674		data = self.compress(rawtext)
	2675		l = len(data[1]) + len(data[0])
	2676		base = chainbase = curr
	2677
	2678		e = (offset_type(offset, flags), l, textlen,
	2679		base, link, p1r, p2r, node)
	2680		self.index.append(e)
	2681		self.nodemap[node] = curr
	2682
	2683		entry = self._io.packentry(e, self.node, self.version, curr)
	2684		self._writeentry(transaction, ifh, dfh, entry, data, link, offset)
	2685
	2686		if alwayscache and rawtext is None:
	2687		rawtext = deltacomputer.buildtext(revinfo, fh)
	2688
	2689		if type(rawtext) == bytes: # only accept immutable objects
	2690		self._cache = (node, curr, rawtext)
	2691		self._chainbasecache[curr] = chainbase
	2692		return node
	2693
	2694		def _writeentry(self, transaction, ifh, dfh, entry, data, link, offset):
	2695		# Files opened in a+ mode have inconsistent behavior on various
	2696		# platforms. Windows requires that a file positioning call be made
	2697		# when the file handle transitions between reads and writes. See
	2698		# 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
	2699		# platforms, Python or the platform itself can be buggy. Some versions
	2700		# of Solaris have been observed to not append at the end of the file
	2701		# if the file was seeked to before the end. See issue4943 for more.
	2702		#
	2703		# We work around this issue by inserting a seek() before writing.
	2704		# Note: This is likely not necessary on Python 3.
	2705		ifh.seek(0, os.SEEK_END)
	2706		if dfh:
	2707		dfh.seek(0, os.SEEK_END)
	2708
	2709		curr = len(self) - 1
	2710		if not self._inline:
	2711		transaction.add(self.datafile, offset)
	2712		transaction.add(self.indexfile, curr * len(entry))
	2713		if data[0]:
	2714		dfh.write(data[0])
	2715		dfh.write(data[1])
	2716		ifh.write(entry)
	2717		else:
	2718		offset += curr * self._io.size
	2719		transaction.add(self.indexfile, offset, curr)
	2720		ifh.write(entry)
	2721		ifh.write(data[0])
	2722		ifh.write(data[1])
	2723		self._enforceinlinesize(transaction, ifh)
	2724
	2725		def addgroup(self, deltas, linkmapper, transaction, addrevisioncb=None):
	2726		"""
	2727		add a delta group
	2728
	2729		given a set of deltas, add them to the revision log. the
	2730		first delta is against its parent, which should be in our
	2731		log, the rest are against the previous delta.
	2732
	2733		If ``addrevisioncb`` is defined, it will be called with arguments of
	2734		this revlog and the node that was added.
	2735		"""
	2736
	2737		nodes = []
	2738
	2739		r = len(self)
	2740		end = 0
	2741		if r:
	2742		end = self.end(r - 1)
	2743		ifh = self._indexfp("a+")
	2744		isize = r * self._io.size
	2745		if self._inline:
	2746		transaction.add(self.indexfile, end + isize, r)
	2747		dfh = None
	2748		else:
	2749		transaction.add(self.indexfile, isize, r)
	2750		transaction.add(self.datafile, end)
	2751		dfh = self._datafp("a+")
	2752		def flush():
	2753		if dfh:
	2754		dfh.flush()
	2755		ifh.flush()
	2756		try:
	2757		deltacomputer = _deltacomputer(self)
	2758		# loop through our set of deltas
	2759		for data in deltas:
	2760		node, p1, p2, linknode, deltabase, delta, flags = data
	2761		link = linkmapper(linknode)
	2762		flags = flags or REVIDX_DEFAULT_FLAGS
	2763
	2764		nodes.append(node)
	2765
	2766		if node in self.nodemap:
	2767		# this can happen if two branches make the same change
	2768		continue
	2769
	2770		for p in (p1, p2):
	2771		if p not in self.nodemap:
	2772		raise LookupError(p, self.indexfile,
	2773		_('unknown parent'))
	2774
	2775		if deltabase not in self.nodemap:
	2776		raise LookupError(deltabase, self.indexfile,
	2777		_('unknown delta base'))
	2778
	2779		baserev = self.rev(deltabase)
	2780
	2781		if baserev != nullrev and self.iscensored(baserev):
	2782		# if base is censored, delta must be full replacement in a
	2783		# single patch operation
	2784		hlen = struct.calcsize(">lll")
	2785		oldlen = self.rawsize(baserev)
	2786		newlen = len(delta) - hlen
	2787		if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
	2788		raise error.CensoredBaseError(self.indexfile,
	2789		self.node(baserev))
	2790
	2791		if not flags and self._peek_iscensored(baserev, delta, flush):
	2792		flags \|= REVIDX_ISCENSORED
	2793
	2794		# We assume consumers of addrevisioncb will want to retrieve
	2795		# the added revision, which will require a call to
	2796		# revision(). revision() will fast path if there is a cache
	2797		# hit. So, we tell _addrevision() to always cache in this case.
	2798		# We're only using addgroup() in the context of changegroup
	2799		# generation so the revision data can always be handled as raw
	2800		# by the flagprocessor.
	2801		self._addrevision(node, None, transaction, link,
	2802		p1, p2, flags, (baserev, delta),
	2803		ifh, dfh,
	2804		alwayscache=bool(addrevisioncb),
	2805		deltacomputer=deltacomputer)
	2806
	2807		if addrevisioncb:
	2808		addrevisioncb(self, node)
	2809
	2810		if not dfh and not self._inline:
	2811		# addrevision switched from inline to conventional
	2812		# reopen the index
	2813		ifh.close()
	2814		dfh = self._datafp("a+")
	2815		ifh = self._indexfp("a+")
	2816		finally:
	2817		if dfh:
	2818		dfh.close()
	2819		ifh.close()
	2820
	2821		return nodes
	2822
	2823		def iscensored(self, rev):
	2824		"""Check if a file revision is censored."""
	2825		if not self._censorable:
	2826		return False
	2827
	2828		return self.flags(rev) & REVIDX_ISCENSORED
	2829
	2830		def _peek_iscensored(self, baserev, delta, flush):
	2831		"""Quickly check if a delta produces a censored revision."""
	2832		if not self._censorable:
	2833		return False
	2834
	2835		# Fragile heuristic: unless new file meta keys are added alphabetically
	2836		# preceding "censored", all censored revisions are prefixed by
	2837		# "\1\ncensored:". A delta producing such a censored revision must be a
	2838		# full-replacement delta, so we inspect the first and only patch in the
	2839		# delta for this prefix.
	2840		hlen = struct.calcsize(">lll")
	2841		if len(delta) <= hlen:
	2842		return False
	2843
	2844		oldlen = self.rawsize(baserev)
	2845		newlen = len(delta) - hlen
	2846		if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
	2847		return False
	2848
	2849		add = "\1\ncensored:"
	2850		addlen = len(add)
	2851		return newlen >= addlen and delta[hlen:hlen + addlen] == add
	2852
	2853		def getstrippoint(self, minlink):
	2854		"""find the minimum rev that must be stripped to strip the linkrev
	2855
	2856		Returns a tuple containing the minimum rev and a set of all revs that
	2857		have linkrevs that will be broken by this strip.
	2858		"""
	2859		brokenrevs = set()
	2860		strippoint = len(self)
	2861
	2862		heads = {}
	2863		futurelargelinkrevs = set()
	2864		for head in self.headrevs():
	2865		headlinkrev = self.linkrev(head)
	2866		heads[head] = headlinkrev
	2867		if headlinkrev >= minlink:
	2868		futurelargelinkrevs.add(headlinkrev)
	2869
	2870		# This algorithm involves walking down the rev graph, starting at the
	2871		# heads. Since the revs are topologically sorted according to linkrev,
	2872		# once all head linkrevs are below the minlink, we know there are
	2873		# no more revs that could have a linkrev greater than minlink.
	2874		# So we can stop walking.
	2875		while futurelargelinkrevs:
	2876		strippoint -= 1
	2877		linkrev = heads.pop(strippoint)
	2878
	2879		if linkrev < minlink:
	2880		brokenrevs.add(strippoint)
	2881		else:
	2882		futurelargelinkrevs.remove(linkrev)
	2883
	2884		for p in self.parentrevs(strippoint):
	2885		if p != nullrev:
	2886		plinkrev = self.linkrev(p)
	2887		heads[p] = plinkrev
	2888		if plinkrev >= minlink:
	2889		futurelargelinkrevs.add(plinkrev)
	2890
	2891		return strippoint, brokenrevs
	2892
	2893		def strip(self, minlink, transaction):
	2894		"""truncate the revlog on the first revision with a linkrev >= minlink
	2895
	2896		This function is called when we're stripping revision minlink and
	2897		its descendants from the repository.
	2898
	2899		We have to remove all revisions with linkrev >= minlink, because
	2900		the equivalent changelog revisions will be renumbered after the
	2901		strip.
	2902
	2903		So we truncate the revlog on the first of these revisions, and
	2904		trust that the caller has saved the revisions that shouldn't be
	2905		removed and that it'll re-add them after this truncation.
	2906		"""
	2907		if len(self) == 0:
	2908		return
	2909
	2910		rev, _ = self.getstrippoint(minlink)
	2911		if rev == len(self):
	2912		return
	2913
	2914		# first truncate the files on disk
	2915		end = self.start(rev)
	2916		if not self._inline:
	2917		transaction.add(self.datafile, end)
	2918		end = rev * self._io.size
	2919		else:
	2920		end += rev * self._io.size
	2921
	2922		transaction.add(self.indexfile, end)
	2923
	2924		# then reset internal state in memory to forget those revisions
	2925		self._cache = None
	2926		self._chaininfocache = {}
	2927		self._chunkclear()
	2928		for x in pycompat.xrange(rev, len(self)):
	2929		del self.nodemap[self.node(x)]
	2930
	2931		del self.index[rev:-1]
	2932		self._nodepos = None
	2933
	2934		def checksize(self):
	2935		expected = 0
	2936		if len(self):
	2937		expected = max(0, self.end(len(self) - 1))
	2938
	2939		try:
	2940		with self._datafp() as f:
	2941		f.seek(0, 2)
	2942		actual = f.tell()
	2943		dd = actual - expected
	2944		except IOError as inst:
	2945		if inst.errno != errno.ENOENT:
	2946		raise
	2947		dd = 0
	2948
	2949		try:
	2950		f = self.opener(self.indexfile)
	2951		f.seek(0, 2)
	2952		actual = f.tell()
	2953		f.close()
	2954		s = self._io.size
	2955		i = max(0, actual // s)
	2956		di = actual - (i * s)
	2957		if self._inline:
	2958		databytes = 0
	2959		for r in self:
	2960		databytes += max(0, self.length(r))
	2961		dd = 0
	2962		di = actual - len(self) * s - databytes
	2963		except IOError as inst:
	2964		if inst.errno != errno.ENOENT:
	2965		raise
	2966		di = 0
	2967
	2968		return (dd, di)
	2969
	2970		def files(self):
	2971		res = [self.indexfile]
	2972		if not self._inline:
	2973		res.append(self.datafile)
	2974		return res
	2975
	2976		def emitrevisiondeltas(self, requests):
	2977		frev = self.rev
	2978
	2979		prevrev = None
	2980		for request in requests:
	2981		node = request.node
	2982		rev = frev(node)
	2983
	2984		if prevrev is None:
	2985		prevrev = self.index[rev][5]
	2986
	2987		# Requesting a full revision.
	2988		if request.basenode == nullid:
	2989		baserev = nullrev
	2990		# Requesting an explicit revision.
	2991		elif request.basenode is not None:
	2992		baserev = frev(request.basenode)
	2993		# Allowing us to choose.
	2994		else:
	2995		p1rev, p2rev = self.parentrevs(rev)
	2996		deltaparentrev = self.deltaparent(rev)
	2997
	2998		# Avoid sending full revisions when delta parent is null. Pick
	2999		# prev in that case. It's tempting to pick p1 in this case, as
	3000		# p1 will be smaller in the common case. However, computing a
	3001		# delta against p1 may require resolving the raw text of p1,
	3002		# which could be expensive. The revlog caches should have prev
	3003		# cached, meaning less CPU for delta generation. There is
	3004		# likely room to add a flag and/or config option to control this
	3005		# behavior.
	3006		if deltaparentrev == nullrev and self._storedeltachains:
	3007		baserev = prevrev
	3008
	3009		# Revlog is configured to use full snapshot for a reason.
	3010		# Stick to full snapshot.
	3011		elif deltaparentrev == nullrev:
	3012		baserev = nullrev
	3013
	3014		# Pick previous when we can't be sure the base is available
	3015		# on consumer.
	3016		elif deltaparentrev not in (p1rev, p2rev, prevrev):
	3017		baserev = prevrev
	3018		else:
	3019		baserev = deltaparentrev
	3020
	3021		if baserev != nullrev and not self.candelta(baserev, rev):
	3022		baserev = nullrev
	3023
	3024		revision = None
	3025		delta = None
	3026		baserevisionsize = None
	3027
	3028		if self.iscensored(baserev) or self.iscensored(rev):
	3029		try:
	3030		revision = self.revision(node, raw=True)
	3031		except error.CensoredNodeError as e:
	3032		revision = e.tombstone
	3033
	3034		if baserev != nullrev:
	3035		baserevisionsize = self.rawsize(baserev)
	3036
	3037		elif baserev == nullrev:
	3038		revision = self.revision(node, raw=True)
	3039		else:
	3040		delta = self.revdiff(baserev, rev)
	3041
	3042		extraflags = REVIDX_ELLIPSIS if request.ellipsis else 0
	3043
	3044		yield revlogrevisiondelta(
	3045		node=node,
	3046		p1node=request.p1node,
	3047		p2node=request.p2node,
	3048		linknode=request.linknode,
	3049		basenode=self.node(baserev),
	3050		flags=self.flags(rev) \| extraflags,
	3051		baserevisionsize=baserevisionsize,
	3052		revision=revision,
	3053		delta=delta)
	3054
	3055		prevrev = rev
	3056
	3057		DELTAREUSEALWAYS = 'always'
	3058		DELTAREUSESAMEREVS = 'samerevs'
	3059		DELTAREUSENEVER = 'never'
	3060
	3061		DELTAREUSEFULLADD = 'fulladd'
	3062
	3063		DELTAREUSEALL = {'always', 'samerevs', 'never', 'fulladd'}
	3064
	3065		def clone(self, tr, destrevlog, addrevisioncb=None,
	3066		deltareuse=DELTAREUSESAMEREVS, deltabothparents=None):
	3067		"""Copy this revlog to another, possibly with format changes.
	3068
	3069		The destination revlog will contain the same revisions and nodes.
	3070		However, it may not be bit-for-bit identical due to e.g. delta encoding
	3071		differences.
	3072
	3073		The ``deltareuse`` argument control how deltas from the existing revlog
	3074		are preserved in the destination revlog. The argument can have the
	3075		following values:
	3076
	3077		DELTAREUSEALWAYS
	3078		Deltas will always be reused (if possible), even if the destination
	3079		revlog would not select the same revisions for the delta. This is the
	3080		fastest mode of operation.
	3081		DELTAREUSESAMEREVS
	3082		Deltas will be reused if the destination revlog would pick the same
	3083		revisions for the delta. This mode strikes a balance between speed
	3084		and optimization.
	3085		DELTAREUSENEVER
	3086		Deltas will never be reused. This is the slowest mode of execution.
	3087		This mode can be used to recompute deltas (e.g. if the diff/delta
	3088		algorithm changes).
	3089
	3090		Delta computation can be slow, so the choice of delta reuse policy can
	3091		significantly affect run time.
	3092
	3093		The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
	3094		two extremes. Deltas will be reused if they are appropriate. But if the
	3095		delta could choose a better revision, it will do so. This means if you
	3096		are converting a non-generaldelta revlog to a generaldelta revlog,
	3097		deltas will be recomputed if the delta's parent isn't a parent of the
	3098		revision.
	3099
	3100		In addition to the delta policy, the ``deltabothparents`` argument
	3101		controls whether to compute deltas against both parents for merges.
	3102		By default, the current default is used.
	3103		"""
	3104		if deltareuse not in self.DELTAREUSEALL:
	3105		raise ValueError(_('value for deltareuse invalid: %s') % deltareuse)
	3106
	3107		if len(destrevlog):
	3108		raise ValueError(_('destination revlog is not empty'))
	3109
	3110		if getattr(self, 'filteredrevs', None):
	3111		raise ValueError(_('source revlog has filtered revisions'))
	3112		if getattr(destrevlog, 'filteredrevs', None):
	3113		raise ValueError(_('destination revlog has filtered revisions'))
	3114
	3115		# lazydeltabase controls whether to reuse a cached delta, if possible.
	3116		oldlazydeltabase = destrevlog._lazydeltabase
	3117		oldamd = destrevlog._deltabothparents
	3118
	3119		try:
	3120		if deltareuse == self.DELTAREUSEALWAYS:
	3121		destrevlog._lazydeltabase = True
	3122		elif deltareuse == self.DELTAREUSESAMEREVS:
	3123		destrevlog._lazydeltabase = False
	3124
	3125		destrevlog._deltabothparents = deltabothparents or oldamd
	3126
	3127		populatecachedelta = deltareuse in (self.DELTAREUSEALWAYS,
	3128		self.DELTAREUSESAMEREVS)
	3129
	3130		deltacomputer = _deltacomputer(destrevlog)
	3131		index = self.index
	3132		for rev in self:
	3133		entry = index[rev]
	3134
	3135		# Some classes override linkrev to take filtered revs into
	3136		# account. Use raw entry from index.
	3137		flags = entry[0] & 0xffff
	3138		linkrev = entry[4]
	3139		p1 = index[entry[5]][7]
	3140		p2 = index[entry[6]][7]
	3141		node = entry[7]
	3142
	3143		# (Possibly) reuse the delta from the revlog if allowed and
	3144		# the revlog chunk is a delta.
	3145		cachedelta = None
	3146		rawtext = None
	3147		if populatecachedelta:
	3148		dp = self.deltaparent(rev)
	3149		if dp != nullrev:
	3150		cachedelta = (dp, bytes(self._chunk(rev)))
	3151
	3152		if not cachedelta:
	3153		rawtext = self.revision(rev, raw=True)
	3154
	3155
	3156		if deltareuse == self.DELTAREUSEFULLADD:
	3157		destrevlog.addrevision(rawtext, tr, linkrev, p1, p2,
	3158		cachedelta=cachedelta,
	3159		node=node, flags=flags,
	3160		deltacomputer=deltacomputer)
	3161		else:
	3162		ifh = destrevlog.opener(destrevlog.indexfile, 'a+',
	3163		checkambig=False)
	3164		dfh = None
	3165		if not destrevlog._inline:
	3166		dfh = destrevlog.opener(destrevlog.datafile, 'a+')
	3167		try:
	3168		destrevlog._addrevision(node, rawtext, tr, linkrev, p1,
	3169		p2, flags, cachedelta, ifh, dfh,
	3170		deltacomputer=deltacomputer)
	3171		finally:
	3172		if dfh:
	3173		dfh.close()
	3174		ifh.close()
	3175
	3176		if addrevisioncb:
	3177		addrevisioncb(self, rev, node)
	3178		finally:
	3179		destrevlog._lazydeltabase = oldlazydeltabase
	3180		destrevlog._deltabothparents = oldamd

General Comments 0

Write
Preview

You need to be logged in to leave comments. Login now

No TODOs yet

	Site-wide shortcuts
/	Use quick search box
g h	Goto home page
g g	Goto my private gists page
g G	Goto my public gists page
g 0-9	Goto bookmarked items from 0-9
n r	New repository page
n g	New gist page

	Repositories
g s	Goto summary page
g c	Goto changelog page
g f	Goto files page
g F	Goto files page with file search activated
g p	Goto pull requests page
g o	Goto repository settings
g O	Goto repository access permissions settings
t s	Toggle sidebar on some pages