##// END OF EJS Templates
revlog: group delta computation methods under _deltacomputer object...
Paul Morelle -
r35756:f90f6fd1 default
parent child Browse files
Show More
@@ -264,6 +264,155 b' class _deltainfo(object):'
264 264 chainlen = attr.ib()
265 265 compresseddeltalen = attr.ib()
266 266
267 class _deltacomputer(object):
268 def __init__(self, revlog):
269 self.revlog = revlog
270
271 def _getcandidaterevs(self, p1, p2, cachedelta):
272 """
273 Provides revisions that present an interest to be diffed against,
274 grouped by level of easiness.
275 """
276 revlog = self.revlog
277 curr = len(revlog)
278 prev = curr - 1
279 p1r, p2r = revlog.rev(p1), revlog.rev(p2)
280
281 # should we try to build a delta?
282 if prev != nullrev and revlog.storedeltachains:
283 tested = set()
284 # This condition is true most of the time when processing
285 # changegroup data into a generaldelta repo. The only time it
286 # isn't true is if this is the first revision in a delta chain
287 # or if ``format.generaldelta=true`` disabled ``lazydeltabase``.
288 if cachedelta and revlog._generaldelta and revlog._lazydeltabase:
289 # Assume what we received from the server is a good choice
290 # build delta will reuse the cache
291 yield (cachedelta[0],)
292 tested.add(cachedelta[0])
293
294 if revlog._generaldelta:
295 # exclude already lazy tested base if any
296 parents = [p for p in (p1r, p2r)
297 if p != nullrev and p not in tested]
298 if parents and not revlog._aggressivemergedeltas:
299 # Pick whichever parent is closer to us (to minimize the
300 # chance of having to build a fulltext).
301 parents = [max(parents)]
302 tested.update(parents)
303 yield parents
304
305 if prev not in tested:
306 # other approach failed try against prev to hopefully save us a
307 # fulltext.
308 yield (prev,)
309
310 def buildtext(self, revinfo, fh):
311 """Builds a fulltext version of a revision
312
313 revinfo: _revisioninfo instance that contains all needed info
314 fh: file handle to either the .i or the .d revlog file,
315 depending on whether it is inlined or not
316 """
317 btext = revinfo.btext
318 if btext[0] is not None:
319 return btext[0]
320
321 revlog = self.revlog
322 cachedelta = revinfo.cachedelta
323 flags = revinfo.flags
324 node = revinfo.node
325
326 baserev = cachedelta[0]
327 delta = cachedelta[1]
328 # special case deltas which replace entire base; no need to decode
329 # base revision. this neatly avoids censored bases, which throw when
330 # they're decoded.
331 hlen = struct.calcsize(">lll")
332 if delta[:hlen] == mdiff.replacediffheader(revlog.rawsize(baserev),
333 len(delta) - hlen):
334 btext[0] = delta[hlen:]
335 else:
336 basetext = revlog.revision(baserev, _df=fh, raw=True)
337 btext[0] = mdiff.patch(basetext, delta)
338
339 try:
340 res = revlog._processflags(btext[0], flags, 'read', raw=True)
341 btext[0], validatehash = res
342 if validatehash:
343 revlog.checkhash(btext[0], node, p1=revinfo.p1, p2=revinfo.p2)
344 if flags & REVIDX_ISCENSORED:
345 raise RevlogError(_('node %s is not censored') % node)
346 except CensoredNodeError:
347 # must pass the censored index flag to add censored revisions
348 if not flags & REVIDX_ISCENSORED:
349 raise
350 return btext[0]
351
352 def _builddeltadiff(self, base, revinfo, fh):
353 revlog = self.revlog
354 t = self.buildtext(revinfo, fh)
355 if revlog.iscensored(base):
356 # deltas based on a censored revision must replace the
357 # full content in one patch, so delta works everywhere
358 header = mdiff.replacediffheader(revlog.rawsize(base), len(t))
359 delta = header + t
360 else:
361 ptext = revlog.revision(base, _df=fh, raw=True)
362 delta = mdiff.textdiff(ptext, t)
363
364 return delta
365
366 def _builddeltainfo(self, revinfo, base, fh):
367 # can we use the cached delta?
368 if revinfo.cachedelta and revinfo.cachedelta[0] == base:
369 delta = revinfo.cachedelta[1]
370 else:
371 delta = self._builddeltadiff(base, revinfo, fh)
372 revlog = self.revlog
373 header, data = revlog.compress(delta)
374 deltalen = len(header) + len(data)
375 chainbase = revlog.chainbase(base)
376 offset = revlog.end(len(revlog) - 1)
377 dist = deltalen + offset - revlog.start(chainbase)
378 if revlog._generaldelta:
379 deltabase = base
380 else:
381 deltabase = chainbase
382 chainlen, compresseddeltalen = revlog._chaininfo(base)
383 chainlen += 1
384 compresseddeltalen += deltalen
385 return _deltainfo(dist, deltalen, (header, data), deltabase,
386 chainbase, chainlen, compresseddeltalen)
387
388 def finddeltainfo(self, revinfo, fh):
389 """Find an acceptable delta against a candidate revision
390
391 revinfo: information about the revision (instance of _revisioninfo)
392 fh: file handle to either the .i or the .d revlog file,
393 depending on whether it is inlined or not
394
395 Returns the first acceptable candidate revision, as ordered by
396 _getcandidaterevs
397 """
398 cachedelta = revinfo.cachedelta
399 p1 = revinfo.p1
400 p2 = revinfo.p2
401 revlog = self.revlog
402
403 deltainfo = None
404 for candidaterevs in self._getcandidaterevs(p1, p2, cachedelta):
405 nominateddeltas = []
406 for candidaterev in candidaterevs:
407 candidatedelta = self._builddeltainfo(revinfo, candidaterev, fh)
408 if revlog._isgooddeltainfo(candidatedelta, revinfo.textlen):
409 nominateddeltas.append(candidatedelta)
410 if nominateddeltas:
411 deltainfo = min(nominateddeltas, key=lambda x: x.deltalen)
412 break
413
414 return deltainfo
415
267 416 @attr.s(slots=True, frozen=True)
268 417 class _revisioninfo(object):
269 418 """Information about a revision that allows building its fulltext
@@ -1721,7 +1870,7 b' class revlog(object):'
1721 1870 self._chunkclear()
1722 1871
1723 1872 def addrevision(self, text, transaction, link, p1, p2, cachedelta=None,
1724 node=None, flags=REVIDX_DEFAULT_FLAGS):
1873 node=None, flags=REVIDX_DEFAULT_FLAGS, deltacomputer=None):
1725 1874 """add a revision to the log
1726 1875
1727 1876 text - the revision data to add
@@ -1733,6 +1882,8 b' class revlog(object):'
1733 1882 computed by default as hash(text, p1, p2), however subclasses might
1734 1883 use different hashing method (and override checkhash() in such case)
1735 1884 flags - the known flags to set on the revision
1885 deltacomputer - an optional _deltacomputer instance shared between
1886 multiple calls
1736 1887 """
1737 1888 if link == nullrev:
1738 1889 raise RevlogError(_("attempted to add linkrev -1 to %s")
@@ -1761,10 +1912,11 b' class revlog(object):'
1761 1912 self.checkhash(rawtext, node, p1=p1, p2=p2)
1762 1913
1763 1914 return self.addrawrevision(rawtext, transaction, link, p1, p2, node,
1764 flags, cachedelta=cachedelta)
1915 flags, cachedelta=cachedelta,
1916 deltacomputer=deltacomputer)
1765 1917
1766 1918 def addrawrevision(self, rawtext, transaction, link, p1, p2, node, flags,
1767 cachedelta=None):
1919 cachedelta=None, deltacomputer=None):
1768 1920 """add a raw revision with known flags, node and parents
1769 1921 useful when reusing a revision not stored in this revlog (ex: received
1770 1922 over wire, or read from an external bundle).
@@ -1775,7 +1927,8 b' class revlog(object):'
1775 1927 ifh = self.opener(self.indexfile, "a+", checkambig=self._checkambig)
1776 1928 try:
1777 1929 return self._addrevision(node, rawtext, transaction, link, p1, p2,
1778 flags, cachedelta, ifh, dfh)
1930 flags, cachedelta, ifh, dfh,
1931 deltacomputer=deltacomputer)
1779 1932 finally:
1780 1933 if dfh:
1781 1934 dfh.close()
@@ -1875,154 +2028,18 b' class revlog(object):'
1875 2028
1876 2029 return True
1877 2030
1878 def _getcandidaterevs(self, p1, p2, cachedelta):
1879 """
1880 Provides revisions that present an interest to be diffed against,
1881 grouped by level of easiness.
1882 """
1883 curr = len(self)
1884 prev = curr - 1
1885 p1r, p2r = self.rev(p1), self.rev(p2)
1886
1887 # should we try to build a delta?
1888 if prev != nullrev and self.storedeltachains:
1889 tested = set()
1890 # This condition is true most of the time when processing
1891 # changegroup data into a generaldelta repo. The only time it
1892 # isn't true is if this is the first revision in a delta chain
1893 # or if ``format.generaldelta=true`` disabled ``lazydeltabase``.
1894 if cachedelta and self._generaldelta and self._lazydeltabase:
1895 # Assume what we received from the server is a good choice
1896 # build delta will reuse the cache
1897 yield (cachedelta[0],)
1898 tested.add(cachedelta[0])
1899
1900 if self._generaldelta:
1901 # exclude already lazy tested base if any
1902 parents = [p for p in (p1r, p2r)
1903 if p != nullrev and p not in tested]
1904 if parents and not self._aggressivemergedeltas:
1905 # Pick whichever parent is closer to us (to minimize the
1906 # chance of having to build a fulltext).
1907 parents = [max(parents)]
1908 tested.update(parents)
1909 yield parents
1910
1911 if prev not in tested:
1912 # other approach failed try against prev to hopefully save us a
1913 # fulltext.
1914 yield (prev,)
1915
1916 def _buildtext(self, revinfo, fh):
1917 """Builds a fulltext version of a revision
1918
1919 revinfo: _revisioninfo instance that contains all needed info
1920 fh: file handle to either the .i or the .d revlog file,
1921 depending on whether it is inlined or not
1922 """
1923 btext = revinfo.btext
1924 if btext[0] is not None:
1925 return btext[0]
1926
1927 cachedelta = revinfo.cachedelta
1928 flags = revinfo.flags
1929 node = revinfo.node
1930
1931 baserev = cachedelta[0]
1932 delta = cachedelta[1]
1933 # special case deltas which replace entire base; no need to decode
1934 # base revision. this neatly avoids censored bases, which throw when
1935 # they're decoded.
1936 hlen = struct.calcsize(">lll")
1937 if delta[:hlen] == mdiff.replacediffheader(self.rawsize(baserev),
1938 len(delta) - hlen):
1939 btext[0] = delta[hlen:]
1940 else:
1941 basetext = self.revision(baserev, _df=fh, raw=True)
1942 btext[0] = mdiff.patch(basetext, delta)
1943
1944 try:
1945 res = self._processflags(btext[0], flags, 'read', raw=True)
1946 btext[0], validatehash = res
1947 if validatehash:
1948 self.checkhash(btext[0], node, p1=revinfo.p1, p2=revinfo.p2)
1949 if flags & REVIDX_ISCENSORED:
1950 raise RevlogError(_('node %s is not censored') % node)
1951 except CensoredNodeError:
1952 # must pass the censored index flag to add censored revisions
1953 if not flags & REVIDX_ISCENSORED:
1954 raise
1955 return btext[0]
1956
1957 def _builddeltadiff(self, base, revinfo, fh):
1958 t = self._buildtext(revinfo, fh)
1959 if self.iscensored(base):
1960 # deltas based on a censored revision must replace the
1961 # full content in one patch, so delta works everywhere
1962 header = mdiff.replacediffheader(self.rawsize(base), len(t))
1963 delta = header + t
1964 else:
1965 ptext = self.revision(base, _df=fh, raw=True)
1966 delta = mdiff.textdiff(ptext, t)
1967
1968 return delta
1969
1970 def _builddeltainfo(self, revinfo, base, fh):
1971 # can we use the cached delta?
1972 if revinfo.cachedelta and revinfo.cachedelta[0] == base:
1973 delta = revinfo.cachedelta[1]
1974 else:
1975 delta = self._builddeltadiff(base, revinfo, fh)
1976 header, data = self.compress(delta)
1977 deltalen = len(header) + len(data)
1978 chainbase = self.chainbase(base)
1979 offset = self.end(len(self) - 1)
1980 dist = deltalen + offset - self.start(chainbase)
1981 if self._generaldelta:
1982 deltabase = base
1983 else:
1984 deltabase = chainbase
1985 chainlen, compresseddeltalen = self._chaininfo(base)
1986 chainlen += 1
1987 compresseddeltalen += deltalen
1988 return _deltainfo(dist, deltalen, (header, data), deltabase,
1989 chainbase, chainlen, compresseddeltalen)
1990
1991 def _finddeltainfo(self, revinfo, fh):
1992 """Find an acceptable delta against a candidate revision
1993
1994 revinfo: information about the revision (instance of _revisioninfo)
1995 fh: file handle to either the .i or the .d revlog file,
1996 depending on whether it is inlined or not
1997
1998 Returns the first acceptable candidate revision, as ordered by
1999 _getcandidaterevs
2000 """
2001 cachedelta = revinfo.cachedelta
2002 p1 = revinfo.p1
2003 p2 = revinfo.p2
2004
2005 deltainfo = None
2006 for candidaterevs in self._getcandidaterevs(p1, p2, cachedelta):
2007 nominateddeltas = []
2008 for candidaterev in candidaterevs:
2009 candidatedelta = self._builddeltainfo(revinfo, candidaterev, fh)
2010 if self._isgooddeltainfo(candidatedelta, revinfo.textlen):
2011 nominateddeltas.append(candidatedelta)
2012 if nominateddeltas:
2013 deltainfo = min(nominateddeltas, key=lambda x: x.deltalen)
2014 break
2015
2016 return deltainfo
2017
2018 2031 def _addrevision(self, node, rawtext, transaction, link, p1, p2, flags,
2019 cachedelta, ifh, dfh, alwayscache=False):
2032 cachedelta, ifh, dfh, alwayscache=False,
2033 deltacomputer=None):
2020 2034 """internal function to add revisions to the log
2021 2035
2022 2036 see addrevision for argument descriptions.
2023 2037
2024 2038 note: "addrevision" takes non-raw text, "_addrevision" takes raw text.
2025 2039
2040 if "deltacomputer" is not provided or None, a defaultdeltacomputer will
2041 be used.
2042
2026 2043 invariants:
2027 2044 - rawtext is optional (can be None); if not set, cachedelta must be set.
2028 2045 if both are set, they must correspond to each other.
@@ -2054,8 +2071,11 b' class revlog(object):'
2054 2071 else:
2055 2072 textlen = len(rawtext)
2056 2073
2074 if deltacomputer is None:
2075 deltacomputer = _deltacomputer(self)
2076
2057 2077 revinfo = _revisioninfo(node, p1, p2, btext, textlen, cachedelta, flags)
2058 deltainfo = self._finddeltainfo(revinfo, fh)
2078 deltainfo = deltacomputer.finddeltainfo(revinfo, fh)
2059 2079
2060 2080 if deltainfo is not None:
2061 2081 base = deltainfo.base
@@ -2063,7 +2083,7 b' class revlog(object):'
2063 2083 data = deltainfo.data
2064 2084 l = deltainfo.deltalen
2065 2085 else:
2066 rawtext = self._buildtext(revinfo, fh)
2086 rawtext = deltacomputer.buildtext(revinfo, fh)
2067 2087 data = self.compress(rawtext)
2068 2088 l = len(data[1]) + len(data[0])
2069 2089 base = chainbase = curr
@@ -2077,7 +2097,7 b' class revlog(object):'
2077 2097 self._writeentry(transaction, ifh, dfh, entry, data, link, offset)
2078 2098
2079 2099 if alwayscache and rawtext is None:
2080 rawtext = self._buildtext(revinfo, fh)
2100 rawtext = deltacomputer._buildtext(revinfo, fh)
2081 2101
2082 2102 if type(rawtext) == str: # only accept immutable objects
2083 2103 self._cache = (node, curr, rawtext)
@@ -2147,6 +2167,7 b' class revlog(object):'
2147 2167 dfh.flush()
2148 2168 ifh.flush()
2149 2169 try:
2170 deltacomputer = _deltacomputer(self)
2150 2171 # loop through our set of deltas
2151 2172 for data in deltas:
2152 2173 node, p1, p2, linknode, deltabase, delta, flags = data
@@ -2193,7 +2214,8 b' class revlog(object):'
2193 2214 self._addrevision(node, None, transaction, link,
2194 2215 p1, p2, flags, (baserev, delta),
2195 2216 ifh, dfh,
2196 alwayscache=bool(addrevisioncb))
2217 alwayscache=bool(addrevisioncb),
2218 deltacomputer=deltacomputer)
2197 2219
2198 2220 if addrevisioncb:
2199 2221 addrevisioncb(self, node)
@@ -2416,6 +2438,7 b' class revlog(object):'
2416 2438 populatecachedelta = deltareuse in (self.DELTAREUSEALWAYS,
2417 2439 self.DELTAREUSESAMEREVS)
2418 2440
2441 deltacomputer = _deltacomputer(destrevlog)
2419 2442 index = self.index
2420 2443 for rev in self:
2421 2444 entry = index[rev]
@@ -2444,7 +2467,8 b' class revlog(object):'
2444 2467 if deltareuse == self.DELTAREUSEFULLADD:
2445 2468 destrevlog.addrevision(rawtext, tr, linkrev, p1, p2,
2446 2469 cachedelta=cachedelta,
2447 node=node, flags=flags)
2470 node=node, flags=flags,
2471 deltacomputer=deltacomputer)
2448 2472 else:
2449 2473 ifh = destrevlog.opener(destrevlog.indexfile, 'a+',
2450 2474 checkambig=False)
@@ -2453,7 +2477,8 b' class revlog(object):'
2453 2477 dfh = destrevlog.opener(destrevlog.datafile, 'a+')
2454 2478 try:
2455 2479 destrevlog._addrevision(node, rawtext, tr, linkrev, p1,
2456 p2, flags, cachedelta, ifh, dfh)
2480 p2, flags, cachedelta, ifh, dfh,
2481 deltacomputer=deltacomputer)
2457 2482 finally:
2458 2483 if dfh:
2459 2484 dfh.close()
General Comments 0
You need to be logged in to leave comments. Login now