##// END OF EJS Templates
tests: enable pytype checking on `mercurial/bundlerepo.py`
tests: enable pytype checking on `mercurial/bundlerepo.py`

File last commit:

r52756:f4733654 default
r52766:0afd58c7 default
Show More
storageutil.py
648 lines | 21.1 KiB | text/x-python | PythonLexer
Gregory Szorc
storageutil: new module for storage primitives (API)...
r39913 # storageutil.py - Storage functionality agnostic of backend implementation.
#
# Copyright 2018 Gregory Szorc <gregory.szorc@gmail.com>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.
Matt Harbison
typing: add `from __future__ import annotations` to most files...
r52756 from __future__ import annotations
Gregory Szorc
storageutil: new module for storage primitives (API)...
r39913
Gregory Szorc
storageutil: move metadata parsing and packing from revlog (API)...
r39914 import re
Gregory Szorc
storageutil: extract most of peek_censored from revlog...
r40361 import struct
Gregory Szorc
storageutil: new module for storage primitives (API)...
r39913
Gregory Szorc
storageutil: implement file identifier resolution method (BC)...
r40038 from ..i18n import _
Gregory Szorc
storageutil: new module for storage primitives (API)...
r39913 from ..node import (
Gregory Szorc
storageutil: implement file identifier resolution method (BC)...
r40038 bin,
Gregory Szorc
storageutil: extract functionality for resolving strip revisions...
r40040 nullrev,
Joerg Sonnenberger
node: replace nullid and friends with nodeconstants class...
r47771 sha1nodeconstants,
Gregory Szorc
storageutil: new module for storage primitives (API)...
r39913 )
Gregory Szorc
storageutil: extract revision number iteration...
r39917 from .. import (
Gregory Szorc
storageutil: pass nodes into emitrevisions()...
r40046 dagop,
Gregory Szorc
storageutil: implement file identifier resolution method (BC)...
r40038 error,
Gregory Szorc
storageutil: make all callables optional...
r40045 mdiff,
Gregory Szorc
storageutil: extract revision number iteration...
r39917 )
Pulkit Goyal
interfaces: create a new folder for interfaces and move repository.py in it...
r43078 from ..interfaces import repository
Raphaël Gomès
changegroupv4: add sidedata helpers...
r47449 from ..revlogutils import sidedata as sidedatamod
Augie Fackler
core: migrate uses of hashlib.sha1 to hashutil.sha1...
r44517 from ..utils import hashutil
Gregory Szorc
storageutil: new module for storage primitives (API)...
r39913
Joerg Sonnenberger
node: replace nullid and friends with nodeconstants class...
# SHA-1 state pre-fed with the null node id. hashrevisionsha1() copies this
# object instead of building a fresh hash whenever the second parent is null.
_nullhash = hashutil.sha1(sha1nodeconstants.nullid)
Gregory Szorc
storageutil: new module for storage primitives (API)...
r39913
Raphaël Gomès
cg4: introduce protocol flag to signify the presence of sidedata...
r47843 # revision data contains extra metadata not part of the official digest
# Only used in changegroup >= v4.
CG_FLAG_SIDEDATA = 1
Augie Fackler
formatting: blacken the codebase...
r43346
Gregory Szorc
storageutil: new module for storage primitives (API)...
def hashrevisionsha1(text, p1, p2):
    """Return the SHA-1 digest identifying a revision.

    The digest covers both parent nodes and the revision ``text``, so two
    revisions with identical content but different ancestry get different
    nodes.
    """
    if p2 == sha1nodeconstants.nullid:
        # As of now, if one of the parent nodes is null, it is p2. Copying
        # the pre-seeded null hash is faster than creating a new one.
        hasher = _nullhash.copy()
        hasher.update(p1)
    else:
        # Neither parent is null: feed them in sorted order.
        low, high = (p1, p2) if p1 < p2 else (p2, p1)
        hasher = hashutil.sha1(low)
        hasher.update(high)
    hasher.update(text)
    return hasher.digest()
Gregory Szorc
storageutil: move metadata parsing and packing from revlog (API)...
r39914
Augie Fackler
formatting: blacken the codebase...
r43346
Gregory Szorc
storageutil: move metadata parsing and packing from revlog (API)...
# Matches the b'\x01\n' marker that brackets a metadata header. parsemeta()
# searches for it from offset 2 to locate the end of the header.
METADATA_RE = re.compile(b'\x01\n')
Augie Fackler
formatting: blacken the codebase...
r43346
Gregory Szorc
storageutil: move metadata parsing and packing from revlog (API)...
def parsemeta(text):
    """Split the metadata header off revision data.

    Returns ``(metadata, offset)``: a dict of the ``key: value`` header lines
    and the offset at which the actual content starts. Both are ``None`` when
    ``text`` does not begin with the metadata marker.
    """
    # text can be a buffer, so slicing is used instead of .startswith/.index
    if text[:2] != b'\x01\n':
        return None, None
    end = METADATA_RE.search(text, 2).start()
    meta = {}
    for line in text[2:end].splitlines():
        key, value = line.split(b': ', 1)
        meta[key] = value
    # Skip over the closing two-byte marker.
    return meta, end + 2
Augie Fackler
formatting: blacken the codebase...
r43346
Gregory Szorc
storageutil: move metadata parsing and packing from revlog (API)...
def packmeta(meta, text):
    """Prepend a metadata header built from ``meta`` to ``text``.

    Keys are emitted in sorted order, one ``key: value`` line each, and the
    header is bracketed by ``\\x01\\n`` markers.
    """
    lines = [b'%s: %s\n' % (key, meta[key]) for key in sorted(meta)]
    header = b''.join(lines)
    return b'\x01\n%s\x01\n%s' % (header, text)
Gregory Szorc
storageutil: move _censoredtext() from revlog...
r39915
Augie Fackler
formatting: blacken the codebase...
r43346
Gregory Szorc
storageutil: move _censoredtext() from revlog...
def iscensoredtext(text):
    """Return True if ``text`` has a metadata header with a ``censored`` key.

    The result is always a real ``bool``; text without a metadata header (or
    with an empty one) yields ``False`` rather than ``None`` or ``{}``.
    """
    meta = parsemeta(text)[0]
    return bool(meta) and b'censored' in meta
Gregory Szorc
storageutil: new function for extracting metadata-less content from text...
r39916
Augie Fackler
formatting: blacken the codebase...
r43346
Gregory Szorc
storageutil: new function for extracting metadata-less content from text...
def filtermetadata(text):
    """Return the revision data of ``text`` without its metadata header.

    ``text`` itself is returned when it carries no metadata header; otherwise
    a new buffer holding only what follows the header is returned.
    """
    marker = b'\x01\n'
    if text.startswith(marker):
        # The header ends at the second marker; content starts right after.
        end = text.index(marker, 2)
        return text[end + 2 :]
    return text
Gregory Szorc
storageutil: extract revision number iteration...
r39917
Gregory Szorc
storageutil: extract copy metadata retrieval out of filelog...
def filerevisioncopied(store, node):
    """Look up the copy metadata recorded for a file revision.

    Returns ``False`` when the revision has no copy metadata, otherwise a
    2-tuple of the source filename and the (binary) source node.
    """
    firstparent = store.parents(node)[0]
    if firstparent != sha1nodeconstants.nullid:
        # When creating a copy or move we set filelog parents to null,
        # because contents are probably unrelated and making a delta
        # would not be useful.
        # Conversely, if filelog p1 is non-null we know
        # there is no copy metadata.
        # In the presence of merges, this reasoning becomes invalid
        # if we reorder parents. See tests/test-issue6528.t.
        return False

    meta = parsemeta(store.revision(node))[0]

    # copy and copyrev occur in pairs. In rare cases due to old bugs,
    # one can occur without the other. So require both before flagging
    # the revision as a copy.
    if not meta or b'copy' not in meta or b'copyrev' not in meta:
        return False
    return meta[b'copy'], bin(meta[b'copyrev'])
Augie Fackler
formatting: blacken the codebase...
r43346
Gregory Szorc
storageutil: invert logic of file data comparison...
def filedataequivalent(store, node, filedata):
    """Tell whether ``filedata`` is equivalent to the stored revision ``node``.

    Returns True when ``filedata`` hashes to ``node`` (using the stored
    parents), and False otherwise, with two special cases:

    * a censored stored revision is only equivalent to empty ``filedata``;
    * for a renamed revision the copy metadata is ignored and the stored
      content is compared directly.
    """
    # Data that itself begins with the metadata marker is hashed with an
    # empty metadata header in front (presumably so that its leading marker
    # is not mistaken for a real header -- confirm against the writer side).
    hashedtext = (
        b'\x01\n\x01\n' + filedata
        if filedata.startswith(b'\x01\n')
        else filedata
    )

    p1, p2 = store.parents(node)
    if hashrevisionsha1(hashedtext, p1, p2) == node:
        return True

    # Censored files compare against the empty file.
    if store.iscensored(store.rev(node)):
        return filedata == b''

    # Renaming a file produces a different hash, even if the data
    # remains unchanged. Check if that's the case.
    if store.renamed(node):
        return store.read(node) == filedata

    return False
Gregory Szorc
storageutil: extract filelog.cmp() to a standalone function...
r40042
Augie Fackler
formatting: blacken the codebase...
r43346
Gregory Szorc
storageutil: extract revision number iteration...
def iterrevs(storelen, start=0, stop=None):
    """Return a ``range`` over revision numbers of a store of ``storelen`` revs.

    Without ``stop`` the range runs from ``start`` to the end of the store.
    With ``stop`` the range is inclusive of ``stop``, runs backwards when
    ``start > stop``, and never extends past ``storelen``.
    """
    if stop is None:
        return range(start, storelen, 1)

    step = -1 if start > stop else 1
    # Shift the bound by one step to make it inclusive, then clamp it.
    end = min(stop + step, storelen)
    return range(start, end, step)
Gregory Szorc
storageutil: implement file identifier resolution method (BC)...
r40038
Augie Fackler
formatting: blacken the codebase...
r43346
Gregory Szorc
storageutil: implement file identifier resolution method (BC)...
def fileidlookup(store, fileid, identifier):
    """Resolve ``fileid`` to a file node.

    ``store`` is an object implementing the ``ifileindex`` interface.

    ``fileid`` can be:

    * A binary node of appropriate size (e.g. 20/32 bytes).
    * An integer revision number.
    * A hex node of appropriate size (e.g. 40/64 bytes).
    * A bytes value that parses as an integer revision number.

    ``identifier`` is used to populate ``error.LookupError`` with an
    identifier for the store.

    Raises ``error.LookupError`` on failure.
    """
    if isinstance(fileid, int):
        try:
            return store.node(fileid)
        except IndexError:
            raise error.LookupError(
                b'%d' % fileid, identifier, _(b'no match found')
            )

    idlen = len(fileid)
    nodelen = len(store.nullid)

    # Binary node?
    if idlen == nodelen:
        try:
            store.rev(fileid)
        except error.LookupError:
            pass
        else:
            return fileid

    # Hex node?
    if idlen == 2 * nodelen:
        try:
            rawnode = bin(fileid)
            store.rev(rawnode)
        except TypeError:
            pass
        else:
            return rawnode

    # Revision number spelled as bytes? Only the canonical decimal form is
    # accepted, so values such as b'01' or b' 1' fall through.
    try:
        rev = int(fileid)
        if b'%d' % rev != fileid:
            raise ValueError

        try:
            return store.node(rev)
        except (IndexError, TypeError):
            pass
    except (ValueError, OverflowError):
        pass

    raise error.LookupError(fileid, identifier, _(b'no match found'))
Gregory Szorc
storageutil: extract functionality for resolving strip revisions...
r40040
Augie Fackler
formatting: blacken the codebase...
r43346
Gregory Szorc
storageutil: extract functionality for resolving strip revisions...
def resolvestripinfo(minlinkrev, tiprev, headrevs, linkrevfn, parentrevsfn):
    """Work out what has to be stripped in order to strip ``minlinkrev``.

    Returns a 2-tuple ``(strippoint, brokenrevs)``: the minimum revision
    number that must be stripped, and the set of revision numbers whose
    linkrevs would be broken by that strip.

    ``tiprev`` is the current tip-most revision. It is ``len(store) - 1``.

    ``headrevs`` is an iterable of head revisions.

    ``linkrevfn`` is a callable that receives a revision and returns a linked
    revision.

    ``parentrevsfn`` is a callable that receives a revision number and returns
    an iterable of its parent revision numbers.
    """
    broken = set()
    point = tiprev + 1
    # rev -> linkrev for every revision on the current walk frontier.
    frontier = {}
    # linkrevs >= minlinkrev that are still waiting to be walked over.
    pending = set()

    def track(rev):
        link = linkrevfn(rev)
        frontier[rev] = link
        if link >= minlinkrev:
            pending.add(link)

    for head in headrevs:
        track(head)

    # This algorithm involves walking down the rev graph, starting at the
    # heads. Since the revs are topologically sorted according to linkrev,
    # once all head linkrevs are below the minlink, we know there are
    # no more revs that could have a linkrev greater than minlink.
    # So we can stop walking.
    while pending:
        point -= 1
        link = frontier.pop(point)

        if link < minlinkrev:
            broken.add(point)
        else:
            pending.remove(link)

        for parent in parentrevsfn(point):
            if parent == nullrev:
                continue
            track(parent)

    return point, broken
Gregory Szorc
storageutil: extract most of emitrevisions() to standalone function...
r40044
Augie Fackler
formatting: blacken the codebase...
r43346
def _count_delta_source(debug_info, source, via_revdifffn):
    """Update the bundling statistics for one emitted delta.

    ``source`` names the base the delta was built against ("full", "prev",
    "p1", "p2" or "storage"). ``via_revdifffn`` tells whether the delta was
    obtained from the caller-supplied ``revdifffn`` (True) or by diffing the
    raw texts (False).
    """
    if source == "storage":
        # A storage base served by ``revdifffn`` only counts as a reuse; when
        # the raw texts had to be diffed it is counted as computed as well.
        if not via_revdifffn:
            debug_info['computed-delta'] += 1
        debug_info['reused-storage-delta'] += 1
    elif source == "full":
        debug_info['computed-delta'] += 1
        debug_info['delta-full'] += 1
    elif source == "prev":
        debug_info['computed-delta'] += 1
        debug_info['delta-against-prev'] += 1
    elif source == "p1":
        debug_info['computed-delta'] += 1
        debug_info['delta-against-p1'] += 1
    elif source == "p2":
        debug_info['computed-delta'] += 1
        # NOTE(review): the caller-provided dict may not pre-seed a p2
        # counter, hence the tolerant update -- confirm against the code
        # that builds and prints ``debug_info``.
        debug_info['delta-against-p2'] = (
            debug_info.get('delta-against-p2', 0) + 1
        )
    else:
        assert False, 'unreachable'


def emitrevisions(
    store,
    nodes,
    nodesorder,
    resultcls,
    deltaparentfn=None,
    candeltafn=None,
    rawsizefn=None,
    revdifffn=None,
    flagsfn=None,
    deltamode=repository.CG_DELTAMODE_STD,
    revisiondata=False,
    assumehaveparentrevisions=False,
    sidedata_helpers=None,
    debug_info=None,
):
    """Generic implementation of ifiledata.emitrevisions().

    Emitting revision data is subtly complex. This function attempts to
    encapsulate all the logic for doing so in a backend-agnostic way.

    This is a generator yielding one ``resultcls`` instance per emitted
    revision.

    ``store``
       Object conforming to ``ifilestorage`` interface.

    ``nodes``
       List of revision nodes whose data to emit.

    ``nodesorder``
       Order in which revisions are emitted: ``b'nodes'`` keeps the order of
       ``nodes``, ``b'linear'`` uses ``dagop.linearize()``, and any other
       value sorts by revision number (storage order).

    ``resultcls``
       A type implementing the ``irevisiondelta`` interface that will be
       constructed and returned.

    ``deltaparentfn`` (optional)
       Callable receiving a revision number and returning the revision number
       of a revision that the internal delta is stored against. This delta
       will be preferred over computing a new arbitrary delta.

       If not defined, a delta will always be computed from raw revision
       data.

    ``candeltafn`` (optional)
       Callable receiving a pair of revision numbers that returns a bool
       indicating whether a delta between them can be produced.

       If not defined, it is assumed that any two revisions can delta with
       each other.

    ``rawsizefn`` (optional)
       Callable receiving a revision number and returning the length of the
       ``store.rawdata(rev)``.

       If not defined, ``len(store.rawdata(rev))`` will be called.

    ``revdifffn`` (optional)
       Callable receiving a pair of revision numbers that returns a delta
       between them.

       If not defined, a delta will be computed by invoking mdiff code
       on ``store.rawdata()`` results.

       Defining this function allows a precomputed or stored delta to be
       used without having to compute one.

    ``flagsfn`` (optional)
       Callable receiving a revision number and returning the integer flags
       value for it. If not defined, flags value will be 0.

    ``deltamode``
       Constraint on the delta to be sent:

       * CG_DELTAMODE_STD  - normal mode, try to reuse storage deltas,
       * CG_DELTAMODE_PREV - only delta against "prev",
       * CG_DELTAMODE_FULL - only issue full snapshot,
       * CG_DELTAMODE_P1   - only delta against the first parent.

    ``revisiondata``
       When true, each result carries either the raw revision text or a
       delta; otherwise both are left as ``None``.

    ``assumehaveparentrevisions``
       When true, every ancestor of the parents of the emitted revisions
       (excluding the emitted revisions themselves) is treated as a usable
       delta base.

    ``sidedata_helpers`` (optional)
        If not None, means that sidedata should be included.
        See `revlogutil.sidedata.get_sidedata_helpers`.

    ``debug_info`` (optional)
        A dictionary used to gather information about the bundling
        process (if present, see config: debug.bundling.stats).
    """
    fnode = store.node
    frev = store.rev
    parents = store.parentrevs

    if nodesorder == b'nodes':
        revs = [frev(n) for n in nodes]
    elif nodesorder == b'linear':
        revs = {frev(n) for n in nodes}
        revs = dagop.linearize(revs, store.parentrevs)
    else:  # storage and default
        revs = sorted(frev(n) for n in nodes)

    prevrev = None

    if deltamode == repository.CG_DELTAMODE_PREV or assumehaveparentrevisions:
        prevrev = parents(revs[0])[0]

    # Sets of revs available to delta against.
    emitted = set()
    available = set()
    if assumehaveparentrevisions:
        common_heads = {p for r in revs for p in parents(r)}
        common_heads.difference_update(revs)
        available = store.ancestors(common_heads, inclusive=True)

    def is_usable_base(rev):
        """Is a delta against this revision usable over the wire"""
        if rev == nullrev:
            return False
        return rev in emitted or rev in available

    for rev in revs:
        if rev == nullrev:
            continue

        # Only consulted when ``debug_info`` is provided.
        debug_delta_source = None
        if debug_info is not None:
            debug_info['revision-total'] += 1

        node = fnode(rev)
        p1rev, p2rev = parents(rev)

        if debug_info is not None:
            if p1rev != p2rev and p1rev != nullrev and p2rev != nullrev:
                debug_info['merge-total'] += 1

        if deltaparentfn:
            deltaparentrev = deltaparentfn(rev)
            if debug_info is not None:
                if deltaparentrev == nullrev:
                    debug_info['available-full'] += 1
                else:
                    debug_info['available-delta'] += 1
        else:
            deltaparentrev = nullrev

        # Forced delta against previous mode.
        if deltamode == repository.CG_DELTAMODE_PREV:
            debug_delta_source = "prev"
            baserev = prevrev

        # We're instructed to send fulltext. Honor that.
        elif deltamode == repository.CG_DELTAMODE_FULL:
            debug_delta_source = "full"
            baserev = nullrev

        # We're instructed to use p1. Honor that
        elif deltamode == repository.CG_DELTAMODE_P1:
            debug_delta_source = "p1"
            baserev = p1rev

        # There is a delta in storage. We try to use that because it
        # amounts to effectively copying data from storage and is
        # therefore the fastest.
        elif is_usable_base(deltaparentrev):
            debug_delta_source = "storage"
            baserev = deltaparentrev

        # Storage holds a full snapshot: emit it as is, without a redelta.
        elif deltaparentrev == nullrev:
            debug_delta_source = "storage"
            baserev = deltaparentrev

        else:
            if deltaparentrev != nullrev and debug_info is not None:
                debug_info['denied-base-not-available'] += 1

            # No guarantee the receiver has the delta parent, or Storage has
            # a fulltext revision.
            #
            # We compute a delta on the fly to send over the wire.
            #
            # We start with a try against p1, which in the common case should
            # be close to this revision content.
            #
            # note: we could optimize between p1 and p2 in merges cases.
            #
            # This fallback must be its own ``if`` chain: were it chained
            # with ``elif`` onto the statistics check above, it would be
            # skipped whenever ``debug_info`` is provided and ``baserev``
            # would be left stale or unbound.
            if is_usable_base(p1rev):
                debug_delta_source = "p1"
                baserev = p1rev
            # if p1 was not an option, try p2
            elif is_usable_base(p2rev):
                debug_delta_source = "p2"
                baserev = p2rev
            # Send delta against prev in despair
            #
            # using the closest available ancestors first might be better?
            elif prevrev is not None:
                debug_delta_source = "prev"
                baserev = prevrev
            else:
                debug_delta_source = "full"
                baserev = nullrev

        # But we can't actually use our chosen delta base for whatever
        # reason. Reset to fulltext.
        if (
            baserev != nullrev
            and candeltafn is not None
            and not candeltafn(baserev, rev)
        ):
            debug_delta_source = "full"
            if debug_info is not None:
                debug_info['denied-delta-candeltafn'] += 1
            baserev = nullrev

        revision = None
        delta = None
        baserevisionsize = None

        if revisiondata:
            if store.iscensored(baserev) or store.iscensored(rev):
                try:
                    revision = store.rawdata(node)
                except error.CensoredNodeError as e:
                    if debug_info is not None:
                        debug_info['denied-delta-not-available'] += 1
                    revision = e.tombstone

                if baserev != nullrev:
                    if rawsizefn:
                        baserevisionsize = rawsizefn(baserev)
                    else:
                        baserevisionsize = len(store.rawdata(baserev))

            elif (
                baserev == nullrev and deltamode != repository.CG_DELTAMODE_PREV
            ):
                if debug_info is not None:
                    debug_info['computed-delta'] += 1  # close enough
                    debug_info['delta-full'] += 1
                revision = store.rawdata(node)
                emitted.add(rev)
            else:
                if debug_info is not None:
                    _count_delta_source(
                        debug_info, debug_delta_source, bool(revdifffn)
                    )
                if revdifffn:
                    delta = revdifffn(baserev, rev)
                else:
                    delta = mdiff.textdiff(
                        store.rawdata(baserev), store.rawdata(rev)
                    )

                emitted.add(rev)

        serialized_sidedata = None
        sidedata_flags = (0, 0)
        if sidedata_helpers:
            try:
                old_sidedata = store.sidedata(rev)
            except error.CensoredNodeError:
                # skip any potential sidedata of the censored revision
                sidedata = {}
            else:
                sidedata, sidedata_flags = sidedatamod.run_sidedata_helpers(
                    store=store,
                    sidedata_helpers=sidedata_helpers,
                    sidedata=old_sidedata,
                    rev=rev,
                )
            if sidedata:
                serialized_sidedata = sidedatamod.serialize_sidedata(sidedata)

        flags = flagsfn(rev) if flagsfn else 0
        protocol_flags = 0
        if serialized_sidedata:
            # Advertise that sidedata exists to the other side
            protocol_flags |= CG_FLAG_SIDEDATA
            # Computers and removers can return flags to add and/or remove.
            # NOTE(review): ``&`` binds tighter than ``|``, so the "remove"
            # mask only filters the flags being added and never clears bits
            # already present in ``flags``. Parenthesized to make the existing
            # behavior explicit -- confirm this is the intended semantics.
            flags = flags | (sidedata_flags[0] & ~sidedata_flags[1])

        yield resultcls(
            node=node,
            p1node=fnode(p1rev),
            p2node=fnode(p2rev),
            basenode=fnode(baserev),
            flags=flags,
            baserevisionsize=baserevisionsize,
            revision=revision,
            delta=delta,
            sidedata=serialized_sidedata,
            protocol_flags=protocol_flags,
        )

        prevrev = rev
Gregory Szorc
storageutil: extract most of peek_censored from revlog...
r40361
Augie Fackler
formatting: blacken the codebase...
r43346
Gregory Szorc
storageutil: extract most of peek_censored from revlog...
def deltaiscensored(delta, baserev, baselenfn):
    """Tell whether applying ``delta`` yields censored revision data.

    ``baserev`` is the base revision this delta is encoded against.

    ``baselenfn`` is a callable receiving a revision number that resolves the
    length of the revision fulltext.

    Returns a bool indicating if the result of the delta represents a censored
    revision.
    """
    # Fragile heuristic: unless new file meta keys are added alphabetically
    # preceding "censored", all censored revisions are prefixed by
    # "\1\ncensored:". A delta producing such a censored revision must be a
    # full-replacement delta, so we inspect the first and only patch in the
    # delta for this prefix.
    headerlen = struct.calcsize(b">lll")
    if len(delta) <= headerlen:
        return False

    baselen = baselenfn(baserev)
    payloadlen = len(delta) - headerlen
    # Anything but a single full-replacement patch cannot be a censor.
    if delta[:headerlen] != mdiff.replacediffheader(baselen, payloadlen):
        return False

    prefix = b"\1\ncensored:"
    prefixlen = len(prefix)
    if payloadlen < prefixlen:
        return False
    return delta[headerlen : headerlen + prefixlen] == prefix