##// END OF EJS Templates
nodemap: write nodemap data on disk...
nodemap: write nodemap data on disk Let us start writing data on disk (so that we can read it from there later). This series of changeset is going to focus first on having data on disk and updating it. Right now the data is written right next to the revlog data, in the store. We might move it to cache (with proper cache validation mechanism) later, but for now revlog have a storevfs instance and it is simpler to us it. The right location for this data is not the focus of this series. Differential Revision: https://phab.mercurial-scm.org/D7835

File last commit:

r44512:4ebd162f default
r44789:5962fd0d default
Show More
storageutil.py
512 lines | 15.1 KiB | text/x-python | PythonLexer
Gregory Szorc
storageutil: new module for storage primitives (API)...
r39913 # storageutil.py - Storage functionality agnostic of backend implementation.
#
# Copyright 2018 Gregory Szorc <gregory.szorc@gmail.com>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.
from __future__ import absolute_import
Gregory Szorc
storageutil: move metadata parsing and packing from revlog (API)...
r39914 import re
Gregory Szorc
storageutil: extract most of peek_censored from revlog...
r40361 import struct
Gregory Szorc
storageutil: new module for storage primitives (API)...
r39913
Gregory Szorc
storageutil: implement file identifier resolution method (BC)...
r40038 from ..i18n import _
Gregory Szorc
storageutil: new module for storage primitives (API)...
r39913 from ..node import (
Gregory Szorc
storageutil: implement file identifier resolution method (BC)...
r40038 bin,
Gregory Szorc
storageutil: new module for storage primitives (API)...
r39913 nullid,
Gregory Szorc
storageutil: extract functionality for resolving strip revisions...
r40040 nullrev,
Gregory Szorc
storageutil: new module for storage primitives (API)...
r39913 )
Gregory Szorc
storageutil: extract revision number iteration...
r39917 from .. import (
Gregory Szorc
storageutil: pass nodes into emitrevisions()...
r40046 dagop,
Gregory Szorc
storageutil: implement file identifier resolution method (BC)...
r40038 error,
Gregory Szorc
storageutil: make all callables optional...
r40045 mdiff,
Gregory Szorc
storageutil: extract revision number iteration...
r39917 pycompat,
)
Pulkit Goyal
interfaces: create a new folder for interfaces and move repository.py in it...
r43078 from ..interfaces import repository
Augie Fackler
core: migrate uses of hashlib.sha1 to hashutil.sha1...
r44517 from ..utils import hashutil
Gregory Szorc
storageutil: new module for storage primitives (API)...
r39913
Augie Fackler
core: migrate uses of hashlib.sha1 to hashutil.sha1...
r44517 _nullhash = hashutil.sha1(nullid)
Gregory Szorc
storageutil: new module for storage primitives (API)...
r39913
Augie Fackler
formatting: blacken the codebase...
r43346
Gregory Szorc
storageutil: new module for storage primitives (API)...
r39913 def hashrevisionsha1(text, p1, p2):
"""Compute the SHA-1 for revision data and its parents.
This hash combines both the current file contents and its history
in a manner that makes it easy to distinguish nodes with the same
content in the revision graph.
"""
# As of now, if one of the parent node is null, p2 is null
if p2 == nullid:
# deep copy of a hash is faster than creating one
s = _nullhash.copy()
s.update(p1)
else:
# none of the parent nodes are nullid
if p1 < p2:
a = p1
b = p2
else:
a = p2
b = p1
Augie Fackler
core: migrate uses of hashlib.sha1 to hashutil.sha1...
r44517 s = hashutil.sha1(a)
Gregory Szorc
storageutil: new module for storage primitives (API)...
r39913 s.update(b)
s.update(text)
return s.digest()
Gregory Szorc
storageutil: move metadata parsing and packing from revlog (API)...
r39914
Augie Fackler
formatting: blacken the codebase...
r43346
Gregory Szorc
storageutil: move metadata parsing and packing from revlog (API)...
r39914 METADATA_RE = re.compile(b'\x01\n')
Augie Fackler
formatting: blacken the codebase...
r43346
Gregory Szorc
storageutil: move metadata parsing and packing from revlog (API)...
r39914 def parsemeta(text):
"""Parse metadata header from revision data.
Returns a 2-tuple of (metadata, offset), where both can be None if there
is no metadata.
"""
# text can be buffer, so we can't use .startswith or .index
if text[:2] != b'\x01\n':
return None, None
s = METADATA_RE.search(text, 2).start()
mtext = text[2:s]
meta = {}
for l in mtext.splitlines():
k, v = l.split(b': ', 1)
meta[k] = v
return meta, s + 2
Augie Fackler
formatting: blacken the codebase...
r43346
Gregory Szorc
storageutil: move metadata parsing and packing from revlog (API)...
r39914 def packmeta(meta, text):
"""Add metadata to fulltext to produce revision text."""
keys = sorted(meta)
metatext = b''.join(b'%s: %s\n' % (k, meta[k]) for k in keys)
return b'\x01\n%s\x01\n%s' % (metatext, text)
Gregory Szorc
storageutil: move _censoredtext() from revlog...
r39915
Augie Fackler
formatting: blacken the codebase...
r43346
Gregory Szorc
storageutil: move _censoredtext() from revlog...
r39915 def iscensoredtext(text):
meta = parsemeta(text)[0]
return meta and b'censored' in meta
Gregory Szorc
storageutil: new function for extracting metadata-less content from text...
r39916
Augie Fackler
formatting: blacken the codebase...
r43346
Gregory Szorc
storageutil: new function for extracting metadata-less content from text...
r39916 def filtermetadata(text):
"""Extract just the revision data from source text.
Returns ``text`` unless it has a metadata header, in which case we return
a new buffer without hte metadata.
"""
if not text.startswith(b'\x01\n'):
return text
offset = text.index(b'\x01\n', 2)
Augie Fackler
formatting: blacken the codebase...
r43346 return text[offset + 2 :]
Gregory Szorc
storageutil: extract revision number iteration...
r39917
Gregory Szorc
storageutil: extract copy metadata retrieval out of filelog...
r40041 def filerevisioncopied(store, node):
"""Resolve file revision copy metadata.
Returns ``False`` if the file has no copy metadata. Otherwise a
2-tuple of the source filename and node.
"""
if store.parents(node)[0] != nullid:
return False
meta = parsemeta(store.revision(node))[0]
# copy and copyrev occur in pairs. In rare cases due to old bugs,
# one can occur without the other. So ensure both are present to flag
# as a copy.
if meta and b'copy' in meta and b'copyrev' in meta:
return meta[b'copy'], bin(meta[b'copyrev'])
return False
Augie Fackler
formatting: blacken the codebase...
r43346
Gregory Szorc
storageutil: invert logic of file data comparison...
r40043 def filedataequivalent(store, node, filedata):
"""Determines whether file data is equivalent to a stored node.
Returns True if the passed file data would hash to the same value
as a stored revision and False otherwise.
When a stored revision is censored, filedata must be empty to have
equivalence.
When a stored revision has copy metadata, it is ignored as part
of the compare.
"""
Gregory Szorc
storageutil: extract filelog.cmp() to a standalone function...
r40042
if filedata.startswith(b'\x01\n'):
revisiontext = b'\x01\n\x01\n' + filedata
else:
revisiontext = filedata
p1, p2 = store.parents(node)
computednode = hashrevisionsha1(revisiontext, p1, p2)
if computednode == node:
Gregory Szorc
storageutil: invert logic of file data comparison...
r40043 return True
Gregory Szorc
storageutil: extract filelog.cmp() to a standalone function...
r40042
# Censored files compare against the empty file.
if store.iscensored(store.rev(node)):
Gregory Szorc
storageutil: invert logic of file data comparison...
r40043 return filedata == b''
Gregory Szorc
storageutil: extract filelog.cmp() to a standalone function...
r40042
# Renaming a file produces a different hash, even if the data
# remains unchanged. Check if that's the case.
if store.renamed(node):
Gregory Szorc
storageutil: invert logic of file data comparison...
r40043 return store.read(node) == filedata
Gregory Szorc
storageutil: extract filelog.cmp() to a standalone function...
r40042
Gregory Szorc
storageutil: invert logic of file data comparison...
r40043 return False
Gregory Szorc
storageutil: extract filelog.cmp() to a standalone function...
r40042
Augie Fackler
formatting: blacken the codebase...
r43346
Gregory Szorc
storageutil: extract revision number iteration...
r39917 def iterrevs(storelen, start=0, stop=None):
"""Iterate over revision numbers in a store."""
step = 1
if stop is not None:
if start > stop:
step = -1
stop += step
if stop > storelen:
stop = storelen
else:
stop = storelen
return pycompat.xrange(start, stop, step)
Gregory Szorc
storageutil: implement file identifier resolution method (BC)...
r40038
Augie Fackler
formatting: blacken the codebase...
r43346
Gregory Szorc
storageutil: implement file identifier resolution method (BC)...
r40038 def fileidlookup(store, fileid, identifier):
"""Resolve the file node for a value.
``store`` is an object implementing the ``ifileindex`` interface.
``fileid`` can be:
* A 20 byte binary node.
* An integer revision number
* A 40 byte hex node.
* A bytes that can be parsed as an integer representing a revision number.
``identifier`` is used to populate ``error.LookupError`` with an identifier
for the store.
Raises ``error.LookupError`` on failure.
"""
if isinstance(fileid, int):
Gregory Szorc
storageutil: consistently raise LookupError (API)...
r40039 try:
return store.node(fileid)
except IndexError:
Augie Fackler
formatting: blacken the codebase...
r43346 raise error.LookupError(
Augie Fackler
formatting: byteify all mercurial/ and hgext/ string literals...
r43347 b'%d' % fileid, identifier, _(b'no match found')
Augie Fackler
formatting: blacken the codebase...
r43346 )
Gregory Szorc
storageutil: implement file identifier resolution method (BC)...
r40038
if len(fileid) == 20:
try:
store.rev(fileid)
return fileid
except error.LookupError:
pass
if len(fileid) == 40:
try:
rawnode = bin(fileid)
store.rev(rawnode)
return rawnode
except TypeError:
pass
try:
rev = int(fileid)
if b'%d' % rev != fileid:
raise ValueError
try:
return store.node(rev)
except (IndexError, TypeError):
pass
except (ValueError, OverflowError):
pass
Augie Fackler
formatting: byteify all mercurial/ and hgext/ string literals...
r43347 raise error.LookupError(fileid, identifier, _(b'no match found'))
Gregory Szorc
storageutil: extract functionality for resolving strip revisions...
r40040
Augie Fackler
formatting: blacken the codebase...
r43346
Gregory Szorc
storageutil: extract functionality for resolving strip revisions...
r40040 def resolvestripinfo(minlinkrev, tiprev, headrevs, linkrevfn, parentrevsfn):
"""Resolve information needed to strip revisions.
Finds the minimum revision number that must be stripped in order to
strip ``minlinkrev``.
Returns a 2-tuple of the minimum revision number to do that and a set
of all revision numbers that have linkrevs that would be broken
by that strip.
``tiprev`` is the current tip-most revision. It is ``len(store) - 1``.
``headrevs`` is an iterable of head revisions.
``linkrevfn`` is a callable that receives a revision and returns a linked
revision.
``parentrevsfn`` is a callable that receives a revision number and returns
an iterable of its parent revision numbers.
"""
brokenrevs = set()
strippoint = tiprev + 1
heads = {}
futurelargelinkrevs = set()
for head in headrevs:
headlinkrev = linkrevfn(head)
heads[head] = headlinkrev
if headlinkrev >= minlinkrev:
futurelargelinkrevs.add(headlinkrev)
# This algorithm involves walking down the rev graph, starting at the
# heads. Since the revs are topologically sorted according to linkrev,
# once all head linkrevs are below the minlink, we know there are
# no more revs that could have a linkrev greater than minlink.
# So we can stop walking.
while futurelargelinkrevs:
strippoint -= 1
linkrev = heads.pop(strippoint)
if linkrev < minlinkrev:
brokenrevs.add(strippoint)
else:
futurelargelinkrevs.remove(linkrev)
for p in parentrevsfn(strippoint):
if p != nullrev:
plinkrev = linkrevfn(p)
heads[p] = plinkrev
if plinkrev >= minlinkrev:
futurelargelinkrevs.add(plinkrev)
return strippoint, brokenrevs
Gregory Szorc
storageutil: extract most of emitrevisions() to standalone function...
r40044
Augie Fackler
formatting: blacken the codebase...
r43346
def emitrevisions(
store,
nodes,
nodesorder,
resultcls,
deltaparentfn=None,
candeltafn=None,
rawsizefn=None,
revdifffn=None,
flagsfn=None,
deltamode=repository.CG_DELTAMODE_STD,
revisiondata=False,
assumehaveparentrevisions=False,
):
Gregory Szorc
storageutil: extract most of emitrevisions() to standalone function...
r40044 """Generic implementation of ifiledata.emitrevisions().
Emitting revision data is subtly complex. This function attempts to
encapsulate all the logic for doing so in a backend-agnostic way.
``store``
Object conforming to ``ifilestorage`` interface.
Gregory Szorc
storageutil: pass nodes into emitrevisions()...
r40046 ``nodes``
List of revision nodes whose data to emit.
Gregory Szorc
storageutil: extract most of emitrevisions() to standalone function...
r40044
``resultcls``
A type implementing the ``irevisiondelta`` interface that will be
constructed and returned.
Gregory Szorc
storageutil: make all callables optional...
r40045 ``deltaparentfn`` (optional)
Gregory Szorc
storageutil: extract most of emitrevisions() to standalone function...
r40044 Callable receiving a revision number and returning the revision number
of a revision that the internal delta is stored against. This delta
will be preferred over computing a new arbitrary delta.
Gregory Szorc
storageutil: make all callables optional...
r40045 If not defined, a delta will always be computed from raw revision
data.
``candeltafn`` (optional)
Gregory Szorc
storageutil: extract most of emitrevisions() to standalone function...
r40044 Callable receiving a pair of revision numbers that returns a bool
indicating whether a delta between them can be produced.
Gregory Szorc
storageutil: make all callables optional...
r40045 If not defined, it is assumed that any two revisions can delta with
each other.
``rawsizefn`` (optional)
Gregory Szorc
storageutil: extract most of emitrevisions() to standalone function...
r40044 Callable receiving a revision number and returning the length of the
rawdata: update callers in storageutils...
r43049 ``store.rawdata(rev)``.
Gregory Szorc
storageutil: extract most of emitrevisions() to standalone function...
r40044
rawdata: update callers in storageutils...
r43049 If not defined, ``len(store.rawdata(rev))`` will be called.
Gregory Szorc
storageutil: make all callables optional...
r40045
``revdifffn`` (optional)
Gregory Szorc
storageutil: extract most of emitrevisions() to standalone function...
r40044 Callable receiving a pair of revision numbers that returns a delta
between them.
Gregory Szorc
storageutil: make all callables optional...
r40045 If not defined, a delta will be computed by invoking mdiff code
on ``store.revision()`` results.
Defining this function allows a precomputed or stored delta to be
used without having to compute on.
``flagsfn`` (optional)
Gregory Szorc
storageutil: extract most of emitrevisions() to standalone function...
r40044 Callable receiving a revision number and returns the integer flags
Gregory Szorc
storageutil: make all callables optional...
r40045 value for it. If not defined, flags value will be 0.
Gregory Szorc
storageutil: extract most of emitrevisions() to standalone function...
r40044
Boris Feld
changegroup: refactor emitrevision to use a `deltamode` argument...
r40456 ``deltamode``
constaint on delta to be sent:
* CG_DELTAMODE_STD - normal mode, try to reuse storage deltas,
* CG_DELTAMODE_PREV - only delta against "prev",
* CG_DELTAMODE_FULL - only issue full snapshot.
Gregory Szorc
storageutil: extract most of emitrevisions() to standalone function...
r40044 Whether to send fulltext revisions instead of deltas, if allowed.
Gregory Szorc
storageutil: pass nodes into emitrevisions()...
r40046 ``nodesorder``
Gregory Szorc
storageutil: extract most of emitrevisions() to standalone function...
r40044 ``revisiondata``
``assumehaveparentrevisions``
"""
fnode = store.node
Gregory Szorc
storageutil: pass nodes into emitrevisions()...
r40046 frev = store.rev
Augie Fackler
formatting: byteify all mercurial/ and hgext/ string literals...
r43347 if nodesorder == b'nodes':
Gregory Szorc
storageutil: pass nodes into emitrevisions()...
r40046 revs = [frev(n) for n in nodes]
Augie Fackler
formatting: byteify all mercurial/ and hgext/ string literals...
r43347 elif nodesorder == b'linear':
Gregory Szorc
storageutil: pass nodes into emitrevisions()...
r40046 revs = set(frev(n) for n in nodes)
revs = dagop.linearize(revs, store.parentrevs)
Augie Fackler
formatting: blacken the codebase...
r43346 else: # storage and default
Boris Feld
changegroup: restore default node ordering (issue6001)...
r40484 revs = sorted(frev(n) for n in nodes)
Gregory Szorc
storageutil: extract most of emitrevisions() to standalone function...
r40044
prevrev = None
Boris Feld
changegroup: refactor emitrevision to use a `deltamode` argument...
r40456 if deltamode == repository.CG_DELTAMODE_PREV or assumehaveparentrevisions:
Gregory Szorc
storageutil: extract most of emitrevisions() to standalone function...
r40044 prevrev = store.parentrevs(revs[0])[0]
# Set of revs available to delta against.
available = set()
for rev in revs:
if rev == nullrev:
continue
node = fnode(rev)
p1rev, p2rev = store.parentrevs(rev)
Gregory Szorc
storageutil: make all callables optional...
r40045 if deltaparentfn:
deltaparentrev = deltaparentfn(rev)
else:
deltaparentrev = nullrev
Gregory Szorc
storageutil: extract most of emitrevisions() to standalone function...
r40044 # Forced delta against previous mode.
Boris Feld
changegroup: refactor emitrevision to use a `deltamode` argument...
r40456 if deltamode == repository.CG_DELTAMODE_PREV:
Gregory Szorc
storageutil: extract most of emitrevisions() to standalone function...
r40044 baserev = prevrev
# We're instructed to send fulltext. Honor that.
Boris Feld
changegroup: refactor emitrevision to use a `deltamode` argument...
r40456 elif deltamode == repository.CG_DELTAMODE_FULL:
Gregory Szorc
storageutil: extract most of emitrevisions() to standalone function...
r40044 baserev = nullrev
Boris Feld
changegroup: allow to force delta to be against p1...
r40458 # We're instructed to use p1. Honor that
elif deltamode == repository.CG_DELTAMODE_P1:
baserev = p1rev
Gregory Szorc
storageutil: extract most of emitrevisions() to standalone function...
r40044
# There is a delta in storage. We try to use that because it
# amounts to effectively copying data from storage and is
# therefore the fastest.
elif deltaparentrev != nullrev:
# Base revision was already emitted in this group. We can
# always safely use the delta.
if deltaparentrev in available:
baserev = deltaparentrev
# Base revision is a parent that hasn't been emitted already.
# Use it if we can assume the receiver has the parent revision.
Augie Fackler
formatting: blacken the codebase...
r43346 elif assumehaveparentrevisions and deltaparentrev in (p1rev, p2rev):
Gregory Szorc
storageutil: extract most of emitrevisions() to standalone function...
r40044 baserev = deltaparentrev
# No guarantee the receiver has the delta parent. Send delta
# against last revision (if possible), which in the common case
# should be similar enough to this revision that the delta is
# reasonable.
elif prevrev is not None:
baserev = prevrev
else:
baserev = nullrev
# Storage has a fulltext revision.
# Let's use the previous revision, which is as good a guess as any.
# There is definitely room to improve this logic.
elif prevrev is not None:
baserev = prevrev
else:
baserev = nullrev
# But we can't actually use our chosen delta base for whatever
# reason. Reset to fulltext.
Gregory Szorc
storageutil: make all callables optional...
r40045 if baserev != nullrev and (candeltafn and not candeltafn(baserev, rev)):
Gregory Szorc
storageutil: extract most of emitrevisions() to standalone function...
r40044 baserev = nullrev
revision = None
delta = None
baserevisionsize = None
if revisiondata:
if store.iscensored(baserev) or store.iscensored(rev):
try:
rawdata: update callers in storageutils...
r43049 revision = store.rawdata(node)
Gregory Szorc
storageutil: extract most of emitrevisions() to standalone function...
r40044 except error.CensoredNodeError as e:
revision = e.tombstone
if baserev != nullrev:
Gregory Szorc
storageutil: make all callables optional...
r40045 if rawsizefn:
baserevisionsize = rawsizefn(baserev)
else:
rawdata: update callers in storageutils...
r43049 baserevisionsize = len(store.rawdata(baserev))
Gregory Szorc
storageutil: extract most of emitrevisions() to standalone function...
r40044
Augie Fackler
formatting: blacken the codebase...
r43346 elif (
baserev == nullrev and deltamode != repository.CG_DELTAMODE_PREV
):
rawdata: update callers in storageutils...
r43049 revision = store.rawdata(node)
Gregory Szorc
storageutil: extract most of emitrevisions() to standalone function...
r40044 available.add(rev)
else:
Gregory Szorc
storageutil: make all callables optional...
r40045 if revdifffn:
delta = revdifffn(baserev, rev)
else:
Augie Fackler
formatting: blacken the codebase...
r43346 delta = mdiff.textdiff(
store.rawdata(baserev), store.rawdata(rev)
)
Gregory Szorc
storageutil: make all callables optional...
r40045
Gregory Szorc
storageutil: extract most of emitrevisions() to standalone function...
r40044 available.add(rev)
yield resultcls(
node=node,
p1node=fnode(p1rev),
p2node=fnode(p2rev),
basenode=fnode(baserev),
Gregory Szorc
storageutil: make all callables optional...
r40045 flags=flagsfn(rev) if flagsfn else 0,
Gregory Szorc
storageutil: extract most of emitrevisions() to standalone function...
r40044 baserevisionsize=baserevisionsize,
revision=revision,
Augie Fackler
formatting: blacken the codebase...
r43346 delta=delta,
)
Gregory Szorc
storageutil: extract most of emitrevisions() to standalone function...
r40044
prevrev = rev
Gregory Szorc
storageutil: extract most of peek_censored from revlog...
r40361
Augie Fackler
formatting: blacken the codebase...
r43346
Gregory Szorc
storageutil: extract most of peek_censored from revlog...
r40361 def deltaiscensored(delta, baserev, baselenfn):
"""Determine if a delta represents censored revision data.
``baserev`` is the base revision this delta is encoded against.
``baselenfn`` is a callable receiving a revision number that resolves the
length of the revision fulltext.
Returns a bool indicating if the result of the delta represents a censored
revision.
"""
# Fragile heuristic: unless new file meta keys are added alphabetically
# preceding "censored", all censored revisions are prefixed by
# "\1\ncensored:". A delta producing such a censored revision must be a
# full-replacement delta, so we inspect the first and only patch in the
# delta for this prefix.
Augie Fackler
formatting: byteify all mercurial/ and hgext/ string literals...
r43347 hlen = struct.calcsize(b">lll")
Gregory Szorc
storageutil: extract most of peek_censored from revlog...
r40361 if len(delta) <= hlen:
return False
oldlen = baselenfn(baserev)
newlen = len(delta) - hlen
if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
return False
Augie Fackler
formatting: byteify all mercurial/ and hgext/ string literals...
r43347 add = b"\1\ncensored:"
Gregory Szorc
storageutil: extract most of peek_censored from revlog...
r40361 addlen = len(add)
Augie Fackler
formatting: blacken the codebase...
r43346 return newlen >= addlen and delta[hlen : hlen + addlen] == add