storageutil: extract filelog.cmp() to a standalone function...
Gregory Szorc
r40042:422beffd default
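This changeset moves the body of filelog.cmp() into a new, storage-agnostic helper, storageutil.filerevisiondifferent(), so the comparison logic no longer goes through revlog internals and can be reused by other ifilestorage implementations. After the change, filelog.cmp() is reduced to a thin delegation, as shown in the filelog.py hunk below:

    def cmp(self, node, text):
        """compare text with a given file revision

        returns True if text is different than what is stored.
        """
        return storageutil.filerevisiondifferent(self, node, text)
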
@@ -1,238 +1,219 @@
1 1 # filelog.py - file history class for mercurial
2 2 #
3 3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 from . import (
11 11 error,
12 12 repository,
13 13 revlog,
14 14 )
15 15 from .utils import (
16 16 interfaceutil,
17 17 storageutil,
18 18 )
19 19
20 20 @interfaceutil.implementer(repository.ifilestorage)
21 21 class filelog(object):
22 22 def __init__(self, opener, path):
23 23 self._revlog = revlog.revlog(opener,
24 24 '/'.join(('data', path + '.i')),
25 25 censorable=True)
26 26 # Full name of the user visible file, relative to the repository root.
27 27 # Used by LFS.
28 28 self._revlog.filename = path
29 29
30 30 def __len__(self):
31 31 return len(self._revlog)
32 32
33 33 def __iter__(self):
34 34 return self._revlog.__iter__()
35 35
36 36 def revs(self, start=0, stop=None):
37 37 return self._revlog.revs(start=start, stop=stop)
38 38
39 39 def parents(self, node):
40 40 return self._revlog.parents(node)
41 41
42 42 def parentrevs(self, rev):
43 43 return self._revlog.parentrevs(rev)
44 44
45 45 def rev(self, node):
46 46 return self._revlog.rev(node)
47 47
48 48 def node(self, rev):
49 49 return self._revlog.node(rev)
50 50
51 51 def lookup(self, node):
52 52 return storageutil.fileidlookup(self._revlog, node,
53 53 self._revlog.indexfile)
54 54
55 55 def linkrev(self, rev):
56 56 return self._revlog.linkrev(rev)
57 57
58 58 def commonancestorsheads(self, node1, node2):
59 59 return self._revlog.commonancestorsheads(node1, node2)
60 60
61 61 # Used by dagop.blockdescendants().
62 62 def descendants(self, revs):
63 63 return self._revlog.descendants(revs)
64 64
65 65 def heads(self, start=None, stop=None):
66 66 return self._revlog.heads(start, stop)
67 67
68 68 # Used by hgweb, children extension.
69 69 def children(self, node):
70 70 return self._revlog.children(node)
71 71
72 72 def iscensored(self, rev):
73 73 return self._revlog.iscensored(rev)
74 74
75 75 def revision(self, node, _df=None, raw=False):
76 76 return self._revlog.revision(node, _df=_df, raw=raw)
77 77
78 78 def emitrevisions(self, nodes, nodesorder=None,
79 79 revisiondata=False, assumehaveparentrevisions=False,
80 80 deltaprevious=False):
81 81 return self._revlog.emitrevisions(
82 82 nodes, nodesorder=nodesorder, revisiondata=revisiondata,
83 83 assumehaveparentrevisions=assumehaveparentrevisions,
84 84 deltaprevious=deltaprevious)
85 85
86 86 def addrevision(self, revisiondata, transaction, linkrev, p1, p2,
87 87 node=None, flags=revlog.REVIDX_DEFAULT_FLAGS,
88 88 cachedelta=None):
89 89 return self._revlog.addrevision(revisiondata, transaction, linkrev,
90 90 p1, p2, node=node, flags=flags,
91 91 cachedelta=cachedelta)
92 92
93 93 def addgroup(self, deltas, linkmapper, transaction, addrevisioncb=None):
94 94 return self._revlog.addgroup(deltas, linkmapper, transaction,
95 95 addrevisioncb=addrevisioncb)
96 96
97 97 def getstrippoint(self, minlink):
98 98 return self._revlog.getstrippoint(minlink)
99 99
100 100 def strip(self, minlink, transaction):
101 101 return self._revlog.strip(minlink, transaction)
102 102
103 103 def censorrevision(self, tr, node, tombstone=b''):
104 104 return self._revlog.censorrevision(node, tombstone=tombstone)
105 105
106 106 def files(self):
107 107 return self._revlog.files()
108 108
109 109 def read(self, node):
110 110 return storageutil.filtermetadata(self.revision(node))
111 111
112 112 def add(self, text, meta, transaction, link, p1=None, p2=None):
113 113 if meta or text.startswith('\1\n'):
114 114 text = storageutil.packmeta(meta, text)
115 115 return self.addrevision(text, transaction, link, p1, p2)
116 116
117 117 def renamed(self, node):
118 118 return storageutil.filerevisioncopied(self, node)
119 119
120 120 def size(self, rev):
121 121 """return the size of a given revision"""
122 122
123 123 # for revisions with renames, we have to go the slow way
124 124 node = self.node(rev)
125 125 if self.renamed(node):
126 126 return len(self.read(node))
127 127 if self.iscensored(rev):
128 128 return 0
129 129
130 130 # XXX if self.read(node).startswith("\1\n"), this returns (size+4)
131 131 return self._revlog.size(rev)
132 132
133 133 def cmp(self, node, text):
134 134 """compare text with a given file revision
135 135
136 136 returns True if text is different than what is stored.
137 137 """
138
139 t = text
140 if text.startswith('\1\n'):
141 t = '\1\n\1\n' + text
142
143 samehashes = not self._revlog.cmp(node, t)
144 if samehashes:
145 return False
146
147 # censored files compare against the empty file
148 if self.iscensored(self.rev(node)):
149 return text != ''
150
151 # renaming a file produces a different hash, even if the data
152 # remains unchanged. Check if it's the case (slow):
153 if self.renamed(node):
154 t2 = self.read(node)
155 return t2 != text
156
157 return True
138 return storageutil.filerevisiondifferent(self, node, text)
158 139
159 140 def verifyintegrity(self, state):
160 141 return self._revlog.verifyintegrity(state)
161 142
162 143 def storageinfo(self, exclusivefiles=False, sharedfiles=False,
163 144 revisionscount=False, trackedsize=False,
164 145 storedsize=False):
165 146 return self._revlog.storageinfo(
166 147 exclusivefiles=exclusivefiles, sharedfiles=sharedfiles,
167 148 revisionscount=revisionscount, trackedsize=trackedsize,
168 149 storedsize=storedsize)
169 150
170 151 # TODO these aren't part of the interface and aren't internal methods.
171 152 # Callers should be fixed to not use them.
172 153
173 154 # Used by bundlefilelog, unionfilelog.
174 155 @property
175 156 def indexfile(self):
176 157 return self._revlog.indexfile
177 158
178 159 @indexfile.setter
179 160 def indexfile(self, value):
180 161 self._revlog.indexfile = value
181 162
182 163 # Used by repo upgrade.
183 164 def clone(self, tr, destrevlog, **kwargs):
184 165 if not isinstance(destrevlog, filelog):
185 166 raise error.ProgrammingError('expected filelog to clone()')
186 167
187 168 return self._revlog.clone(tr, destrevlog._revlog, **kwargs)
188 169
189 170 class narrowfilelog(filelog):
190 171 """Filelog variation to be used with narrow stores."""
191 172
192 173 def __init__(self, opener, path, narrowmatch):
193 174 super(narrowfilelog, self).__init__(opener, path)
194 175 self._narrowmatch = narrowmatch
195 176
196 177 def renamed(self, node):
197 178 res = super(narrowfilelog, self).renamed(node)
198 179
199 180 # Renames that come from outside the narrowspec are problematic
200 181 # because we may lack the base text for the rename. This can result
201 182 # in code attempting to walk the ancestry or compute a diff
202 183 # encountering a missing revision. We address this by silently
203 184 # removing rename metadata if the source file is outside the
204 185 # narrow spec.
205 186 #
206 187 # A better solution would be to see if the base revision is available,
207 188 # rather than assuming it isn't.
208 189 #
209 190 # An even better solution would be to teach all consumers of rename
210 191 # metadata that the base revision may not be available.
211 192 #
212 193 # TODO consider better ways of doing this.
213 194 if res and not self._narrowmatch(res[0]):
214 195 return None
215 196
216 197 return res
217 198
218 199 def size(self, rev):
219 200 # Because we have a custom renamed() that may lie, we need to call
220 201 # the base renamed() to report accurate results.
221 202 node = self.node(rev)
222 203 if super(narrowfilelog, self).renamed(node):
223 204 return len(self.read(node))
224 205 else:
225 206 return super(narrowfilelog, self).size(rev)
226 207
227 208 def cmp(self, node, text):
228 209 different = super(narrowfilelog, self).cmp(node, text)
229 210
230 211 # Because renamed() may lie, we may get false positives for
231 212 # different content. Check for this by comparing against the original
232 213 # renamed() implementation.
233 214 if different:
234 215 if super(narrowfilelog, self).renamed(node):
235 216 t2 = self.read(node)
236 217 return t2 != text
237 218
238 219 return different
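
Because the extracted helper only calls into the ifilestorage interface (parents(), rev(), iscensored(), renamed(), and read()), it can be exercised without a revlog at all. The sketch below is illustrative only; the fakestore class and its sample data are invented for this example and are not part of the changeset:

    from mercurial.node import nullid
    from mercurial.utils import storageutil

    class fakestore(object):
        """Tiny in-memory stand-in exposing just what the helper needs."""

        def __init__(self, data):
            self._data = data
            # Node hash over the fulltext and both (null) parents, the same
            # way hashrevisionsha1() is used elsewhere in storageutil.
            self._node = storageutil.hashrevisionsha1(data, nullid, nullid)

        def parents(self, node):
            return nullid, nullid

        def rev(self, node):
            return 0

        def iscensored(self, rev):
            return False

        def renamed(self, node):
            return False

        def read(self, node):
            # Only consulted when renamed() is truthy.
            return self._data

    store = fakestore(b'contents\n')

    # Same fulltext: the computed hash matches the node, so not "different".
    assert not storageutil.filerevisiondifferent(store, store._node, b'contents\n')

    # Changed fulltext: hashes differ and no rename or censoring applies.
    assert storageutil.filerevisiondifferent(store, store._node, b'changed\n')
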
@@ -1,228 +1,254 @@
1 1 # storageutil.py - Storage functionality agnostic of backend implementation.
2 2 #
3 3 # Copyright 2018 Gregory Szorc <gregory.szorc@gmail.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 import hashlib
11 11 import re
12 12
13 13 from ..i18n import _
14 14 from ..node import (
15 15 bin,
16 16 nullid,
17 17 nullrev,
18 18 )
19 19 from .. import (
20 20 error,
21 21 pycompat,
22 22 )
23 23
24 24 _nullhash = hashlib.sha1(nullid)
25 25
26 26 def hashrevisionsha1(text, p1, p2):
27 27 """Compute the SHA-1 for revision data and its parents.
28 28
29 29 This hash combines both the current file contents and its history
30 30 in a manner that makes it easy to distinguish nodes with the same
31 31 content in the revision graph.
32 32 """
33 33 # As of now, if one of the parent nodes is null, p2 is null
34 34 if p2 == nullid:
35 35 # deep copy of a hash is faster than creating one
36 36 s = _nullhash.copy()
37 37 s.update(p1)
38 38 else:
39 39 # none of the parent nodes are nullid
40 40 if p1 < p2:
41 41 a = p1
42 42 b = p2
43 43 else:
44 44 a = p2
45 45 b = p1
46 46 s = hashlib.sha1(a)
47 47 s.update(b)
48 48 s.update(text)
49 49 return s.digest()
50 50
51 51 METADATA_RE = re.compile(b'\x01\n')
52 52
53 53 def parsemeta(text):
54 54 """Parse metadata header from revision data.
55 55
56 56 Returns a 2-tuple of (metadata, offset), where both can be None if there
57 57 is no metadata.
58 58 """
59 59 # text can be buffer, so we can't use .startswith or .index
60 60 if text[:2] != b'\x01\n':
61 61 return None, None
62 62 s = METADATA_RE.search(text, 2).start()
63 63 mtext = text[2:s]
64 64 meta = {}
65 65 for l in mtext.splitlines():
66 66 k, v = l.split(b': ', 1)
67 67 meta[k] = v
68 68 return meta, s + 2
69 69
70 70 def packmeta(meta, text):
71 71 """Add metadata to fulltext to produce revision text."""
72 72 keys = sorted(meta)
73 73 metatext = b''.join(b'%s: %s\n' % (k, meta[k]) for k in keys)
74 74 return b'\x01\n%s\x01\n%s' % (metatext, text)
75 75
76 76 def iscensoredtext(text):
77 77 meta = parsemeta(text)[0]
78 78 return meta and b'censored' in meta
79 79
80 80 def filtermetadata(text):
81 81 """Extract just the revision data from source text.
82 82
83 83 Returns ``text`` unless it has a metadata header, in which case we return
84 84 a new buffer without the metadata.
85 85 """
86 86 if not text.startswith(b'\x01\n'):
87 87 return text
88 88
89 89 offset = text.index(b'\x01\n', 2)
90 90 return text[offset + 2:]
91 91
92 92 def filerevisioncopied(store, node):
93 93 """Resolve file revision copy metadata.
94 94
95 95 Returns ``False`` if the file has no copy metadata. Otherwise a
96 96 2-tuple of the source filename and node.
97 97 """
98 98 if store.parents(node)[0] != nullid:
99 99 return False
100 100
101 101 meta = parsemeta(store.revision(node))[0]
102 102
103 103 # copy and copyrev occur in pairs. In rare cases due to old bugs,
104 104 # one can occur without the other. So ensure both are present to flag
105 105 # as a copy.
106 106 if meta and b'copy' in meta and b'copyrev' in meta:
107 107 return meta[b'copy'], bin(meta[b'copyrev'])
108 108
109 109 return False
110 110
111 def filerevisiondifferent(store, node, filedata):
112 """Determines whether file data is equivalent to a stored node."""
113
114 if filedata.startswith(b'\x01\n'):
115 revisiontext = b'\x01\n\x01\n' + filedata
116 else:
117 revisiontext = filedata
118
119 p1, p2 = store.parents(node)
120
121 computednode = hashrevisionsha1(revisiontext, p1, p2)
122
123 if computednode == node:
124 return False
125
126 # Censored files compare against the empty file.
127 if store.iscensored(store.rev(node)):
128 return filedata != b''
129
130 # Renaming a file produces a different hash, even if the data
131 # remains unchanged. Check if that's the case.
132 if store.renamed(node):
133 return store.read(node) != filedata
134
135 return True
136
111 137 def iterrevs(storelen, start=0, stop=None):
112 138 """Iterate over revision numbers in a store."""
113 139 step = 1
114 140
115 141 if stop is not None:
116 142 if start > stop:
117 143 step = -1
118 144 stop += step
119 145 if stop > storelen:
120 146 stop = storelen
121 147 else:
122 148 stop = storelen
123 149
124 150 return pycompat.xrange(start, stop, step)
125 151
126 152 def fileidlookup(store, fileid, identifier):
127 153 """Resolve the file node for a value.
128 154
129 155 ``store`` is an object implementing the ``ifileindex`` interface.
130 156
131 157 ``fileid`` can be:
132 158
133 159 * A 20 byte binary node.
134 160 * An integer revision number
135 161 * A 40 byte hex node.
136 162 * A bytes that can be parsed as an integer representing a revision number.
137 163
138 164 ``identifier`` is used to populate ``error.LookupError`` with an identifier
139 165 for the store.
140 166
141 167 Raises ``error.LookupError`` on failure.
142 168 """
143 169 if isinstance(fileid, int):
144 170 try:
145 171 return store.node(fileid)
146 172 except IndexError:
147 173 raise error.LookupError(fileid, identifier, _('no match found'))
148 174
149 175 if len(fileid) == 20:
150 176 try:
151 177 store.rev(fileid)
152 178 return fileid
153 179 except error.LookupError:
154 180 pass
155 181
156 182 if len(fileid) == 40:
157 183 try:
158 184 rawnode = bin(fileid)
159 185 store.rev(rawnode)
160 186 return rawnode
161 187 except TypeError:
162 188 pass
163 189
164 190 try:
165 191 rev = int(fileid)
166 192
167 193 if b'%d' % rev != fileid:
168 194 raise ValueError
169 195
170 196 try:
171 197 return store.node(rev)
172 198 except (IndexError, TypeError):
173 199 pass
174 200 except (ValueError, OverflowError):
175 201 pass
176 202
177 203 raise error.LookupError(fileid, identifier, _('no match found'))
178 204
179 205 def resolvestripinfo(minlinkrev, tiprev, headrevs, linkrevfn, parentrevsfn):
180 206 """Resolve information needed to strip revisions.
181 207
182 208 Finds the minimum revision number that must be stripped in order to
183 209 strip ``minlinkrev``.
184 210
185 211 Returns a 2-tuple of the minimum revision number to do that and a set
186 212 of all revision numbers that have linkrevs that would be broken
187 213 by that strip.
188 214
189 215 ``tiprev`` is the current tip-most revision. It is ``len(store) - 1``.
190 216 ``headrevs`` is an iterable of head revisions.
191 217 ``linkrevfn`` is a callable that receives a revision and returns a linked
192 218 revision.
193 219 ``parentrevsfn`` is a callable that receives a revision number and returns
194 220 an iterable of its parent revision numbers.
195 221 """
196 222 brokenrevs = set()
197 223 strippoint = tiprev + 1
198 224
199 225 heads = {}
200 226 futurelargelinkrevs = set()
201 227 for head in headrevs:
202 228 headlinkrev = linkrevfn(head)
203 229 heads[head] = headlinkrev
204 230 if headlinkrev >= minlinkrev:
205 231 futurelargelinkrevs.add(headlinkrev)
206 232
207 233 # This algorithm involves walking down the rev graph, starting at the
208 234 # heads. Since the revs are topologically sorted according to linkrev,
209 235 # once all head linkrevs are below the minlink, we know there are
210 236 # no more revs that could have a linkrev greater than minlink.
211 237 # So we can stop walking.
212 238 while futurelargelinkrevs:
213 239 strippoint -= 1
214 240 linkrev = heads.pop(strippoint)
215 241
216 242 if linkrev < minlinkrev:
217 243 brokenrevs.add(strippoint)
218 244 else:
219 245 futurelargelinkrevs.remove(linkrev)
220 246
221 247 for p in parentrevsfn(strippoint):
222 248 if p != nullrev:
223 249 plinkrev = linkrevfn(p)
224 250 heads[p] = plinkrev
225 251 if plinkrev >= minlinkrev:
226 252 futurelargelinkrevs.add(plinkrev)
227 253
228 254 return strippoint, brokenrevs
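
The pre-existing metadata helpers in this module are what make the b'\x01\n' handling in filerevisiondifferent() necessary: copy metadata is stored in-band, framed by b'\x01\n' markers. A small, self-contained round trip of those helpers (all values below are made up for illustration):

    from mercurial.utils import storageutil

    # Pack copy metadata in front of a fulltext, as filelog.add() does when
    # metadata is present.
    meta = {b'copy': b'old/name', b'copyrev': b'0' * 40}
    revtext = storageutil.packmeta(meta, b'file contents\n')

    # parsemeta() returns the metadata dict and the offset of the real data.
    parsed, offset = storageutil.parsemeta(revtext)
    assert parsed == meta
    assert revtext[offset:] == b'file contents\n'

    # filtermetadata() strips the header again, which is what filelog.read()
    # relies on.
    assert storageutil.filtermetadata(revtext) == b'file contents\n'
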