@@ -1,591 +1,609 | |||
|
1 | 1 | # store.py - repository store handling for Mercurial |
|
2 | 2 | # |
|
3 | 3 | # Copyright 2008 Matt Mackall <mpm@selenic.com> |
|
4 | 4 | # |
|
5 | 5 | # This software may be used and distributed according to the terms of the |
|
6 | 6 | # GNU General Public License version 2 or any later version. |
|
7 | 7 | |
|
8 | 8 | from __future__ import absolute_import |
|
9 | 9 | |
|
10 | 10 | import errno |
|
11 | 11 | import hashlib |
|
12 | 12 | import os |
|
13 | 13 | import stat |
|
14 | 14 | |
|
15 | 15 | from .i18n import _ |
|
16 | 16 | from . import ( |
|
17 | 17 | error, |
|
18 | 18 | node, |
|
19 | 19 | policy, |
|
20 | 20 | pycompat, |
|
21 | 21 | util, |
|
22 | 22 | vfs as vfsmod, |
|
23 | 23 | ) |
|
24 | 24 | |
|
25 | 25 | parsers = policy.importmod(r'parsers') |
|
26 | 26 | |
|
27 | def _matchtrackedpath(path, matcher): | |
|
28 | """parses a fncache entry and returns whether the entry is tracking a path | |
|
29 | matched by matcher or not. | |
|
30 | ||
|
31 | If matcher is None, returns True""" | |
|
32 | ||
|
33 | if matcher is None: | |
|
34 | return True | |
|
35 | path = decodedir(path) | |
|
36 | if path.startswith('data/'): | |
|
37 | return matcher(path[len('data/'):-len('.i')]) | |
|
38 | elif path.startswith('meta/'): | |
|
39 | return matcher.visitdir(path[len('meta/'):-len('/00manifest.i')] or '.') | |
|
40 | ||
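A small illustration of how the two fncache entry kinds handled by `_matchtrackedpath` are classified; the entry strings below are hypothetical examples, not taken from a real repository:

```python
# 'data/' entries name filelog revlogs for individual tracked files;
# 'meta/' entries name tree-manifest revlogs, where only the directory
# part matters, so the matcher's visitdir() is consulted instead.
entries = [
    'data/dir/src/f10.i',         # filelog index for file dir/src/f10
    'meta/dir/src/00manifest.i',  # tree manifest for directory dir/src
]
for e in entries:
    if e.startswith('data/'):
        print('file entry ->', e[len('data/'):-len('.i')])
    elif e.startswith('meta/'):
        print('dir entry  ->', e[len('meta/'):-len('/00manifest.i')] or '.')
```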
|
27 | 41 | # This avoids a collision between a file named foo and a dir named |
|
28 | 42 | # foo.i or foo.d |
|
29 | 43 | def _encodedir(path): |
|
30 | 44 | ''' |
|
31 | 45 | >>> _encodedir(b'data/foo.i') |
|
32 | 46 | 'data/foo.i' |
|
33 | 47 | >>> _encodedir(b'data/foo.i/bla.i') |
|
34 | 48 | 'data/foo.i.hg/bla.i' |
|
35 | 49 | >>> _encodedir(b'data/foo.i.hg/bla.i') |
|
36 | 50 | 'data/foo.i.hg.hg/bla.i' |
|
37 | 51 | >>> _encodedir(b'data/foo.i\\ndata/foo.i/bla.i\\ndata/foo.i.hg/bla.i\\n') |
|
38 | 52 | 'data/foo.i\\ndata/foo.i.hg/bla.i\\ndata/foo.i.hg.hg/bla.i\\n' |
|
39 | 53 | ''' |
|
40 | 54 | return (path |
|
41 | 55 | .replace(".hg/", ".hg.hg/") |
|
42 | 56 | .replace(".i/", ".i.hg/") |
|
43 | 57 | .replace(".d/", ".d.hg/")) |
|
44 | 58 | |
|
45 | 59 | encodedir = getattr(parsers, 'encodedir', _encodedir) |
|
46 | 60 | |
|
47 | 61 | def decodedir(path): |
|
48 | 62 | ''' |
|
49 | 63 | >>> decodedir(b'data/foo.i') |
|
50 | 64 | 'data/foo.i' |
|
51 | 65 | >>> decodedir(b'data/foo.i.hg/bla.i') |
|
52 | 66 | 'data/foo.i/bla.i' |
|
53 | 67 | >>> decodedir(b'data/foo.i.hg.hg/bla.i') |
|
54 | 68 | 'data/foo.i.hg/bla.i' |
|
55 | 69 | ''' |
|
56 | 70 | if ".hg/" not in path: |
|
57 | 71 | return path |
|
58 | 72 | return (path |
|
59 | 73 | .replace(".d.hg/", ".d/") |
|
60 | 74 | .replace(".i.hg/", ".i/") |
|
61 | 75 | .replace(".hg.hg/", ".hg/")) |
|
62 | 76 | |
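A round-trip sketch of the escaping implemented by `_encodedir`/`decodedir`, matching the doctests above: a file `foo.i` and a directory `foo.i/` would collide on disk, so directory segments ending in `.i`, `.d`, or `.hg` gain an extra `.hg` suffix, and the substitutions invert cleanly.

```python
path = 'data/foo.i/bla.i'
encoded = (path.replace('.hg/', '.hg.hg/')
               .replace('.i/', '.i.hg/')
               .replace('.d/', '.d.hg/'))
assert encoded == 'data/foo.i.hg/bla.i'
decoded = (encoded.replace('.d.hg/', '.d/')
                  .replace('.i.hg/', '.i/')
                  .replace('.hg.hg/', '.hg/'))
assert decoded == path
```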
|
63 | 77 | def _reserved(): |
|
64 | 78 | ''' characters that are problematic for filesystems |
|
65 | 79 | |
|
66 | 80 | * ascii escapes (0..31) |
|
67 | 81 | * ascii hi (126..255) |
|
68 | 82 | * windows specials |
|
69 | 83 | |
|
70 | 84 | these characters will be escaped by the encode functions |
|
71 | 85 | ''' |
|
72 | 86 | winreserved = [ord(x) for x in u'\\:*?"<>|'] |
|
73 | 87 | for x in range(32): |
|
74 | 88 | yield x |
|
75 | 89 | for x in range(126, 256): |
|
76 | 90 | yield x |
|
77 | 91 | for x in winreserved: |
|
78 | 92 | yield x |
|
79 | 93 | |
|
80 | 94 | def _buildencodefun(): |
|
81 | 95 | ''' |
|
82 | 96 | >>> enc, dec = _buildencodefun() |
|
83 | 97 | |
|
84 | 98 | >>> enc(b'nothing/special.txt') |
|
85 | 99 | 'nothing/special.txt' |
|
86 | 100 | >>> dec(b'nothing/special.txt') |
|
87 | 101 | 'nothing/special.txt' |
|
88 | 102 | |
|
89 | 103 | >>> enc(b'HELLO') |
|
90 | 104 | '_h_e_l_l_o' |
|
91 | 105 | >>> dec(b'_h_e_l_l_o') |
|
92 | 106 | 'HELLO' |
|
93 | 107 | |
|
94 | 108 | >>> enc(b'hello:world?') |
|
95 | 109 | 'hello~3aworld~3f' |
|
96 | 110 | >>> dec(b'hello~3aworld~3f') |
|
97 | 111 | 'hello:world?' |
|
98 | 112 | |
|
99 | 113 | >>> enc(b'the\\x07quick\\xADshot') |
|
100 | 114 | 'the~07quick~adshot' |
|
101 | 115 | >>> dec(b'the~07quick~adshot') |
|
102 | 116 | 'the\\x07quick\\xadshot' |
|
103 | 117 | ''' |
|
104 | 118 | e = '_' |
|
105 | 119 | xchr = pycompat.bytechr |
|
106 | 120 | asciistr = list(map(xchr, range(127))) |
|
107 | 121 | capitals = list(range(ord("A"), ord("Z") + 1)) |
|
108 | 122 | |
|
109 | 123 | cmap = dict((x, x) for x in asciistr) |
|
110 | 124 | for x in _reserved(): |
|
111 | 125 | cmap[xchr(x)] = "~%02x" % x |
|
112 | 126 | for x in capitals + [ord(e)]: |
|
113 | 127 | cmap[xchr(x)] = e + xchr(x).lower() |
|
114 | 128 | |
|
115 | 129 | dmap = {} |
|
116 | 130 | for k, v in cmap.iteritems(): |
|
117 | 131 | dmap[v] = k |
|
118 | 132 | def decode(s): |
|
119 | 133 | i = 0 |
|
120 | 134 | while i < len(s): |
|
121 | 135 | for l in pycompat.xrange(1, 4): |
|
122 | 136 | try: |
|
123 | 137 | yield dmap[s[i:i + l]] |
|
124 | 138 | i += l |
|
125 | 139 | break |
|
126 | 140 | except KeyError: |
|
127 | 141 | pass |
|
128 | 142 | else: |
|
129 | 143 | raise KeyError |
|
130 | 144 | return (lambda s: ''.join([cmap[s[c:c + 1]] |
|
131 | 145 | for c in pycompat.xrange(len(s))]), |
|
132 | 146 | lambda s: ''.join(list(decode(s)))) |
|
133 | 147 | |
|
134 | 148 | _encodefname, _decodefname = _buildencodefun() |
|
135 | 149 | |
|
136 | 150 | def encodefilename(s): |
|
137 | 151 | ''' |
|
138 | 152 | >>> encodefilename(b'foo.i/bar.d/bla.hg/hi:world?/HELLO') |
|
139 | 153 | 'foo.i.hg/bar.d.hg/bla.hg.hg/hi~3aworld~3f/_h_e_l_l_o' |
|
140 | 154 | ''' |
|
141 | 155 | return _encodefname(encodedir(s)) |
|
142 | 156 | |
|
143 | 157 | def decodefilename(s): |
|
144 | 158 | ''' |
|
145 | 159 | >>> decodefilename(b'foo.i.hg/bar.d.hg/bla.hg.hg/hi~3aworld~3f/_h_e_l_l_o') |
|
146 | 160 | 'foo.i/bar.d/bla.hg/hi:world?/HELLO' |
|
147 | 161 | ''' |
|
148 | 162 | return decodedir(_decodefname(s)) |
|
149 | 163 | |
|
150 | 164 | def _buildlowerencodefun(): |
|
151 | 165 | ''' |
|
152 | 166 | >>> f = _buildlowerencodefun() |
|
153 | 167 | >>> f(b'nothing/special.txt') |
|
154 | 168 | 'nothing/special.txt' |
|
155 | 169 | >>> f(b'HELLO') |
|
156 | 170 | 'hello' |
|
157 | 171 | >>> f(b'hello:world?') |
|
158 | 172 | 'hello~3aworld~3f' |
|
159 | 173 | >>> f(b'the\\x07quick\\xADshot') |
|
160 | 174 | 'the~07quick~adshot' |
|
161 | 175 | ''' |
|
162 | 176 | xchr = pycompat.bytechr |
|
163 | 177 | cmap = dict([(xchr(x), xchr(x)) for x in pycompat.xrange(127)]) |
|
164 | 178 | for x in _reserved(): |
|
165 | 179 | cmap[xchr(x)] = "~%02x" % x |
|
166 | 180 | for x in range(ord("A"), ord("Z") + 1): |
|
167 | 181 | cmap[xchr(x)] = xchr(x).lower() |
|
168 | 182 | def lowerencode(s): |
|
169 | 183 | return "".join([cmap[c] for c in pycompat.iterbytestr(s)]) |
|
170 | 184 | return lowerencode |
|
171 | 185 | |
|
172 | 186 | lowerencode = getattr(parsers, 'lowerencode', None) or _buildlowerencodefun() |
|
173 | 187 | |
|
174 | 188 | # Windows reserved names: con, prn, aux, nul, com1..com9, lpt1..lpt9 |
|
175 | 189 | _winres3 = ('aux', 'con', 'prn', 'nul') # length 3 |
|
176 | 190 | _winres4 = ('com', 'lpt') # length 4 (with trailing 1..9) |
|
177 | 191 | def _auxencode(path, dotencode): |
|
178 | 192 | ''' |
|
179 | 193 | Encodes filenames containing names reserved by Windows or which end in |
|
180 | 194 | period or space. Does not touch other single reserved characters c. |
|
181 | 195 | Specifically, c in '\\:*?"<>|' or ord(c) <= 31 are *not* encoded here. |
|
182 | 196 | Additionally encodes space or period at the beginning, if dotencode is |
|
183 | 197 | True. Parameter path is assumed to be all lowercase. |
|
184 | 198 | A segment only needs encoding if a reserved name appears as a |
|
185 | 199 | basename (e.g. "aux", "aux.foo"). A directory or file named "foo.aux" |
|
186 | 200 | doesn't need encoding. |
|
187 | 201 | |
|
188 | 202 | >>> s = b'.foo/aux.txt/txt.aux/con/prn/nul/foo.' |
|
189 | 203 | >>> _auxencode(s.split(b'/'), True) |
|
190 | 204 | ['~2efoo', 'au~78.txt', 'txt.aux', 'co~6e', 'pr~6e', 'nu~6c', 'foo~2e'] |
|
191 | 205 | >>> s = b'.com1com2/lpt9.lpt4.lpt1/conprn/com0/lpt0/foo.' |
|
192 | 206 | >>> _auxencode(s.split(b'/'), False) |
|
193 | 207 | ['.com1com2', 'lp~749.lpt4.lpt1', 'conprn', 'com0', 'lpt0', 'foo~2e'] |
|
194 | 208 | >>> _auxencode([b'foo. '], True) |
|
195 | 209 | ['foo.~20'] |
|
196 | 210 | >>> _auxencode([b' .foo'], True) |
|
197 | 211 | ['~20.foo'] |
|
198 | 212 | ''' |
|
199 | 213 | for i, n in enumerate(path): |
|
200 | 214 | if not n: |
|
201 | 215 | continue |
|
202 | 216 | if dotencode and n[0] in '. ': |
|
203 | 217 | n = "~%02x" % ord(n[0:1]) + n[1:] |
|
204 | 218 | path[i] = n |
|
205 | 219 | else: |
|
206 | 220 | l = n.find('.') |
|
207 | 221 | if l == -1: |
|
208 | 222 | l = len(n) |
|
209 | 223 | if ((l == 3 and n[:3] in _winres3) or |
|
210 | 224 | (l == 4 and n[3:4] <= '9' and n[3:4] >= '1' |
|
211 | 225 | and n[:3] in _winres4)): |
|
212 | 226 | # encode third letter ('aux' -> 'au~78') |
|
213 | 227 | ec = "~%02x" % ord(n[2:3]) |
|
214 | 228 | n = n[0:2] + ec + n[3:] |
|
215 | 229 | path[i] = n |
|
216 | 230 | if n[-1] in '. ': |
|
217 | 231 | # encode last period or space ('foo...' -> 'foo..~2e') |
|
218 | 232 | path[i] = n[:-1] + "~%02x" % ord(n[-1:]) |
|
219 | 233 | return path |
|
220 | 234 | |
|
221 | 235 | _maxstorepathlen = 120 |
|
222 | 236 | _dirprefixlen = 8 |
|
223 | 237 | _maxshortdirslen = 8 * (_dirprefixlen + 1) - 4 |
|
224 | 238 | |
|
225 | 239 | def _hashencode(path, dotencode): |
|
226 | 240 | digest = node.hex(hashlib.sha1(path).digest()) |
|
227 | 241 | le = lowerencode(path[5:]).split('/') # skips prefix 'data/' or 'meta/' |
|
228 | 242 | parts = _auxencode(le, dotencode) |
|
229 | 243 | basename = parts[-1] |
|
230 | 244 | _root, ext = os.path.splitext(basename) |
|
231 | 245 | sdirs = [] |
|
232 | 246 | sdirslen = 0 |
|
233 | 247 | for p in parts[:-1]: |
|
234 | 248 | d = p[:_dirprefixlen] |
|
235 | 249 | if d[-1] in '. ': |
|
236 | 250 | # Windows can't access dirs ending in period or space |
|
237 | 251 | d = d[:-1] + '_' |
|
238 | 252 | if sdirslen == 0: |
|
239 | 253 | t = len(d) |
|
240 | 254 | else: |
|
241 | 255 | t = sdirslen + 1 + len(d) |
|
242 | 256 | if t > _maxshortdirslen: |
|
243 | 257 | break |
|
244 | 258 | sdirs.append(d) |
|
245 | 259 | sdirslen = t |
|
246 | 260 | dirs = '/'.join(sdirs) |
|
247 | 261 | if len(dirs) > 0: |
|
248 | 262 | dirs += '/' |
|
249 | 263 | res = 'dh/' + dirs + digest + ext |
|
250 | 264 | spaceleft = _maxstorepathlen - len(res) |
|
251 | 265 | if spaceleft > 0: |
|
252 | 266 | filler = basename[:spaceleft] |
|
253 | 267 | res = 'dh/' + dirs + filler + digest + ext |
|
254 | 268 | return res |
|
255 | 269 | |
|
256 | 270 | def _hybridencode(path, dotencode): |
|
257 | 271 | '''encodes path with a length limit |
|
258 | 272 | |
|
259 | 273 | Encodes all paths that begin with 'data/', according to the following. |
|
260 | 274 | |
|
261 | 275 | Default encoding (reversible): |
|
262 | 276 | |
|
263 | 277 | Encodes all uppercase letters 'X' as '_x'. All reserved or illegal |
|
264 | 278 | characters are encoded as '~xx', where xx is the two digit hex code |
|
265 | 279 | of the character (see encodefilename). |
|
266 | 280 | Relevant path components consisting of Windows reserved filenames are |
|
267 | 281 | masked by encoding the third character ('aux' -> 'au~78', see _auxencode). |
|
268 | 282 | |
|
269 | 283 | Hashed encoding (not reversible): |
|
270 | 284 | |
|
271 | 285 | If the default-encoded path is longer than _maxstorepathlen, a |
|
272 | 286 | non-reversible hybrid hashing of the path is done instead. |
|
273 | 287 | This encoding uses up to _dirprefixlen characters of all directory |
|
274 | 288 | levels of the lowerencoded path, but not more levels than can fit into |
|
275 | 289 | _maxshortdirslen. |
|
276 | 290 | Then follows the filler followed by the sha digest of the full path. |
|
277 | 291 | The filler is the beginning of the basename of the lowerencoded path |
|
278 | 292 | (the basename is everything after the last path separator). The filler |
|
279 | 293 | is as long as possible, filling in characters from the basename until |
|
280 | 294 | the encoded path has _maxstorepathlen characters (or all chars of the |
|
281 | 295 | basename have been taken). |
|
282 | 296 | The extension (e.g. '.i' or '.d') is preserved. |
|
283 | 297 | |
|
284 | 298 | The string 'data/' at the beginning is replaced with 'dh/', if the hashed |
|
285 | 299 | encoding was used. |
|
286 | 300 | ''' |
|
287 | 301 | path = encodedir(path) |
|
288 | 302 | ef = _encodefname(path).split('/') |
|
289 | 303 | res = '/'.join(_auxencode(ef, dotencode)) |
|
290 | 304 | if len(res) > _maxstorepathlen: |
|
291 | 305 | res = _hashencode(path, dotencode) |
|
292 | 306 | return res |
|
293 | 307 | |
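A sketch of the two-tier decision the docstring describes; the helper below is only an illustration, standing in for the real `_encodefname`/`_auxencode` pipeline, and shows just the length-limit fallback to the non-reversible `dh/` form:

```python
import hashlib

_maxstorepathlen = 120

def sketch_encode(path):
    # Stand-in: assume the reversible encoding leaves this path unchanged.
    reversible = path
    if len(reversible) <= _maxstorepathlen:
        return reversible
    # Too long: fall back to the hashed form, keeping the extension
    # (the real code also keeps shortened dirs and a basename filler).
    digest = hashlib.sha1(path.encode('ascii')).hexdigest()
    return 'dh/' + digest + '.i'

print(sketch_encode('data/short.i'))              # reversible form
print(sketch_encode('data/' + 200 * 'x' + '.i'))  # 'dh/<sha1>.i'
```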
|
294 | 308 | def _pathencode(path): |
|
295 | 309 | de = encodedir(path) |
|
296 | 310 | if len(path) > _maxstorepathlen: |
|
297 | 311 | return _hashencode(de, True) |
|
298 | 312 | ef = _encodefname(de).split('/') |
|
299 | 313 | res = '/'.join(_auxencode(ef, True)) |
|
300 | 314 | if len(res) > _maxstorepathlen: |
|
301 | 315 | return _hashencode(de, True) |
|
302 | 316 | return res |
|
303 | 317 | |
|
304 | 318 | _pathencode = getattr(parsers, 'pathencode', _pathencode) |
|
305 | 319 | |
|
306 | 320 | def _plainhybridencode(f): |
|
307 | 321 | return _hybridencode(f, False) |
|
308 | 322 | |
|
309 | 323 | def _calcmode(vfs): |
|
310 | 324 | try: |
|
311 | 325 | # files in .hg/ will be created using this mode |
|
312 | 326 | mode = vfs.stat().st_mode |
|
313 | 327 | # avoid some useless chmods |
|
314 | 328 | if (0o777 & ~util.umask) == (0o777 & mode): |
|
315 | 329 | mode = None |
|
316 | 330 | except OSError: |
|
317 | 331 | mode = None |
|
318 | 332 | return mode |
|
319 | 333 | |
|
320 | 334 | _data = ('narrowspec data meta 00manifest.d 00manifest.i' |
|
321 | 335 | ' 00changelog.d 00changelog.i phaseroots obsstore') |
|
322 | 336 | |
|
323 | 337 | def isrevlog(f, kind, st): |
|
324 | 338 | return kind == stat.S_IFREG and f[-2:] in ('.i', '.d') |
|
325 | 339 | |
|
326 | 340 | class basicstore(object): |
|
327 | 341 | '''base class for local repository stores''' |
|
328 | 342 | def __init__(self, path, vfstype): |
|
329 | 343 | vfs = vfstype(path) |
|
330 | 344 | self.path = vfs.base |
|
331 | 345 | self.createmode = _calcmode(vfs) |
|
332 | 346 | vfs.createmode = self.createmode |
|
333 | 347 | self.rawvfs = vfs |
|
334 | 348 | self.vfs = vfsmod.filtervfs(vfs, encodedir) |
|
335 | 349 | self.opener = self.vfs |
|
336 | 350 | |
|
337 | 351 | def join(self, f): |
|
338 | 352 | return self.path + '/' + encodedir(f) |
|
339 | 353 | |
|
340 | 354 | def _walk(self, relpath, recurse, filefilter=isrevlog): |
|
341 | 355 | '''yields (unencoded, encoded, size)''' |
|
342 | 356 | path = self.path |
|
343 | 357 | if relpath: |
|
344 | 358 | path += '/' + relpath |
|
345 | 359 | striplen = len(self.path) + 1 |
|
346 | 360 | l = [] |
|
347 | 361 | if self.rawvfs.isdir(path): |
|
348 | 362 | visit = [path] |
|
349 | 363 | readdir = self.rawvfs.readdir |
|
350 | 364 | while visit: |
|
351 | 365 | p = visit.pop() |
|
352 | 366 | for f, kind, st in readdir(p, stat=True): |
|
353 | 367 | fp = p + '/' + f |
|
354 | 368 | if filefilter(f, kind, st): |
|
355 | 369 | n = util.pconvert(fp[striplen:]) |
|
356 | 370 | l.append((decodedir(n), n, st.st_size)) |
|
357 | 371 | elif kind == stat.S_IFDIR and recurse: |
|
358 | 372 | visit.append(fp) |
|
359 | 373 | l.sort() |
|
360 | 374 | return l |
|
361 | 375 | |
|
362 | 376 | def datafiles(self, matcher=None): |
|
363 | 377 | return self._walk('data', True) + self._walk('meta', True) |
|
364 | 378 | |
|
365 | 379 | def topfiles(self): |
|
366 | 380 | # yield manifest before changelog |
|
367 | 381 | return reversed(self._walk('', False)) |
|
368 | 382 | |
|
369 | 383 | def walk(self, matcher=None): |
|
370 | 384 | '''yields (unencoded, encoded, size) |
|
371 | 385 | |
|
372 | 386 | if a matcher is passed, storage files of only those tracked paths |
|
373 | 387 | are passed with matches the matcher |
|
374 | 388 | ''' |
|
375 | 389 | # yield data files first |
|
376 | 390 | for x in self.datafiles(matcher): |
|
377 | 391 | yield x |
|
378 | 392 | for x in self.topfiles(): |
|
379 | 393 | yield x |
|
380 | 394 | |
|
381 | 395 | def copylist(self): |
|
382 | 396 | return ['requires'] + _data.split() |
|
383 | 397 | |
|
384 | 398 | def write(self, tr): |
|
385 | 399 | pass |
|
386 | 400 | |
|
387 | 401 | def invalidatecaches(self): |
|
388 | 402 | pass |
|
389 | 403 | |
|
390 | 404 | def markremoved(self, fn): |
|
391 | 405 | pass |
|
392 | 406 | |
|
393 | 407 | def __contains__(self, path): |
|
394 | 408 | '''Checks if the store contains path''' |
|
395 | 409 | path = "/".join(("data", path)) |
|
396 | 410 | # file? |
|
397 | 411 | if self.vfs.exists(path + ".i"): |
|
398 | 412 | return True |
|
399 | 413 | # dir? |
|
400 | 414 | if not path.endswith("/"): |
|
401 | 415 | path = path + "/" |
|
402 | 416 | return self.vfs.exists(path) |
|
403 | 417 | |
|
404 | 418 | class encodedstore(basicstore): |
|
405 | 419 | def __init__(self, path, vfstype): |
|
406 | 420 | vfs = vfstype(path + '/store') |
|
407 | 421 | self.path = vfs.base |
|
408 | 422 | self.createmode = _calcmode(vfs) |
|
409 | 423 | vfs.createmode = self.createmode |
|
410 | 424 | self.rawvfs = vfs |
|
411 | 425 | self.vfs = vfsmod.filtervfs(vfs, encodefilename) |
|
412 | 426 | self.opener = self.vfs |
|
413 | 427 | |
|
414 | 428 | def datafiles(self, matcher=None): |
|
415 | 429 | for a, b, size in super(encodedstore, self).datafiles(): |
|
430 | if not _matchtrackedpath(a, matcher): | |
|
431 | continue | |
|
416 | 432 | try: |
|
417 | 433 | a = decodefilename(a) |
|
418 | 434 | except KeyError: |
|
419 | 435 | a = None |
|
420 | 436 | yield a, b, size |
|
421 | 437 | |
|
422 | 438 | def join(self, f): |
|
423 | 439 | return self.path + '/' + encodefilename(f) |
|
424 | 440 | |
|
425 | 441 | def copylist(self): |
|
426 | 442 | return (['requires', '00changelog.i'] + |
|
427 | 443 | ['store/' + f for f in _data.split()]) |
|
428 | 444 | |
|
429 | 445 | class fncache(object): |
|
430 | 446 | # the filename used to be partially encoded |
|
431 | 447 | # hence the encodedir/decodedir dance |
|
432 | 448 | def __init__(self, vfs): |
|
433 | 449 | self.vfs = vfs |
|
434 | 450 | self.entries = None |
|
435 | 451 | self._dirty = False |
|
436 | 452 | |
|
437 | 453 | def _load(self): |
|
438 | 454 | '''fill the entries from the fncache file''' |
|
439 | 455 | self._dirty = False |
|
440 | 456 | try: |
|
441 | 457 | fp = self.vfs('fncache', mode='rb') |
|
442 | 458 | except IOError: |
|
443 | 459 | # skip nonexistent file |
|
444 | 460 | self.entries = set() |
|
445 | 461 | return |
|
446 | 462 | self.entries = set(decodedir(fp.read()).splitlines()) |
|
447 | 463 | if '' in self.entries: |
|
448 | 464 | fp.seek(0) |
|
449 | 465 | for n, line in enumerate(util.iterfile(fp)): |
|
450 | 466 | if not line.rstrip('\n'): |
|
451 | 467 | t = _('invalid entry in fncache, line %d') % (n + 1) |
|
452 | 468 | raise error.Abort(t) |
|
453 | 469 | fp.close() |
|
454 | 470 | |
|
455 | 471 | def write(self, tr): |
|
456 | 472 | if self._dirty: |
|
457 | 473 | assert self.entries is not None |
|
458 | 474 | tr.addbackup('fncache') |
|
459 | 475 | fp = self.vfs('fncache', mode='wb', atomictemp=True) |
|
460 | 476 | if self.entries: |
|
461 | 477 | fp.write(encodedir('\n'.join(self.entries) + '\n')) |
|
462 | 478 | fp.close() |
|
463 | 479 | self._dirty = False |
|
464 | 480 | |
|
465 | 481 | def add(self, fn): |
|
466 | 482 | if self.entries is None: |
|
467 | 483 | self._load() |
|
468 | 484 | if fn not in self.entries: |
|
469 | 485 | self._dirty = True |
|
470 | 486 | self.entries.add(fn) |
|
471 | 487 | |
|
472 | 488 | def remove(self, fn): |
|
473 | 489 | if self.entries is None: |
|
474 | 490 | self._load() |
|
475 | 491 | try: |
|
476 | 492 | self.entries.remove(fn) |
|
477 | 493 | self._dirty = True |
|
478 | 494 | except KeyError: |
|
479 | 495 | pass |
|
480 | 496 | |
|
481 | 497 | def __contains__(self, fn): |
|
482 | 498 | if self.entries is None: |
|
483 | 499 | self._load() |
|
484 | 500 | return fn in self.entries |
|
485 | 501 | |
|
486 | 502 | def __iter__(self): |
|
487 | 503 | if self.entries is None: |
|
488 | 504 | self._load() |
|
489 | 505 | return iter(self.entries) |
|
490 | 506 | |
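The on-disk fncache is just newline-separated, directory-encoded entry names; a minimal sketch of the load/write round trip performed by `_load()` and `write()` above (entry names assumed, `encodedir`/`decodedir` omitted):

```python
entries = {'data/foo.i', 'data/bar.d', 'meta/dir/00manifest.i'}
blob = '\n'.join(entries) + '\n'   # what write() emits
loaded = set(blob.splitlines())    # what _load() reads back
assert loaded == entries
assert '' not in loaded            # a blank line would abort the load
```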
|
491 | 507 | class _fncachevfs(vfsmod.abstractvfs, vfsmod.proxyvfs): |
|
492 | 508 | def __init__(self, vfs, fnc, encode): |
|
493 | 509 | vfsmod.proxyvfs.__init__(self, vfs) |
|
494 | 510 | self.fncache = fnc |
|
495 | 511 | self.encode = encode |
|
496 | 512 | |
|
497 | 513 | def __call__(self, path, mode='r', *args, **kw): |
|
498 | 514 | encoded = self.encode(path) |
|
499 | 515 | if mode not in ('r', 'rb') and (path.startswith('data/') or |
|
500 | 516 | path.startswith('meta/')): |
|
501 | 517 | # do not trigger a fncache load when adding a file that already is |
|
502 | 518 | # known to exist. |
|
503 | 519 | notload = self.fncache.entries is None and self.vfs.exists(encoded) |
|
504 | 520 | if notload and 'a' in mode and not self.vfs.stat(encoded).st_size: |
|
505 | 521 | # when appending to an existing file, if the file has size zero, |
|
506 | 522 | # it should be considered as missing. Such zero-size files are |
|
507 | 523 | # the result of truncation when a transaction is aborted. |
|
508 | 524 | notload = False |
|
509 | 525 | if not notload: |
|
510 | 526 | self.fncache.add(path) |
|
511 | 527 | return self.vfs(encoded, mode, *args, **kw) |
|
512 | 528 | |
|
513 | 529 | def join(self, path): |
|
514 | 530 | if path: |
|
515 | 531 | return self.vfs.join(self.encode(path)) |
|
516 | 532 | else: |
|
517 | 533 | return self.vfs.join(path) |
|
518 | 534 | |
|
519 | 535 | class fncachestore(basicstore): |
|
520 | 536 | def __init__(self, path, vfstype, dotencode): |
|
521 | 537 | if dotencode: |
|
522 | 538 | encode = _pathencode |
|
523 | 539 | else: |
|
524 | 540 | encode = _plainhybridencode |
|
525 | 541 | self.encode = encode |
|
526 | 542 | vfs = vfstype(path + '/store') |
|
527 | 543 | self.path = vfs.base |
|
528 | 544 | self.pathsep = self.path + '/' |
|
529 | 545 | self.createmode = _calcmode(vfs) |
|
530 | 546 | vfs.createmode = self.createmode |
|
531 | 547 | self.rawvfs = vfs |
|
532 | 548 | fnc = fncache(vfs) |
|
533 | 549 | self.fncache = fnc |
|
534 | 550 | self.vfs = _fncachevfs(vfs, fnc, encode) |
|
535 | 551 | self.opener = self.vfs |
|
536 | 552 | |
|
537 | 553 | def join(self, f): |
|
538 | 554 | return self.pathsep + self.encode(f) |
|
539 | 555 | |
|
540 | 556 | def getsize(self, path): |
|
541 | 557 | return self.rawvfs.stat(path).st_size |
|
542 | 558 | |
|
543 | 559 | def datafiles(self, matcher=None): |
|
544 | 560 | for f in sorted(self.fncache): |
|
561 | if not _matchtrackedpath(f, matcher): | |
|
562 | continue | |
|
545 | 563 | ef = self.encode(f) |
|
546 | 564 | try: |
|
547 | 565 | yield f, ef, self.getsize(ef) |
|
548 | 566 | except OSError as err: |
|
549 | 567 | if err.errno != errno.ENOENT: |
|
550 | 568 | raise |
|
551 | 569 | |
|
552 | 570 | def copylist(self): |
|
553 | 571 | d = ('narrowspec data meta dh fncache phaseroots obsstore' |
|
554 | 572 | ' 00manifest.d 00manifest.i 00changelog.d 00changelog.i') |
|
555 | 573 | return (['requires', '00changelog.i'] + |
|
556 | 574 | ['store/' + f for f in d.split()]) |
|
557 | 575 | |
|
558 | 576 | def write(self, tr): |
|
559 | 577 | self.fncache.write(tr) |
|
560 | 578 | |
|
561 | 579 | def invalidatecaches(self): |
|
562 | 580 | self.fncache.entries = None |
|
563 | 581 | |
|
564 | 582 | def markremoved(self, fn): |
|
565 | 583 | self.fncache.remove(fn) |
|
566 | 584 | |
|
567 | 585 | def _exists(self, f): |
|
568 | 586 | ef = self.encode(f) |
|
569 | 587 | try: |
|
570 | 588 | self.getsize(ef) |
|
571 | 589 | return True |
|
572 | 590 | except OSError as err: |
|
573 | 591 | if err.errno != errno.ENOENT: |
|
574 | 592 | raise |
|
575 | 593 | # nonexistent entry |
|
576 | 594 | return False |
|
577 | 595 | |
|
578 | 596 | def __contains__(self, path): |
|
579 | 597 | '''Checks if the store contains path''' |
|
580 | 598 | path = "/".join(("data", path)) |
|
581 | 599 | # check for files (exact match) |
|
582 | 600 | e = path + '.i' |
|
583 | 601 | if e in self.fncache and self._exists(e): |
|
584 | 602 | return True |
|
585 | 603 | # now check for directories (prefix match) |
|
586 | 604 | if not path.endswith('/'): |
|
587 | 605 | path += '/' |
|
588 | 606 | for e in self.fncache: |
|
589 | 607 | if e.startswith(path) and self._exists(e): |
|
590 | 608 | return True |
|
591 | 609 | return False |
@@ -1,663 +1,659 | |||
|
1 | 1 | # streamclone.py - producing and consuming streaming repository data |
|
2 | 2 | # |
|
3 | 3 | # Copyright 2015 Gregory Szorc <gregory.szorc@gmail.com> |
|
4 | 4 | # |
|
5 | 5 | # This software may be used and distributed according to the terms of the |
|
6 | 6 | # GNU General Public License version 2 or any later version. |
|
7 | 7 | |
|
8 | 8 | from __future__ import absolute_import |
|
9 | 9 | |
|
10 | 10 | import contextlib |
|
11 | 11 | import os |
|
12 | 12 | import struct |
|
13 | 13 | |
|
14 | 14 | from .i18n import _ |
|
15 | 15 | from . import ( |
|
16 | 16 | branchmap, |
|
17 | 17 | cacheutil, |
|
18 | 18 | error, |
|
19 | 19 | narrowspec, |
|
20 | 20 | phases, |
|
21 | 21 | pycompat, |
|
22 | 22 | repository, |
|
23 | 23 | store, |
|
24 | 24 | util, |
|
25 | 25 | ) |
|
26 | 26 | |
|
27 | 27 | def canperformstreamclone(pullop, bundle2=False): |
|
28 | 28 | """Whether it is possible to perform a streaming clone as part of pull. |
|
29 | 29 | |
|
30 | 30 | ``bundle2`` will cause the function to consider stream clone through |
|
31 | 31 | bundle2 and only through bundle2. |
|
32 | 32 | |
|
33 | 33 | Returns a tuple of (supported, requirements). ``supported`` is True if |
|
34 | 34 | streaming clone is supported and False otherwise. ``requirements`` is |
|
35 | 35 | a set of repo requirements from the remote, or ``None`` if stream clone |
|
36 | 36 | isn't supported. |
|
37 | 37 | """ |
|
38 | 38 | repo = pullop.repo |
|
39 | 39 | remote = pullop.remote |
|
40 | 40 | |
|
41 | 41 | bundle2supported = False |
|
42 | 42 | if pullop.canusebundle2: |
|
43 | 43 | if 'v2' in pullop.remotebundle2caps.get('stream', []): |
|
44 | 44 | bundle2supported = True |
|
45 | 45 | # else |
|
46 | 46 | # Server doesn't support bundle2 stream clone or doesn't support |
|
47 | 47 | # the versions we support. Fall back and possibly allow legacy. |
|
48 | 48 | |
|
49 | 49 | # Ensures legacy code path uses available bundle2. |
|
50 | 50 | if bundle2supported and not bundle2: |
|
51 | 51 | return False, None |
|
52 | 52 | # Ensures bundle2 doesn't try to do a stream clone if it isn't supported. |
|
53 | 53 | elif bundle2 and not bundle2supported: |
|
54 | 54 | return False, None |
|
55 | 55 | |
|
56 | 56 | # Streaming clone only works on empty repositories. |
|
57 | 57 | if len(repo): |
|
58 | 58 | return False, None |
|
59 | 59 | |
|
60 | 60 | # Streaming clone only works if all data is being requested. |
|
61 | 61 | if pullop.heads: |
|
62 | 62 | return False, None |
|
63 | 63 | |
|
64 | 64 | streamrequested = pullop.streamclonerequested |
|
65 | 65 | |
|
66 | 66 | # If we don't have a preference, let the server decide for us. This |
|
67 | 67 | # likely only comes into play in LANs. |
|
68 | 68 | if streamrequested is None: |
|
69 | 69 | # The server can advertise whether to prefer streaming clone. |
|
70 | 70 | streamrequested = remote.capable('stream-preferred') |
|
71 | 71 | |
|
72 | 72 | if not streamrequested: |
|
73 | 73 | return False, None |
|
74 | 74 | |
|
75 | 75 | # In order for stream clone to work, the client has to support all the |
|
76 | 76 | # requirements advertised by the server. |
|
77 | 77 | # |
|
78 | 78 | # The server advertises its requirements via the "stream" and "streamreqs" |
|
79 | 79 | # capability. "stream" (a value-less capability) is advertised if and only |
|
80 | 80 | # if the only requirement is "revlogv1." Else, the "streamreqs" capability |
|
81 | 81 | # is advertised and contains a comma-delimited list of requirements. |
|
82 | 82 | requirements = set() |
|
83 | 83 | if remote.capable('stream'): |
|
84 | 84 | requirements.add('revlogv1') |
|
85 | 85 | else: |
|
86 | 86 | streamreqs = remote.capable('streamreqs') |
|
87 | 87 | # This is weird and shouldn't happen with modern servers. |
|
88 | 88 | if not streamreqs: |
|
89 | 89 | pullop.repo.ui.warn(_( |
|
90 | 90 | 'warning: stream clone requested but server has them ' |
|
91 | 91 | 'disabled\n')) |
|
92 | 92 | return False, None |
|
93 | 93 | |
|
94 | 94 | streamreqs = set(streamreqs.split(',')) |
|
95 | 95 | # Server requires something we don't support. Bail. |
|
96 | 96 | missingreqs = streamreqs - repo.supportedformats |
|
97 | 97 | if missingreqs: |
|
98 | 98 | pullop.repo.ui.warn(_( |
|
99 | 99 | 'warning: stream clone requested but client is missing ' |
|
100 | 100 | 'requirements: %s\n') % ', '.join(sorted(missingreqs))) |
|
101 | 101 | pullop.repo.ui.warn( |
|
102 | 102 | _('(see https://www.mercurial-scm.org/wiki/MissingRequirement ' |
|
103 | 103 | 'for more information)\n')) |
|
104 | 104 | return False, None |
|
105 | 105 | requirements = streamreqs |
|
106 | 106 | |
|
107 | 107 | return True, requirements |
|
108 | 108 | |
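A condensed sketch of the `stream`/`streamreqs` negotiation described above; the capability values and format set here are hypothetical:

```python
def negotiate(has_stream, streamreqs_value, supportedformats):
    # Mirrors the requirements check in canperformstreamclone().
    if has_stream:                     # value-less 'stream' capability
        return True, {'revlogv1'}
    if not streamreqs_value:           # server has stream clones disabled
        return False, None
    streamreqs = set(streamreqs_value.split(','))
    if streamreqs - supportedformats:  # server needs something we lack
        return False, None
    return True, streamreqs

print(negotiate(False, 'generaldelta,revlogv1',
                {'revlogv1', 'generaldelta', 'treemanifest'}))
```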
|
109 | 109 | def maybeperformlegacystreamclone(pullop): |
|
110 | 110 | """Possibly perform a legacy stream clone operation. |
|
111 | 111 | |
|
112 | 112 | Legacy stream clones are performed as part of pull but before all other |
|
113 | 113 | operations. |
|
114 | 114 | |
|
115 | 115 | A legacy stream clone will not be performed if a bundle2 stream clone is |
|
116 | 116 | supported. |
|
117 | 117 | """ |
|
118 | 118 | from . import localrepo |
|
119 | 119 | |
|
120 | 120 | supported, requirements = canperformstreamclone(pullop) |
|
121 | 121 | |
|
122 | 122 | if not supported: |
|
123 | 123 | return |
|
124 | 124 | |
|
125 | 125 | repo = pullop.repo |
|
126 | 126 | remote = pullop.remote |
|
127 | 127 | |
|
128 | 128 | # Save remote branchmap. We will use it later to speed up branchcache |
|
129 | 129 | # creation. |
|
130 | 130 | rbranchmap = None |
|
131 | 131 | if remote.capable('branchmap'): |
|
132 | 132 | with remote.commandexecutor() as e: |
|
133 | 133 | rbranchmap = e.callcommand('branchmap', {}).result() |
|
134 | 134 | |
|
135 | 135 | repo.ui.status(_('streaming all changes\n')) |
|
136 | 136 | |
|
137 | 137 | with remote.commandexecutor() as e: |
|
138 | 138 | fp = e.callcommand('stream_out', {}).result() |
|
139 | 139 | |
|
140 | 140 | # TODO strictly speaking, this code should all be inside the context |
|
141 | 141 | # manager because the context manager is supposed to ensure all wire state |
|
142 | 142 | # is flushed when exiting. But the legacy peers don't do this, so it |
|
143 | 143 | # doesn't matter. |
|
144 | 144 | l = fp.readline() |
|
145 | 145 | try: |
|
146 | 146 | resp = int(l) |
|
147 | 147 | except ValueError: |
|
148 | 148 | raise error.ResponseError( |
|
149 | 149 | _('unexpected response from remote server:'), l) |
|
150 | 150 | if resp == 1: |
|
151 | 151 | raise error.Abort(_('operation forbidden by server')) |
|
152 | 152 | elif resp == 2: |
|
153 | 153 | raise error.Abort(_('locking the remote repository failed')) |
|
154 | 154 | elif resp != 0: |
|
155 | 155 | raise error.Abort(_('the server sent an unknown error code')) |
|
156 | 156 | |
|
157 | 157 | l = fp.readline() |
|
158 | 158 | try: |
|
159 | 159 | filecount, bytecount = map(int, l.split(' ', 1)) |
|
160 | 160 | except (ValueError, TypeError): |
|
161 | 161 | raise error.ResponseError( |
|
162 | 162 | _('unexpected response from remote server:'), l) |
|
163 | 163 | |
|
164 | 164 | with repo.lock(): |
|
165 | 165 | consumev1(repo, fp, filecount, bytecount) |
|
166 | 166 | |
|
167 | 167 | # new requirements = old non-format requirements + |
|
168 | 168 | # new format-related remote requirements |
|
169 | 169 | # requirements from the streamed-in repository |
|
170 | 170 | repo.requirements = requirements | ( |
|
171 | 171 | repo.requirements - repo.supportedformats) |
|
172 | 172 | repo.svfs.options = localrepo.resolvestorevfsoptions( |
|
173 | 173 | repo.ui, repo.requirements, repo.features) |
|
174 | 174 | repo._writerequirements() |
|
175 | 175 | |
|
176 | 176 | if rbranchmap: |
|
177 | 177 | branchmap.replacecache(repo, rbranchmap) |
|
178 | 178 | |
|
179 | 179 | repo.invalidate() |
|
180 | 180 | |
|
181 | 181 | def allowservergeneration(repo): |
|
182 | 182 | """Whether streaming clones are allowed from the server.""" |
|
183 | 183 | if repository.REPO_FEATURE_STREAM_CLONE not in repo.features: |
|
184 | 184 | return False |
|
185 | 185 | |
|
186 | 186 | if not repo.ui.configbool('server', 'uncompressed', untrusted=True): |
|
187 | 187 | return False |
|
188 | 188 | |
|
189 | 189 | # The way stream clone works makes it impossible to hide secret changesets. |
|
190 | 190 | # So don't allow this by default. |
|
191 | 191 | secret = phases.hassecret(repo) |
|
192 | 192 | if secret: |
|
193 | 193 | return repo.ui.configbool('server', 'uncompressedallowsecret') |
|
194 | 194 | |
|
195 | 195 | return True |
|
196 | 196 | |
|
197 | 197 | # This is its own function so extensions can override it. |
|
198 | 198 | def _walkstreamfiles(repo, matcher=None): |
|
199 | 199 | return repo.store.walk(matcher) |
|
200 | 200 | |
|
201 | 201 | def generatev1(repo): |
|
202 | 202 | """Emit content for version 1 of a streaming clone. |
|
203 | 203 | |
|
204 | 204 | This returns a 3-tuple of (file count, byte size, data iterator). |
|
205 | 205 | |
|
206 | 206 | The data iterator consists of N entries for each file being transferred. |
|
207 | 207 | Each file entry starts as a line with the file name and integer size |
|
208 | 208 | delimited by a null byte. |
|
209 | 209 | |
|
210 | 210 | The raw file data follows. Following the raw file data is the next file |
|
211 | 211 | entry, or EOF. |
|
212 | 212 | |
|
213 | 213 | When used on the wire protocol, an additional line indicating protocol |
|
214 | 214 | success will be prepended to the stream. This function is not responsible |
|
215 | 215 | for adding it. |
|
216 | 216 | |
|
217 | 217 | This function will obtain a repository lock to ensure a consistent view of |
|
218 | 218 | the store is captured. It therefore may raise LockError. |
|
219 | 219 | """ |
|
220 | 220 | entries = [] |
|
221 | 221 | total_bytes = 0 |
|
222 | 222 | # Get consistent snapshot of repo, lock during scan. |
|
223 | 223 | with repo.lock(): |
|
224 | 224 | repo.ui.debug('scanning\n') |
|
225 | 225 | for name, ename, size in _walkstreamfiles(repo): |
|
226 | 226 | if size: |
|
227 | 227 | entries.append((name, size)) |
|
228 | 228 | total_bytes += size |
|
229 | 229 | |
|
230 | 230 | repo.ui.debug('%d files, %d bytes to transfer\n' % |
|
231 | 231 | (len(entries), total_bytes)) |
|
232 | 232 | |
|
233 | 233 | svfs = repo.svfs |
|
234 | 234 | debugflag = repo.ui.debugflag |
|
235 | 235 | |
|
236 | 236 | def emitrevlogdata(): |
|
237 | 237 | for name, size in entries: |
|
238 | 238 | if debugflag: |
|
239 | 239 | repo.ui.debug('sending %s (%d bytes)\n' % (name, size)) |
|
240 | 240 | # partially encode name over the wire for backwards compat |
|
241 | 241 | yield '%s\0%d\n' % (store.encodedir(name), size) |
|
242 | 242 | # auditing at this stage is both pointless (paths are already |
|
243 | 243 | # trusted by the local repo) and expensive |
|
244 | 244 | with svfs(name, 'rb', auditpath=False) as fp: |
|
245 | 245 | if size <= 65536: |
|
246 | 246 | yield fp.read(size) |
|
247 | 247 | else: |
|
248 | 248 | for chunk in util.filechunkiter(fp, limit=size): |
|
249 | 249 | yield chunk |
|
250 | 250 | |
|
251 | 251 | return len(entries), total_bytes, emitrevlogdata() |
|
252 | 252 | |
|
253 | 253 | def generatev1wireproto(repo): |
|
254 | 254 | """Emit content for version 1 of streaming clone suitable for the wire. |
|
255 | 255 | |
|
256 | 256 | This is the data output from ``generatev1()`` with 2 header lines. The |
|
257 | 257 | first line indicates overall success. The 2nd contains the file count and |
|
258 | 258 | byte size of payload. |
|
259 | 259 | |
|
260 | 260 | The success line contains "0" for success, "1" for stream generation not |
|
261 | 261 | allowed, and "2" for error locking the repository (possibly indicating |
|
262 | 262 | a permissions error for the server process). |
|
263 | 263 | """ |
|
264 | 264 | if not allowservergeneration(repo): |
|
265 | 265 | yield '1\n' |
|
266 | 266 | return |
|
267 | 267 | |
|
268 | 268 | try: |
|
269 | 269 | filecount, bytecount, it = generatev1(repo) |
|
270 | 270 | except error.LockError: |
|
271 | 271 | yield '2\n' |
|
272 | 272 | return |
|
273 | 273 | |
|
274 | 274 | # Indicates successful response. |
|
275 | 275 | yield '0\n' |
|
276 | 276 | yield '%d %d\n' % (filecount, bytecount) |
|
277 | 277 | for chunk in it: |
|
278 | 278 | yield chunk |
|
279 | 279 | |
|
280 | 280 | def generatebundlev1(repo, compression='UN'): |
|
281 | 281 | """Emit content for version 1 of a stream clone bundle. |
|
282 | 282 | |
|
283 | 283 | The first 4 bytes of the output ("HGS1") denote this as stream clone |
|
284 | 284 | bundle version 1. |
|
285 | 285 | |
|
286 | 286 | The next 2 bytes indicate the compression type. Only "UN" is currently |
|
287 | 287 | supported. |
|
288 | 288 | |
|
289 | 289 | The next 16 bytes are two 64-bit big endian unsigned integers indicating |
|
290 | 290 | file count and byte count, respectively. |
|
291 | 291 | |
|
292 | 292 | The next 2 bytes is a 16-bit big endian unsigned short declaring the length |
|
293 | 293 | of the requirements string, including a trailing \0. The following N bytes |
|
294 | 294 | are the requirements string, which is ASCII containing a comma-delimited |
|
295 | 295 | list of repo requirements that are needed to support the data. |
|
296 | 296 | |
|
297 | 297 | The remaining content is the output of ``generatev1()`` (which may be |
|
298 | 298 | compressed in the future). |
|
299 | 299 | |
|
300 | 300 | Returns a tuple of (requirements, data generator). |
|
301 | 301 | """ |
|
302 | 302 | if compression != 'UN': |
|
303 | 303 | raise ValueError('we do not support the compression argument yet') |
|
304 | 304 | |
|
305 | 305 | requirements = repo.requirements & repo.supportedformats |
|
306 | 306 | requires = ','.join(sorted(requirements)) |
|
307 | 307 | |
|
308 | 308 | def gen(): |
|
309 | 309 | yield 'HGS1' |
|
310 | 310 | yield compression |
|
311 | 311 | |
|
312 | 312 | filecount, bytecount, it = generatev1(repo) |
|
313 | 313 | repo.ui.status(_('writing %d bytes for %d files\n') % |
|
314 | 314 | (bytecount, filecount)) |
|
315 | 315 | |
|
316 | 316 | yield struct.pack('>QQ', filecount, bytecount) |
|
317 | 317 | yield struct.pack('>H', len(requires) + 1) |
|
318 | 318 | yield requires + '\0' |
|
319 | 319 | |
|
320 | 320 | # This is where we'll add compression in the future. |
|
321 | 321 | assert compression == 'UN' |
|
322 | 322 | |
|
323 | 323 | progress = repo.ui.makeprogress(_('bundle'), total=bytecount, |
|
324 | 324 | unit=_('bytes')) |
|
325 | 325 | progress.update(0) |
|
326 | 326 | |
|
327 | 327 | for chunk in it: |
|
328 | 328 | progress.increment(step=len(chunk)) |
|
329 | 329 | yield chunk |
|
330 | 330 | |
|
331 | 331 | progress.complete() |
|
332 | 332 | |
|
333 | 333 | return requirements, gen() |
|
334 | 334 | |
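The fixed layout of the `HGS1` header documented above, assembled by hand with assumed counts and requirements:

```python
import struct

requires = b'generaldelta,revlogv1'
header = (b'HGS1'                           # bundle magic
          + b'UN'                           # compression identifier
          + struct.pack('>QQ', 3, 12345)    # file count, byte count
          + struct.pack('>H', len(requires) + 1)
          + requires + b'\0')               # requirements + trailing NUL
assert len(header) == 4 + 2 + 16 + 2 + len(requires) + 1
```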
|
335 | 335 | def consumev1(repo, fp, filecount, bytecount): |
|
336 | 336 | """Apply the contents from version 1 of a streaming clone file handle. |
|
337 | 337 | |
|
338 | 338 | This takes the output from "stream_out" and applies it to the specified |
|
339 | 339 | repository. |
|
340 | 340 | |
|
341 | 341 | Like "stream_out," the status line added by the wire protocol is not |
|
342 | 342 | handled by this function. |
|
343 | 343 | """ |
|
344 | 344 | with repo.lock(): |
|
345 | 345 | repo.ui.status(_('%d files to transfer, %s of data\n') % |
|
346 | 346 | (filecount, util.bytecount(bytecount))) |
|
347 | 347 | progress = repo.ui.makeprogress(_('clone'), total=bytecount, |
|
348 | 348 | unit=_('bytes')) |
|
349 | 349 | progress.update(0) |
|
350 | 350 | start = util.timer() |
|
351 | 351 | |
|
352 | 352 | # TODO: get rid of (potential) inconsistency |
|
353 | 353 | # |
|
354 | 354 | # If transaction is started and any @filecache property is |
|
355 | 355 | # changed at this point, it causes inconsistency between |
|
356 | 356 | # in-memory cached property and streamclone-ed file on the |
|
357 | 357 | # disk. Nested transaction prevents transaction scope "clone" |
|
358 | 358 | # below from writing in-memory changes out at the end of it, |
|
359 | 359 | # even though in-memory changes are discarded at the end of it |
|
360 | 360 | # regardless of transaction nesting. |
|
361 | 361 | # |
|
362 | 362 | # But transaction nesting can't be simply prohibited, because |
|
363 | 363 | # nesting occurs also in ordinary case (e.g. enabling |
|
364 | 364 | # clonebundles). |
|
365 | 365 | |
|
366 | 366 | with repo.transaction('clone'): |
|
367 | 367 | with repo.svfs.backgroundclosing(repo.ui, expectedcount=filecount): |
|
368 | 368 | for i in pycompat.xrange(filecount): |
|
369 | 369 | # XXX doesn't support '\n' or '\r' in filenames |
|
370 | 370 | l = fp.readline() |
|
371 | 371 | try: |
|
372 | 372 | name, size = l.split('\0', 1) |
|
373 | 373 | size = int(size) |
|
374 | 374 | except (ValueError, TypeError): |
|
375 | 375 | raise error.ResponseError( |
|
376 | 376 | _('unexpected response from remote server:'), l) |
|
377 | 377 | if repo.ui.debugflag: |
|
378 | 378 | repo.ui.debug('adding %s (%s)\n' % |
|
379 | 379 | (name, util.bytecount(size))) |
|
380 | 380 | # for backwards compat, name was partially encoded |
|
381 | 381 | path = store.decodedir(name) |
|
382 | 382 | with repo.svfs(path, 'w', backgroundclose=True) as ofp: |
|
383 | 383 | for chunk in util.filechunkiter(fp, limit=size): |
|
384 | 384 | progress.increment(step=len(chunk)) |
|
385 | 385 | ofp.write(chunk) |
|
386 | 386 | |
|
387 | 387 | # force @filecache properties to be reloaded from |
|
388 | 388 | # streamclone-ed file at next access |
|
389 | 389 | repo.invalidate(clearfilecache=True) |
|
390 | 390 | |
|
391 | 391 | elapsed = util.timer() - start |
|
392 | 392 | if elapsed <= 0: |
|
393 | 393 | elapsed = 0.001 |
|
394 | 394 | progress.complete() |
|
395 | 395 | repo.ui.status(_('transferred %s in %.1f seconds (%s/sec)\n') % |
|
396 | 396 | (util.bytecount(bytecount), elapsed, |
|
397 | 397 | util.bytecount(bytecount / elapsed))) |
|
398 | 398 | |
|
399 | 399 | def readbundle1header(fp): |
|
400 | 400 | compression = fp.read(2) |
|
401 | 401 | if compression != 'UN': |
|
402 | 402 | raise error.Abort(_('only uncompressed stream clone bundles are ' |
|
403 | 403 | 'supported; got %s') % compression) |
|
404 | 404 | |
|
405 | 405 | filecount, bytecount = struct.unpack('>QQ', fp.read(16)) |
|
406 | 406 | requireslen = struct.unpack('>H', fp.read(2))[0] |
|
407 | 407 | requires = fp.read(requireslen) |
|
408 | 408 | |
|
409 | 409 | if not requires.endswith('\0'): |
|
410 | 410 | raise error.Abort(_('malformed stream clone bundle: ' |
|
411 | 411 | 'requirements not properly encoded')) |
|
412 | 412 | |
|
413 | 413 | requirements = set(requires.rstrip('\0').split(',')) |
|
414 | 414 | |
|
415 | 415 | return filecount, bytecount, requirements |
|
416 | 416 | |
|
417 | 417 | def applybundlev1(repo, fp): |
|
418 | 418 | """Apply the content from a stream clone bundle version 1. |
|
419 | 419 | |
|
420 | 420 | We assume the 4 byte header has been read and validated and the file handle |
|
421 | 421 | is at the 2 byte compression identifier. |
|
422 | 422 | """ |
|
423 | 423 | if len(repo): |
|
424 | 424 | raise error.Abort(_('cannot apply stream clone bundle on non-empty ' |
|
425 | 425 | 'repo')) |
|
426 | 426 | |
|
427 | 427 | filecount, bytecount, requirements = readbundle1header(fp) |
|
428 | 428 | missingreqs = requirements - repo.supportedformats |
|
429 | 429 | if missingreqs: |
|
430 | 430 | raise error.Abort(_('unable to apply stream clone: ' |
|
431 | 431 | 'unsupported format: %s') % |
|
432 | 432 | ', '.join(sorted(missingreqs))) |
|
433 | 433 | |
|
434 | 434 | consumev1(repo, fp, filecount, bytecount) |
|
435 | 435 | |
|
436 | 436 | class streamcloneapplier(object): |
|
437 | 437 | """Class to manage applying streaming clone bundles. |
|
438 | 438 | |
|
439 | 439 | We need to wrap ``applybundlev1()`` in a dedicated type to enable bundle |
|
440 | 440 | readers to perform bundle type-specific functionality. |
|
441 | 441 | """ |
|
442 | 442 | def __init__(self, fh): |
|
443 | 443 | self._fh = fh |
|
444 | 444 | |
|
445 | 445 | def apply(self, repo): |
|
446 | 446 | return applybundlev1(repo, self._fh) |
|
447 | 447 | |
|
448 | 448 | # type of file to stream |
|
449 | 449 | _fileappend = 0 # append only file |
|
450 | 450 | _filefull = 1 # full snapshot file |
|
451 | 451 | |
|
452 | 452 | # Source of the file |
|
453 | 453 | _srcstore = 's' # store (svfs) |
|
454 | 454 | _srccache = 'c' # cache (cache) |
|
455 | 455 | |
|
456 | 456 | # This is its own function so extensions can override it. |
|
457 | 457 | def _walkstreamfullstorefiles(repo): |
|
458 | 458 | """list snapshot file from the store""" |
|
459 | 459 | fnames = [] |
|
460 | 460 | if not repo.publishing(): |
|
461 | 461 | fnames.append('phaseroots') |
|
462 | 462 | return fnames |
|
463 | 463 | |
|
464 | 464 | def _filterfull(entry, copy, vfsmap): |
|
465 | 465 | """actually copy the snapshot files""" |
|
466 | 466 | src, name, ftype, data = entry |
|
467 | 467 | if ftype != _filefull: |
|
468 | 468 | return entry |
|
469 | 469 | return (src, name, ftype, copy(vfsmap[src].join(name))) |
|
470 | 470 | |
|
471 | 471 | @contextlib.contextmanager |
|
472 | 472 | def maketempcopies(): |
|
473 | 473 | """return a function to temporary copy file""" |
|
474 | 474 | files = [] |
|
475 | 475 | try: |
|
476 | 476 | def copy(src): |
|
477 | 477 | fd, dst = pycompat.mkstemp() |
|
478 | 478 | os.close(fd) |
|
479 | 479 | files.append(dst) |
|
480 | 480 | util.copyfiles(src, dst, hardlink=True) |
|
481 | 481 | return dst |
|
482 | 482 | yield copy |
|
483 | 483 | finally: |
|
484 | 484 | for tmp in files: |
|
485 | 485 | util.tryunlink(tmp) |
|
486 | 486 | |
|
487 | 487 | def _makemap(repo): |
|
488 | 488 | """make a (src -> vfs) map for the repo""" |
|
489 | 489 | vfsmap = { |
|
490 | 490 | _srcstore: repo.svfs, |
|
491 | 491 | _srccache: repo.cachevfs, |
|
492 | 492 | } |
|
493 | 493 | # we keep repo.vfs out of the map on purpose, there are too many dangers there |
|
494 | 494 | # (eg: .hg/hgrc) |
|
495 | 495 | assert repo.vfs not in vfsmap.values() |
|
496 | 496 | |
|
497 | 497 | return vfsmap |
|
498 | 498 | |
|
499 | 499 | def _emit2(repo, entries, totalfilesize): |
|
500 | 500 | """actually emit the stream bundle""" |
|
501 | 501 | vfsmap = _makemap(repo) |
|
502 | 502 | progress = repo.ui.makeprogress(_('bundle'), total=totalfilesize, |
|
503 | 503 | unit=_('bytes')) |
|
504 | 504 | progress.update(0) |
|
505 | 505 | with maketempcopies() as copy, progress: |
|
506 | 506 | # copy is delayed until we are in the try |
|
507 | 507 | entries = [_filterfull(e, copy, vfsmap) for e in entries] |
|
508 | 508 | yield None # this releases the lock on the repository |
|
509 | 509 | seen = 0 |
|
510 | 510 | |
|
511 | 511 | for src, name, ftype, data in entries: |
|
512 | 512 | vfs = vfsmap[src] |
|
513 | 513 | yield src |
|
514 | 514 | yield util.uvarintencode(len(name)) |
|
515 | 515 | if ftype == _fileappend: |
|
516 | 516 | fp = vfs(name) |
|
517 | 517 | size = data |
|
518 | 518 | elif ftype == _filefull: |
|
519 | 519 | fp = open(data, 'rb') |
|
520 | 520 | size = util.fstat(fp).st_size |
|
521 | 521 | try: |
|
522 | 522 | yield util.uvarintencode(size) |
|
523 | 523 | yield name |
|
524 | 524 | if size <= 65536: |
|
525 | 525 | chunks = (fp.read(size),) |
|
526 | 526 | else: |
|
527 | 527 | chunks = util.filechunkiter(fp, limit=size) |
|
528 | 528 | for chunk in chunks: |
|
529 | 529 | seen += len(chunk) |
|
530 | 530 | progress.update(seen) |
|
531 | 531 | yield chunk |
|
532 | 532 | finally: |
|
533 | 533 | fp.close() |
|
534 | 534 | |
|
535 | 535 | def generatev2(repo, includes, excludes, includeobsmarkers): |
|
536 | 536 | """Emit content for version 2 of a streaming clone. |
|
537 | 537 | |
|
538 | 538 | the data stream consists of the following entries: |
|
539 | 539 | 1) A char representing the file destination (eg: store or cache) |
|
540 | 540 | 2) A varint containing the length of the filename |
|
541 | 541 | 3) A varint containing the length of file data |
|
542 | 542 | 4) N bytes containing the filename (the internal, store-agnostic form) |
|
543 | 543 | 5) N bytes containing the file data |
|
544 | 544 | |
|
545 | 545 | Returns a 3-tuple of (file count, file size, data iterator). |
|
546 | 546 | """ |
|
547 | 547 | |
|
548 | # temporarily raise error until we add storage level logic | |
|
549 | if includes or excludes: | |
|
550 | raise error.Abort(_("server does not support narrow stream clones")) | |
|
551 | ||
|
552 | 548 | with repo.lock(): |
|
553 | 549 | |
|
554 | 550 | entries = [] |
|
555 | 551 | totalfilesize = 0 |
|
556 | 552 | |
|
557 | 553 | matcher = None |
|
558 | 554 | if includes or excludes: |
|
559 | 555 | matcher = narrowspec.match(repo.root, includes, excludes) |
|
560 | 556 | |
|
561 | 557 | repo.ui.debug('scanning\n') |
|
562 | 558 | for name, ename, size in _walkstreamfiles(repo, matcher): |
|
563 | 559 | if size: |
|
564 | 560 | entries.append((_srcstore, name, _fileappend, size)) |
|
565 | 561 | totalfilesize += size |
|
566 | 562 | for name in _walkstreamfullstorefiles(repo): |
|
567 | 563 | if repo.svfs.exists(name): |
|
568 | 564 | totalfilesize += repo.svfs.lstat(name).st_size |
|
569 | 565 | entries.append((_srcstore, name, _filefull, None)) |
|
570 | 566 | if includeobsmarkers and repo.svfs.exists('obsstore'): |
|
571 | 567 | totalfilesize += repo.svfs.lstat('obsstore').st_size |
|
572 | 568 | entries.append((_srcstore, 'obsstore', _filefull, None)) |
|
573 | 569 | for name in cacheutil.cachetocopy(repo): |
|
574 | 570 | if repo.cachevfs.exists(name): |
|
575 | 571 | totalfilesize += repo.cachevfs.lstat(name).st_size |
|
576 | 572 | entries.append((_srccache, name, _filefull, None)) |
|
577 | 573 | |
|
578 | 574 | chunks = _emit2(repo, entries, totalfilesize) |
|
579 | 575 | first = next(chunks) |
|
580 | 576 | assert first is None |
|
581 | 577 | |
|
582 | 578 | return len(entries), totalfilesize, chunks |
|
583 | 579 | |
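A sketch of one v2 entry following the five fields listed in the docstring; the `uvarint` helper below is a plain LEB128-style encoder assumed to match what `util.uvarintencode` produces:

```python
def uvarint(n):
    # Unsigned LEB128: 7 payload bits per byte, high bit = continuation.
    out = bytearray()
    while True:
        byte = n & 0x7f
        n >>= 7
        out.append(byte | (0x80 if n else 0))
        if not n:
            return bytes(out)

name, data = b'data/foo.i', b'revlog bytes'
entry = (b's'                  # destination: 's' = store (svfs)
         + uvarint(len(name))  # filename length
         + uvarint(len(data))  # data length
         + name + data)
print(entry)
```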
|
584 | 580 | @contextlib.contextmanager |
|
585 | 581 | def nested(*ctxs): |
|
586 | 582 | this = ctxs[0] |
|
587 | 583 | rest = ctxs[1:] |
|
588 | 584 | with this: |
|
589 | 585 | if rest: |
|
590 | 586 | with nested(*rest): |
|
591 | 587 | yield |
|
592 | 588 | else: |
|
593 | 589 | yield |
|
594 | 590 | |
|
595 | 591 | def consumev2(repo, fp, filecount, filesize): |
|
596 | 592 | """Apply the contents from a version 2 streaming clone. |
|
597 | 593 | |
|
598 | 594 | Data is read from an object that only needs to provide a ``read(size)`` |
|
599 | 595 | method. |
|
600 | 596 | """ |
|
601 | 597 | with repo.lock(): |
|
602 | 598 | repo.ui.status(_('%d files to transfer, %s of data\n') % |
|
603 | 599 | (filecount, util.bytecount(filesize))) |
|
604 | 600 | |
|
605 | 601 | start = util.timer() |
|
606 | 602 | progress = repo.ui.makeprogress(_('clone'), total=filesize, |
|
607 | 603 | unit=_('bytes')) |
|
608 | 604 | progress.update(0) |
|
609 | 605 | |
|
610 | 606 | vfsmap = _makemap(repo) |
|
611 | 607 | |
|
612 | 608 | with repo.transaction('clone'): |
|
613 | 609 | ctxs = (vfs.backgroundclosing(repo.ui) |
|
614 | 610 | for vfs in vfsmap.values()) |
|
615 | 611 | with nested(*ctxs): |
|
616 | 612 | for i in range(filecount): |
|
617 | 613 | src = util.readexactly(fp, 1) |
|
618 | 614 | vfs = vfsmap[src] |
|
619 | 615 | namelen = util.uvarintdecodestream(fp) |
|
620 | 616 | datalen = util.uvarintdecodestream(fp) |
|
621 | 617 | |
|
622 | 618 | name = util.readexactly(fp, namelen) |
|
623 | 619 | |
|
624 | 620 | if repo.ui.debugflag: |
|
625 | 621 | repo.ui.debug('adding [%s] %s (%s)\n' % |
|
626 | 622 | (src, name, util.bytecount(datalen))) |
|
627 | 623 | |
|
628 | 624 | with vfs(name, 'w') as ofp: |
|
629 | 625 | for chunk in util.filechunkiter(fp, limit=datalen): |
|
630 | 626 | progress.increment(step=len(chunk)) |
|
631 | 627 | ofp.write(chunk) |
|
632 | 628 | |
|
633 | 629 | # force @filecache properties to be reloaded from |
|
634 | 630 | # streamclone-ed file at next access |
|
635 | 631 | repo.invalidate(clearfilecache=True) |
|
636 | 632 | |
|
637 | 633 | elapsed = util.timer() - start |
|
638 | 634 | if elapsed <= 0: |
|
639 | 635 | elapsed = 0.001 |
|
640 | 636 | repo.ui.status(_('transferred %s in %.1f seconds (%s/sec)\n') % |
|
641 | 637 | (util.bytecount(progress.pos), elapsed, |
|
642 | 638 | util.bytecount(progress.pos / elapsed))) |
|
643 | 639 | progress.complete() |
|
644 | 640 | |
|
645 | 641 | def applybundlev2(repo, fp, filecount, filesize, requirements): |
|
646 | 642 | from . import localrepo |
|
647 | 643 | |
|
648 | 644 | missingreqs = [r for r in requirements if r not in repo.supported] |
|
649 | 645 | if missingreqs: |
|
650 | 646 | raise error.Abort(_('unable to apply stream clone: ' |
|
651 | 647 | 'unsupported format: %s') % |
|
652 | 648 | ', '.join(sorted(missingreqs))) |
|
653 | 649 | |
|
654 | 650 | consumev2(repo, fp, filecount, filesize) |
|
655 | 651 | |
|
656 | 652 | # new requirements = old non-format requirements + |
|
657 | 653 | # new format-related remote requirements |
|
658 | 654 | # requirements from the streamed-in repository |
|
659 | 655 | repo.requirements = set(requirements) | ( |
|
660 | 656 | repo.requirements - repo.supportedformats) |
|
661 | 657 | repo.svfs.options = localrepo.resolvestorevfsoptions( |
|
662 | 658 | repo.ui, repo.requirements, repo.features) |
|
663 | 659 | repo._writerequirements() |
@@ -1,39 +1,86 | |||
|
1 | #testcases tree flat | |
|
2 | ||
|
1 | 3 | Tests narrow stream clones |
|
2 | 4 | |
|
3 | 5 | $ . "$TESTDIR/narrow-library.sh" |
|
4 | 6 | |
|
7 | #if tree | |
|
8 | $ cat << EOF >> $HGRCPATH | |
|
9 | > [experimental] | |
|
10 | > treemanifest = 1 | |
|
11 | > EOF | |
|
12 | #endif | |
|
13 | ||
|
5 | 14 | Server setup |
|
6 | 15 | |
|
7 | 16 | $ hg init master |
|
8 | 17 | $ cd master |
|
9 | 18 | $ mkdir dir |
|
10 | 19 | $ mkdir dir/src |
|
11 | 20 | $ cd dir/src |
|
12 | 21 | $ for x in `$TESTDIR/seq.py 20`; do echo $x > "f$x"; hg add "f$x"; hg commit -m "Commit src $x"; done |
|
13 | 22 | |
|
14 | 23 | $ cd .. |
|
15 | 24 | $ mkdir tests |
|
16 | 25 | $ cd tests |
|
17 | 26 | $ for x in `$TESTDIR/seq.py 20`; do echo $x > "f$x"; hg add "f$x"; hg commit -m "Commit src $x"; done |
|
18 | 27 | $ cd ../../.. |
|
19 | 28 | |
|
20 | 29 | Trying to stream clone when the server does not support it |
|
21 | 30 | |
|
22 | 31 | $ hg clone --narrow ssh://user@dummy/master narrow --noupdate --include "dir/src/f10" --stream |
|
23 | 32 | streaming all changes |
|
24 | 33 | remote: abort: server does not support narrow stream clones |
|
25 | 34 | abort: pull failed on remote |
|
26 | 35 | [255] |
|
27 | 36 | |
|
28 | 37 | Enable stream clone on the server |
|
29 | 38 | |
|
30 | $ echo "[server]" >> master/.hg/hgrc | |
|
39 | $ echo "[experimental.server]" >> master/.hg/hgrc | |
|
31 | 40 | $ echo "stream-narrow-clones=True" >> master/.hg/hgrc |
|
32 | 41 | |
|
33 | 42 | Cloning a specific file when stream clone is supported |
|
34 | 43 | |
|
35 | 44 | $ hg clone --narrow ssh://user@dummy/master narrow --noupdate --include "dir/src/f10" --stream |
|
36 | 45 | streaming all changes |
|
37 | remote: abort: server does not support narrow stream clones | |
|
38 | abort: pull failed on remote | |
|
39 | [255] | |
|
46 | * files to transfer, * KB of data (glob) | |
|
47 | transferred * KB in * seconds (* */sec) (glob) | |
|
48 | ||
|
49 | $ cd narrow | |
|
50 | $ ls | |
|
51 | $ hg tracked | |
|
52 | I path:dir/src/f10 | |
|
53 | ||
|
54 | Making sure we have the correct set of requirements | |
|
55 | ||
|
56 | $ cat .hg/requires | |
|
57 | dotencode | |
|
58 | fncache | |
|
59 | generaldelta | |
|
60 | narrowhg-experimental | |
|
61 | revlogv1 | |
|
62 | store | |
|
63 | treemanifest (tree !) | |
|
64 | ||
|
65 | Making sure store has the required files | |
|
66 | ||
|
67 | $ ls .hg/store/ | |
|
68 | 00changelog.i | |
|
69 | 00manifest.i | |
|
70 | data | |
|
71 | fncache | |
|
72 | meta (tree !) | |
|
73 | narrowspec | |
|
74 | undo | |
|
75 | undo.backupfiles | |
|
76 | undo.phaseroots | |
|
77 | ||
|
78 | Checking that the repository has all the required data and is not broken | |
|
79 | ||
|
80 | $ hg verify | |
|
81 | checking changesets | |
|
82 | checking manifests | |
|
83 | checking directory manifests (tree !) | |
|
84 | crosschecking files in changesets and manifests | |
|
85 | checking files | |
|
86 | checked 40 changesets with 1 changes to 1 files |