py3: use node.hex(h.digest()) instead of h.hexdigest()...
Pulkit Goyal
r40648:3fa4183e default
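
Editorial note (not part of the commit): the diff below swaps hashlib's hexdigest() for node.hex() applied to digest(). On Python 3, hexdigest() returns a unicode str, while Mercurial passes hashes around as bytes; node.hex(), which behaves like binascii.hexlify(), keeps the result as bytes on both Python 2 and 3. The snippet is a minimal sketch of that difference only; the variable names are illustrative.

    import hashlib
    from binascii import hexlify  # stands in here for mercurial.node.hex

    data = b'path/to/file'
    # str on Python 3 (unicode text), str/bytes on Python 2
    text_digest = hashlib.sha1(data).hexdigest()
    # bytes on both Python 2 and Python 3
    byte_digest = hexlify(hashlib.sha1(data).digest())
    assert byte_digest == text_digest.encode('ascii')
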
@@ -1,377 +1,378 b''
1 1 # debugcommands.py - debug logic for remotefilelog
2 2 #
3 3 # Copyright 2013 Facebook, Inc.
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7 from __future__ import absolute_import
8 8
9 9 import hashlib
10 10 import os
11 11 import zlib
12 12
13 13 from mercurial.node import bin, hex, nullid, short
14 14 from mercurial.i18n import _
15 15 from mercurial import (
16 16 error,
17 17 filelog,
18 node as nodemod,
18 19 revlog,
19 20 )
20 21 from . import (
21 22 constants,
22 23 datapack,
23 24 extutil,
24 25 fileserverclient,
25 26 historypack,
26 27 repack,
27 28 shallowutil,
28 29 )
29 30
30 31 def debugremotefilelog(ui, path, **opts):
31 32 decompress = opts.get(r'decompress')
32 33
33 34 size, firstnode, mapping = parsefileblob(path, decompress)
34 35
35 36 ui.status(_("size: %s bytes\n") % (size))
36 37 ui.status(_("path: %s \n") % (path))
37 38 ui.status(_("key: %s \n") % (short(firstnode)))
38 39 ui.status(_("\n"))
39 40 ui.status(_("%12s => %12s %13s %13s %12s\n") %
40 41 ("node", "p1", "p2", "linknode", "copyfrom"))
41 42
42 43 queue = [firstnode]
43 44 while queue:
44 45 node = queue.pop(0)
45 46 p1, p2, linknode, copyfrom = mapping[node]
46 47 ui.status(_("%s => %s %s %s %s\n") %
47 48 (short(node), short(p1), short(p2), short(linknode), copyfrom))
48 49 if p1 != nullid:
49 50 queue.append(p1)
50 51 if p2 != nullid:
51 52 queue.append(p2)
52 53
53 54 def buildtemprevlog(repo, file):
54 55 # get filename key
55 filekey = hashlib.sha1(file).hexdigest()
56 filekey = nodemod.hex(hashlib.sha1(file).digest())
56 57 filedir = os.path.join(repo.path, 'store/data', filekey)
57 58
58 59 # sort all entries based on linkrev
59 60 fctxs = []
60 61 for filenode in os.listdir(filedir):
61 62 if '_old' not in filenode:
62 63 fctxs.append(repo.filectx(file, fileid=bin(filenode)))
63 64
64 65 fctxs = sorted(fctxs, key=lambda x: x.linkrev())
65 66
66 67 # add to revlog
67 68 temppath = repo.sjoin('data/temprevlog.i')
68 69 if os.path.exists(temppath):
69 70 os.remove(temppath)
70 71 r = filelog.filelog(repo.svfs, 'temprevlog')
71 72
72 73 class faket(object):
73 74 def add(self, a, b, c):
74 75 pass
75 76 t = faket()
76 77 for fctx in fctxs:
77 78 if fctx.node() not in repo:
78 79 continue
79 80
80 81 p = fctx.filelog().parents(fctx.filenode())
81 82 meta = {}
82 83 if fctx.renamed():
83 84 meta['copy'] = fctx.renamed()[0]
84 85 meta['copyrev'] = hex(fctx.renamed()[1])
85 86
86 87 r.add(fctx.data(), meta, t, fctx.linkrev(), p[0], p[1])
87 88
88 89 return r
89 90
90 91 def debugindex(orig, ui, repo, file_=None, **opts):
91 92 """dump the contents of an index file"""
92 93 if (opts.get(r'changelog') or
93 94 opts.get(r'manifest') or
94 95 opts.get(r'dir') or
95 96 not shallowutil.isenabled(repo) or
96 97 not repo.shallowmatch(file_)):
97 98 return orig(ui, repo, file_, **opts)
98 99
99 100 r = buildtemprevlog(repo, file_)
100 101
101 102 # debugindex like normal
102 103 format = opts.get('format', 0)
103 104 if format not in (0, 1):
104 105 raise error.Abort(_("unknown format %d") % format)
105 106
106 107 generaldelta = r.version & revlog.FLAG_GENERALDELTA
107 108 if generaldelta:
108 109 basehdr = ' delta'
109 110 else:
110 111 basehdr = ' base'
111 112
112 113 if format == 0:
113 114 ui.write((" rev offset length " + basehdr + " linkrev"
114 115 " nodeid p1 p2\n"))
115 116 elif format == 1:
116 117 ui.write((" rev flag offset length"
117 118 " size " + basehdr + " link p1 p2"
118 119 " nodeid\n"))
119 120
120 121 for i in r:
121 122 node = r.node(i)
122 123 if generaldelta:
123 124 base = r.deltaparent(i)
124 125 else:
125 126 base = r.chainbase(i)
126 127 if format == 0:
127 128 try:
128 129 pp = r.parents(node)
129 130 except Exception:
130 131 pp = [nullid, nullid]
131 132 ui.write("% 6d % 9d % 7d % 6d % 7d %s %s %s\n" % (
132 133 i, r.start(i), r.length(i), base, r.linkrev(i),
133 134 short(node), short(pp[0]), short(pp[1])))
134 135 elif format == 1:
135 136 pr = r.parentrevs(i)
136 137 ui.write("% 6d %04x % 8d % 8d % 8d % 6d % 6d % 6d % 6d %s\n" % (
137 138 i, r.flags(i), r.start(i), r.length(i), r.rawsize(i),
138 139 base, r.linkrev(i), pr[0], pr[1], short(node)))
139 140
140 141 def debugindexdot(orig, ui, repo, file_):
141 142 """dump an index DAG as a graphviz dot file"""
142 143 if not shallowutil.isenabled(repo):
143 144 return orig(ui, repo, file_)
144 145
145 146 r = buildtemprevlog(repo, os.path.basename(file_)[:-2])
146 147
147 148 ui.write(("digraph G {\n"))
148 149 for i in r:
149 150 node = r.node(i)
150 151 pp = r.parents(node)
151 152 ui.write("\t%d -> %d\n" % (r.rev(pp[0]), i))
152 153 if pp[1] != nullid:
153 154 ui.write("\t%d -> %d\n" % (r.rev(pp[1]), i))
154 155 ui.write("}\n")
155 156
156 157 def verifyremotefilelog(ui, path, **opts):
157 158 decompress = opts.get(r'decompress')
158 159
159 160 for root, dirs, files in os.walk(path):
160 161 for file in files:
161 162 if file == "repos":
162 163 continue
163 164 filepath = os.path.join(root, file)
164 165 size, firstnode, mapping = parsefileblob(filepath, decompress)
165 166 for p1, p2, linknode, copyfrom in mapping.itervalues():
166 167 if linknode == nullid:
167 168 actualpath = os.path.relpath(root, path)
168 169 key = fileserverclient.getcachekey("reponame", actualpath,
169 170 file)
170 171 ui.status("%s %s\n" % (key, os.path.relpath(filepath,
171 172 path)))
172 173
173 174 def _decompressblob(raw):
174 175 return zlib.decompress(raw)
175 176
176 177 def parsefileblob(path, decompress):
177 178 raw = None
178 179 f = open(path, "r")
179 180 try:
180 181 raw = f.read()
181 182 finally:
182 183 f.close()
183 184
184 185 if decompress:
185 186 raw = _decompressblob(raw)
186 187
187 188 offset, size, flags = shallowutil.parsesizeflags(raw)
188 189 start = offset + size
189 190
190 191 firstnode = None
191 192
192 193 mapping = {}
193 194 while start < len(raw):
194 195 divider = raw.index('\0', start + 80)
195 196
196 197 currentnode = raw[start:(start + 20)]
197 198 if not firstnode:
198 199 firstnode = currentnode
199 200
200 201 p1 = raw[(start + 20):(start + 40)]
201 202 p2 = raw[(start + 40):(start + 60)]
202 203 linknode = raw[(start + 60):(start + 80)]
203 204 copyfrom = raw[(start + 80):divider]
204 205
205 206 mapping[currentnode] = (p1, p2, linknode, copyfrom)
206 207 start = divider + 1
207 208
208 209 return size, firstnode, mapping
209 210
210 211 def debugdatapack(ui, *paths, **opts):
211 212 for path in paths:
212 213 if '.data' in path:
213 214 path = path[:path.index('.data')]
214 215 ui.write("%s:\n" % path)
215 216 dpack = datapack.datapack(path)
216 217 node = opts.get(r'node')
217 218 if node:
218 219 deltachain = dpack.getdeltachain('', bin(node))
219 220 dumpdeltachain(ui, deltachain, **opts)
220 221 return
221 222
222 223 if opts.get(r'long'):
223 224 hashformatter = hex
224 225 hashlen = 42
225 226 else:
226 227 hashformatter = short
227 228 hashlen = 14
228 229
229 230 lastfilename = None
230 231 totaldeltasize = 0
231 232 totalblobsize = 0
232 233 def printtotals():
233 234 if lastfilename is not None:
234 235 ui.write("\n")
235 236 if not totaldeltasize or not totalblobsize:
236 237 return
237 238 difference = totalblobsize - totaldeltasize
238 239 deltastr = "%0.1f%% %s" % (
239 240 (100.0 * abs(difference) / totalblobsize),
240 241 ("smaller" if difference > 0 else "bigger"))
241 242
242 243 ui.write(("Total:%s%s %s (%s)\n") % (
243 244 "".ljust(2 * hashlen - len("Total:")),
244 245 str(totaldeltasize).ljust(12),
245 246 str(totalblobsize).ljust(9),
246 247 deltastr
247 248 ))
248 249
249 250 bases = {}
250 251 nodes = set()
251 252 failures = 0
252 253 for filename, node, deltabase, deltalen in dpack.iterentries():
253 254 bases[node] = deltabase
254 255 if node in nodes:
255 256 ui.write(("Bad entry: %s appears twice\n" % short(node)))
256 257 failures += 1
257 258 nodes.add(node)
258 259 if filename != lastfilename:
259 260 printtotals()
260 261 name = '(empty name)' if filename == '' else filename
261 262 ui.write("%s:\n" % name)
262 263 ui.write("%s%s%s%s\n" % (
263 264 "Node".ljust(hashlen),
264 265 "Delta Base".ljust(hashlen),
265 266 "Delta Length".ljust(14),
266 267 "Blob Size".ljust(9)))
267 268 lastfilename = filename
268 269 totalblobsize = 0
269 270 totaldeltasize = 0
270 271
271 272 # Metadata could be missing, in which case it will be an empty dict.
272 273 meta = dpack.getmeta(filename, node)
273 274 if constants.METAKEYSIZE in meta:
274 275 blobsize = meta[constants.METAKEYSIZE]
275 276 totaldeltasize += deltalen
276 277 totalblobsize += blobsize
277 278 else:
278 279 blobsize = "(missing)"
279 280 ui.write("%s %s %s%s\n" % (
280 281 hashformatter(node),
281 282 hashformatter(deltabase),
282 283 str(deltalen).ljust(14),
283 284 blobsize))
284 285
285 286 if filename is not None:
286 287 printtotals()
287 288
288 289 failures += _sanitycheck(ui, set(nodes), bases)
289 290 if failures > 1:
290 291 ui.warn(("%d failures\n" % failures))
291 292 return 1
292 293
293 294 def _sanitycheck(ui, nodes, bases):
294 295 """
295 296 Does some basic sanity checking on a packfiles with ``nodes`` ``bases`` (a
296 297 mapping of node->base):
297 298
298 299 - Each deltabase must itself be a node elsewhere in the pack
299 300 - There must be no cycles
300 301 """
301 302 failures = 0
302 303 for node in nodes:
303 304 seen = set()
304 305 current = node
305 306 deltabase = bases[current]
306 307
307 308 while deltabase != nullid:
308 309 if deltabase not in nodes:
309 310 ui.warn(("Bad entry: %s has an unknown deltabase (%s)\n" %
310 311 (short(node), short(deltabase))))
311 312 failures += 1
312 313 break
313 314
314 315 if deltabase in seen:
315 316 ui.warn(("Bad entry: %s has a cycle (at %s)\n" %
316 317 (short(node), short(deltabase))))
317 318 failures += 1
318 319 break
319 320
320 321 current = deltabase
321 322 seen.add(current)
322 323 deltabase = bases[current]
323 324 # Since ``node`` begins a valid chain, reset/memoize its base to nullid
324 325 # so we don't traverse it again.
325 326 bases[node] = nullid
326 327 return failures
327 328
328 329 def dumpdeltachain(ui, deltachain, **opts):
329 330 hashformatter = hex
330 331 hashlen = 40
331 332
332 333 lastfilename = None
333 334 for filename, node, filename, deltabasenode, delta in deltachain:
334 335 if filename != lastfilename:
335 336 ui.write("\n%s\n" % filename)
336 337 lastfilename = filename
337 338 ui.write("%s %s %s %s\n" % (
338 339 "Node".ljust(hashlen),
339 340 "Delta Base".ljust(hashlen),
340 341 "Delta SHA1".ljust(hashlen),
341 342 "Delta Length".ljust(6),
342 343 ))
343 344
344 345 ui.write("%s %s %s %s\n" % (
345 346 hashformatter(node),
346 347 hashformatter(deltabasenode),
347 hashlib.sha1(delta).hexdigest(),
348 nodemod.hex(hashlib.sha1(delta).digest()),
348 349 len(delta)))
349 350
350 351 def debughistorypack(ui, path):
351 352 if '.hist' in path:
352 353 path = path[:path.index('.hist')]
353 354 hpack = historypack.historypack(path)
354 355
355 356 lastfilename = None
356 357 for entry in hpack.iterentries():
357 358 filename, node, p1node, p2node, linknode, copyfrom = entry
358 359 if filename != lastfilename:
359 360 ui.write("\n%s\n" % filename)
360 361 ui.write("%s%s%s%s%s\n" % (
361 362 "Node".ljust(14),
362 363 "P1 Node".ljust(14),
363 364 "P2 Node".ljust(14),
364 365 "Link Node".ljust(14),
365 366 "Copy From"))
366 367 lastfilename = filename
367 368 ui.write("%s %s %s %s %s\n" % (short(node), short(p1node),
368 369 short(p2node), short(linknode), copyfrom))
369 370
370 371 def debugwaitonrepack(repo):
371 372 with extutil.flock(repack.repacklockvfs(repo).join('repacklock'), ''):
372 373 return
373 374
374 375 def debugwaitonprefetch(repo):
375 376 with repo._lock(repo.svfs, "prefetchlock", True, None,
376 377 None, _('prefetching in %s') % repo.origroot):
377 378 pass
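
Editorial note (not part of the commit): parsefileblob() above walks the remotefilelog blob layout, i.e. a size header terminated by NUL, the raw file text, and then fixed 80-byte ancestry records (node, p1, p2, linknode) each followed by a variable-length copyfrom string and a NUL. The sketch below is a hedged, standalone rendering of that walk for the v0 header format only; the function name is illustrative.

    def iterancestors(blob):
        # v0 header: "<size>" followed by NUL, then <size> bytes of text
        headerend = blob.index(b'\0')
        size = int(blob[:headerend])
        start = headerend + 1 + size
        while start < len(blob):
            # four 20-byte nodes, then copyfrom up to the next NUL
            divider = blob.index(b'\0', start + 80)
            node = blob[start:start + 20]
            p1 = blob[start + 20:start + 40]
            p2 = blob[start + 40:start + 60]
            linknode = blob[start + 60:start + 80]
            copyfrom = blob[start + 80:divider]
            yield node, p1, p2, linknode, copyfrom
            start = divider + 1
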
@@ -1,588 +1,589 b''
1 1 # fileserverclient.py - client for communicating with the cache process
2 2 #
3 3 # Copyright 2013 Facebook, Inc.
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 import hashlib
11 11 import io
12 12 import os
13 13 import threading
14 14 import time
15 15 import zlib
16 16
17 17 from mercurial.i18n import _
18 18 from mercurial.node import bin, hex, nullid
19 19 from mercurial import (
20 20 error,
21 node,
21 22 pycompat,
22 23 revlog,
23 24 sshpeer,
24 25 util,
25 26 wireprotov1peer,
26 27 )
27 28 from mercurial.utils import procutil
28 29
29 30 from . import (
30 31 constants,
31 32 contentstore,
32 33 metadatastore,
33 34 )
34 35
35 36 _sshv1peer = sshpeer.sshv1peer
36 37
37 38 # Statistics for debugging
38 39 fetchcost = 0
39 40 fetches = 0
40 41 fetched = 0
41 42 fetchmisses = 0
42 43
43 44 _lfsmod = None
44 45 _downloading = _('downloading')
45 46
46 47 def getcachekey(reponame, file, id):
47 pathhash = hashlib.sha1(file).hexdigest()
48 pathhash = node.hex(hashlib.sha1(file).digest())
48 49 return os.path.join(reponame, pathhash[:2], pathhash[2:], id)
49 50
50 51 def getlocalkey(file, id):
51 pathhash = hashlib.sha1(file).hexdigest()
52 pathhash = node.hex(hashlib.sha1(file).digest())
52 53 return os.path.join(pathhash, id)
53 54
54 55 def peersetup(ui, peer):
55 56
56 57 class remotefilepeer(peer.__class__):
57 58 @wireprotov1peer.batchable
58 59 def x_rfl_getfile(self, file, node):
59 60 if not self.capable('x_rfl_getfile'):
60 61 raise error.Abort(
61 62 'configured remotefile server does not support getfile')
62 63 f = wireprotov1peer.future()
63 64 yield {'file': file, 'node': node}, f
64 65 code, data = f.value.split('\0', 1)
65 66 if int(code):
66 67 raise error.LookupError(file, node, data)
67 68 yield data
68 69
69 70 @wireprotov1peer.batchable
70 71 def x_rfl_getflogheads(self, path):
71 72 if not self.capable('x_rfl_getflogheads'):
72 73 raise error.Abort('configured remotefile server does not '
73 74 'support getflogheads')
74 75 f = wireprotov1peer.future()
75 76 yield {'path': path}, f
76 77 heads = f.value.split('\n') if f.value else []
77 78 yield heads
78 79
79 80 def _updatecallstreamopts(self, command, opts):
80 81 if command != 'getbundle':
81 82 return
82 83 if (constants.NETWORK_CAP_LEGACY_SSH_GETFILES
83 84 not in self.capabilities()):
84 85 return
85 86 if not util.safehasattr(self, '_localrepo'):
86 87 return
87 88 if (constants.SHALLOWREPO_REQUIREMENT
88 89 not in self._localrepo.requirements):
89 90 return
90 91
91 92 bundlecaps = opts.get('bundlecaps')
92 93 if bundlecaps:
93 94 bundlecaps = [bundlecaps]
94 95 else:
95 96 bundlecaps = []
96 97
97 98 # shallow, includepattern, and excludepattern are a hacky way of
98 99 # carrying over data from the local repo to this getbundle
99 100 # command. We need to do it this way because bundle1 getbundle
100 101 # doesn't provide any other place we can hook in to manipulate
101 102 # getbundle args before it goes across the wire. Once we get rid
102 103 # of bundle1, we can use bundle2's _pullbundle2extraprepare to
103 104 # do this more cleanly.
104 105 bundlecaps.append(constants.BUNDLE2_CAPABLITY)
105 106 if self._localrepo.includepattern:
106 107 patterns = '\0'.join(self._localrepo.includepattern)
107 108 includecap = "includepattern=" + patterns
108 109 bundlecaps.append(includecap)
109 110 if self._localrepo.excludepattern:
110 111 patterns = '\0'.join(self._localrepo.excludepattern)
111 112 excludecap = "excludepattern=" + patterns
112 113 bundlecaps.append(excludecap)
113 114 opts['bundlecaps'] = ','.join(bundlecaps)
114 115
115 116 def _sendrequest(self, command, args, **opts):
116 117 self._updatecallstreamopts(command, args)
117 118 return super(remotefilepeer, self)._sendrequest(command, args,
118 119 **opts)
119 120
120 121 def _callstream(self, command, **opts):
121 122 supertype = super(remotefilepeer, self)
122 123 if not util.safehasattr(supertype, '_sendrequest'):
123 124 self._updatecallstreamopts(command, pycompat.byteskwargs(opts))
124 125 return super(remotefilepeer, self)._callstream(command, **opts)
125 126
126 127 peer.__class__ = remotefilepeer
127 128
128 129 class cacheconnection(object):
129 130 """The connection for communicating with the remote cache. Performs
130 131 gets and sets by communicating with an external process that has the
131 132 cache-specific implementation.
132 133 """
133 134 def __init__(self):
134 135 self.pipeo = self.pipei = self.pipee = None
135 136 self.subprocess = None
136 137 self.connected = False
137 138
138 139 def connect(self, cachecommand):
139 140 if self.pipeo:
140 141 raise error.Abort(_("cache connection already open"))
141 142 self.pipei, self.pipeo, self.pipee, self.subprocess = \
142 143 procutil.popen4(cachecommand)
143 144 self.connected = True
144 145
145 146 def close(self):
146 147 def tryclose(pipe):
147 148 try:
148 149 pipe.close()
149 150 except Exception:
150 151 pass
151 152 if self.connected:
152 153 try:
153 154 self.pipei.write("exit\n")
154 155 except Exception:
155 156 pass
156 157 tryclose(self.pipei)
157 158 self.pipei = None
158 159 tryclose(self.pipeo)
159 160 self.pipeo = None
160 161 tryclose(self.pipee)
161 162 self.pipee = None
162 163 try:
163 164 # Wait for process to terminate, making sure to avoid deadlock.
164 165 # See https://docs.python.org/2/library/subprocess.html for
165 166 # warnings about wait() and deadlocking.
166 167 self.subprocess.communicate()
167 168 except Exception:
168 169 pass
169 170 self.subprocess = None
170 171 self.connected = False
171 172
172 173 def request(self, request, flush=True):
173 174 if self.connected:
174 175 try:
175 176 self.pipei.write(request)
176 177 if flush:
177 178 self.pipei.flush()
178 179 except IOError:
179 180 self.close()
180 181
181 182 def receiveline(self):
182 183 if not self.connected:
183 184 return None
184 185 try:
185 186 result = self.pipeo.readline()[:-1]
186 187 if not result:
187 188 self.close()
188 189 except IOError:
189 190 self.close()
190 191
191 192 return result
192 193
193 194 def _getfilesbatch(
194 195 remote, receivemissing, progresstick, missed, idmap, batchsize):
195 196 # Over http(s), iterbatch is a streamy method and we can start
196 197 # looking at results early. This means we send one (potentially
197 198 # large) request, but then we show nice progress as we process
198 199 # file results, rather than showing chunks of $batchsize in
199 200 # progress.
200 201 #
201 202 # Over ssh, iterbatch isn't streamy because batch() wasn't
202 203 # explicitly designed as a streaming method. In the future we
203 204 # should probably introduce a streambatch() method upstream and
204 205 # use that for this.
205 206 with remote.commandexecutor() as e:
206 207 futures = []
207 208 for m in missed:
208 209 futures.append(e.callcommand('x_rfl_getfile', {
209 210 'file': idmap[m],
210 211 'node': m[-40:]
211 212 }))
212 213
213 214 for i, m in enumerate(missed):
214 215 r = futures[i].result()
215 216 futures[i] = None # release memory
216 217 file_ = idmap[m]
217 218 node = m[-40:]
218 219 receivemissing(io.BytesIO('%d\n%s' % (len(r), r)), file_, node)
219 220 progresstick()
220 221
221 222 def _getfiles_optimistic(
222 223 remote, receivemissing, progresstick, missed, idmap, step):
223 224 remote._callstream("x_rfl_getfiles")
224 225 i = 0
225 226 pipeo = remote._pipeo
226 227 pipei = remote._pipei
227 228 while i < len(missed):
228 229 # issue a batch of requests
229 230 start = i
230 231 end = min(len(missed), start + step)
231 232 i = end
232 233 for missingid in missed[start:end]:
233 234 # issue new request
234 235 versionid = missingid[-40:]
235 236 file = idmap[missingid]
236 237 sshrequest = "%s%s\n" % (versionid, file)
237 238 pipeo.write(sshrequest)
238 239 pipeo.flush()
239 240
240 241 # receive batch results
241 242 for missingid in missed[start:end]:
242 243 versionid = missingid[-40:]
243 244 file = idmap[missingid]
244 245 receivemissing(pipei, file, versionid)
245 246 progresstick()
246 247
247 248 # End the command
248 249 pipeo.write('\n')
249 250 pipeo.flush()
250 251
251 252 def _getfiles_threaded(
252 253 remote, receivemissing, progresstick, missed, idmap, step):
253 254 remote._callstream("getfiles")
254 255 pipeo = remote._pipeo
255 256 pipei = remote._pipei
256 257
257 258 def writer():
258 259 for missingid in missed:
259 260 versionid = missingid[-40:]
260 261 file = idmap[missingid]
261 262 sshrequest = "%s%s\n" % (versionid, file)
262 263 pipeo.write(sshrequest)
263 264 pipeo.flush()
264 265 writerthread = threading.Thread(target=writer)
265 266 writerthread.daemon = True
266 267 writerthread.start()
267 268
268 269 for missingid in missed:
269 270 versionid = missingid[-40:]
270 271 file = idmap[missingid]
271 272 receivemissing(pipei, file, versionid)
272 273 progresstick()
273 274
274 275 writerthread.join()
275 276 # End the command
276 277 pipeo.write('\n')
277 278 pipeo.flush()
278 279
279 280 class fileserverclient(object):
280 281 """A client for requesting files from the remote file server.
281 282 """
282 283 def __init__(self, repo):
283 284 ui = repo.ui
284 285 self.repo = repo
285 286 self.ui = ui
286 287 self.cacheprocess = ui.config("remotefilelog", "cacheprocess")
287 288 if self.cacheprocess:
288 289 self.cacheprocess = util.expandpath(self.cacheprocess)
289 290
290 291 # This option causes remotefilelog to pass the full file path to the
291 292 # cacheprocess instead of a hashed key.
292 293 self.cacheprocesspasspath = ui.configbool(
293 294 "remotefilelog", "cacheprocess.includepath")
294 295
295 296 self.debugoutput = ui.configbool("remotefilelog", "debug")
296 297
297 298 self.remotecache = cacheconnection()
298 299
299 300 def setstore(self, datastore, historystore, writedata, writehistory):
300 301 self.datastore = datastore
301 302 self.historystore = historystore
302 303 self.writedata = writedata
303 304 self.writehistory = writehistory
304 305
305 306 def _connect(self):
306 307 return self.repo.connectionpool.get(self.repo.fallbackpath)
307 308
308 309 def request(self, fileids):
309 310 """Takes a list of filename/node pairs and fetches them from the
310 311 server. Files are stored in the local cache.
311 312 A list of nodes that the server couldn't find is returned.
312 313 If the connection fails, an exception is raised.
313 314 """
314 315 if not self.remotecache.connected:
315 316 self.connect()
316 317 cache = self.remotecache
317 318 writedata = self.writedata
318 319
319 320 repo = self.repo
320 321 count = len(fileids)
321 322 request = "get\n%d\n" % count
322 323 idmap = {}
323 324 reponame = repo.name
324 325 for file, id in fileids:
325 326 fullid = getcachekey(reponame, file, id)
326 327 if self.cacheprocesspasspath:
327 328 request += file + '\0'
328 329 request += fullid + "\n"
329 330 idmap[fullid] = file
330 331
331 332 cache.request(request)
332 333
333 334 total = count
334 335 self.ui.progress(_downloading, 0, total=count)
335 336
336 337 missed = []
337 338 count = 0
338 339 while True:
339 340 missingid = cache.receiveline()
340 341 if not missingid:
341 342 missedset = set(missed)
342 343 for missingid in idmap.iterkeys():
343 344 if not missingid in missedset:
344 345 missed.append(missingid)
345 346 self.ui.warn(_("warning: cache connection closed early - " +
346 347 "falling back to server\n"))
347 348 break
348 349 if missingid == "0":
349 350 break
350 351 if missingid.startswith("_hits_"):
351 352 # receive progress reports
352 353 parts = missingid.split("_")
353 354 count += int(parts[2])
354 355 self.ui.progress(_downloading, count, total=total)
355 356 continue
356 357
357 358 missed.append(missingid)
358 359
359 360 global fetchmisses
360 361 fetchmisses += len(missed)
361 362
362 363 count = [total - len(missed)]
363 364 fromcache = count[0]
364 365 self.ui.progress(_downloading, count[0], total=total)
365 366 self.ui.log("remotefilelog", "remote cache hit rate is %r of %r\n",
366 367 count[0], total, hit=count[0], total=total)
367 368
368 369 oldumask = os.umask(0o002)
369 370 try:
370 371 # receive cache misses from master
371 372 if missed:
372 373 def progresstick():
373 374 count[0] += 1
374 375 self.ui.progress(_downloading, count[0], total=total)
375 376 # When verbose is true, sshpeer prints 'running ssh...'
376 377 # to stdout, which can interfere with some command
377 378 # outputs
378 379 verbose = self.ui.verbose
379 380 self.ui.verbose = False
380 381 try:
381 382 with self._connect() as conn:
382 383 remote = conn.peer
383 384 if remote.capable(
384 385 constants.NETWORK_CAP_LEGACY_SSH_GETFILES):
385 386 if not isinstance(remote, _sshv1peer):
386 387 raise error.Abort('remotefilelog requires ssh '
387 388 'servers')
388 389 step = self.ui.configint('remotefilelog',
389 390 'getfilesstep')
390 391 getfilestype = self.ui.config('remotefilelog',
391 392 'getfilestype')
392 393 if getfilestype == 'threaded':
393 394 _getfiles = _getfiles_threaded
394 395 else:
395 396 _getfiles = _getfiles_optimistic
396 397 _getfiles(remote, self.receivemissing, progresstick,
397 398 missed, idmap, step)
398 399 elif remote.capable("x_rfl_getfile"):
399 400 if remote.capable('batch'):
400 401 batchdefault = 100
401 402 else:
402 403 batchdefault = 10
403 404 batchsize = self.ui.configint(
404 405 'remotefilelog', 'batchsize', batchdefault)
405 406 _getfilesbatch(
406 407 remote, self.receivemissing, progresstick,
407 408 missed, idmap, batchsize)
408 409 else:
409 410 raise error.Abort("configured remotefilelog server"
410 411 " does not support remotefilelog")
411 412
412 413 self.ui.log("remotefilefetchlog",
413 414 "Success\n",
414 415 fetched_files = count[0] - fromcache,
415 416 total_to_fetch = total - fromcache)
416 417 except Exception:
417 418 self.ui.log("remotefilefetchlog",
418 419 "Fail\n",
419 420 fetched_files = count[0] - fromcache,
420 421 total_to_fetch = total - fromcache)
421 422 raise
422 423 finally:
423 424 self.ui.verbose = verbose
424 425 # send to memcache
425 426 count[0] = len(missed)
426 427 request = "set\n%d\n%s\n" % (count[0], "\n".join(missed))
427 428 cache.request(request)
428 429
429 430 self.ui.progress(_downloading, None)
430 431
431 432 # mark ourselves as a user of this cache
432 433 writedata.markrepo(self.repo.path)
433 434 finally:
434 435 os.umask(oldumask)
435 436
436 437 def receivemissing(self, pipe, filename, node):
437 438 line = pipe.readline()[:-1]
438 439 if not line:
439 440 raise error.ResponseError(_("error downloading file contents:"),
440 441 _("connection closed early"))
441 442 size = int(line)
442 443 data = pipe.read(size)
443 444 if len(data) != size:
444 445 raise error.ResponseError(_("error downloading file contents:"),
445 446 _("only received %s of %s bytes")
446 447 % (len(data), size))
447 448
448 449 self.writedata.addremotefilelognode(filename, bin(node),
449 450 zlib.decompress(data))
450 451
451 452 def connect(self):
452 453 if self.cacheprocess:
453 454 cmd = "%s %s" % (self.cacheprocess, self.writedata._path)
454 455 self.remotecache.connect(cmd)
455 456 else:
456 457 # If no cache process is specified, we fake one that always
457 458 # returns cache misses. This enables tests to run easily
458 459 # and may eventually allow us to be a drop in replacement
459 460 # for the largefiles extension.
460 461 class simplecache(object):
461 462 def __init__(self):
462 463 self.missingids = []
463 464 self.connected = True
464 465
465 466 def close(self):
466 467 pass
467 468
468 469 def request(self, value, flush=True):
469 470 lines = value.split("\n")
470 471 if lines[0] != "get":
471 472 return
472 473 self.missingids = lines[2:-1]
473 474 self.missingids.append('0')
474 475
475 476 def receiveline(self):
476 477 if len(self.missingids) > 0:
477 478 return self.missingids.pop(0)
478 479 return None
479 480
480 481 self.remotecache = simplecache()
481 482
482 483 def close(self):
483 484 if fetches:
484 485 msg = ("%s files fetched over %d fetches - " +
485 486 "(%d misses, %0.2f%% hit ratio) over %0.2fs\n") % (
486 487 fetched,
487 488 fetches,
488 489 fetchmisses,
489 490 float(fetched - fetchmisses) / float(fetched) * 100.0,
490 491 fetchcost)
491 492 if self.debugoutput:
492 493 self.ui.warn(msg)
493 494 self.ui.log("remotefilelog.prefetch", msg.replace("%", "%%"),
494 495 remotefilelogfetched=fetched,
495 496 remotefilelogfetches=fetches,
496 497 remotefilelogfetchmisses=fetchmisses,
497 498 remotefilelogfetchtime=fetchcost * 1000)
498 499
499 500 if self.remotecache.connected:
500 501 self.remotecache.close()
501 502
502 503 def prefetch(self, fileids, force=False, fetchdata=True,
503 504 fetchhistory=False):
504 505 """downloads the given file versions to the cache
505 506 """
506 507 repo = self.repo
507 508 idstocheck = []
508 509 for file, id in fileids:
509 510 # hack
510 511 # - we don't use .hgtags
511 512 # - workingctx produces ids with length 42,
512 513 # which we skip since they aren't in any cache
513 514 if (file == '.hgtags' or len(id) == 42
514 515 or not repo.shallowmatch(file)):
515 516 continue
516 517
517 518 idstocheck.append((file, bin(id)))
518 519
519 520 datastore = self.datastore
520 521 historystore = self.historystore
521 522 if force:
522 523 datastore = contentstore.unioncontentstore(*repo.shareddatastores)
523 524 historystore = metadatastore.unionmetadatastore(
524 525 *repo.sharedhistorystores)
525 526
526 527 missingids = set()
527 528 if fetchdata:
528 529 missingids.update(datastore.getmissing(idstocheck))
529 530 if fetchhistory:
530 531 missingids.update(historystore.getmissing(idstocheck))
531 532
532 533 # partition missing nodes into nullid and not-nullid so we can
533 534 # warn about this filtering potentially shadowing bugs.
534 535 nullids = len([None for unused, id in missingids if id == nullid])
535 536 if nullids:
536 537 missingids = [(f, id) for f, id in missingids if id != nullid]
537 538 repo.ui.develwarn(
538 539 ('remotefilelog not fetching %d null revs'
539 540 ' - this is likely hiding bugs' % nullids),
540 541 config='remotefilelog-ext')
541 542 if missingids:
542 543 global fetches, fetched, fetchcost
543 544 fetches += 1
544 545
545 546 # We want to be able to detect excess individual file downloads, so
546 547 # let's log that information for debugging.
547 548 if fetches >= 15 and fetches < 18:
548 549 if fetches == 15:
549 550 fetchwarning = self.ui.config('remotefilelog',
550 551 'fetchwarning')
551 552 if fetchwarning:
552 553 self.ui.warn(fetchwarning + '\n')
553 554 self.logstacktrace()
554 555 missingids = [(file, hex(id)) for file, id in missingids]
555 556 fetched += len(missingids)
556 557 start = time.time()
557 558 missingids = self.request(missingids)
558 559 if missingids:
559 560 raise error.Abort(_("unable to download %d files") %
560 561 len(missingids))
561 562 fetchcost += time.time() - start
562 563 self._lfsprefetch(fileids)
563 564
564 565 def _lfsprefetch(self, fileids):
565 566 if not _lfsmod or not util.safehasattr(
566 567 self.repo.svfs, 'lfslocalblobstore'):
567 568 return
568 569 if not _lfsmod.wrapper.candownload(self.repo):
569 570 return
570 571 pointers = []
571 572 store = self.repo.svfs.lfslocalblobstore
572 573 for file, id in fileids:
573 574 node = bin(id)
574 575 rlog = self.repo.file(file)
575 576 if rlog.flags(node) & revlog.REVIDX_EXTSTORED:
576 577 text = rlog.revision(node, raw=True)
577 578 p = _lfsmod.pointer.deserialize(text)
578 579 oid = p.oid()
579 580 if not store.has(oid):
580 581 pointers.append(p)
581 582 if len(pointers) > 0:
582 583 self.repo.svfs.lfsremoteblobstore.readbatch(pointers, store)
583 584 assert all(store.has(p.oid()) for p in pointers)
584 585
585 586 def logstacktrace(self):
586 587 import traceback
587 588 self.ui.log('remotefilelog', 'excess remotefilelog fetching:\n%s\n',
588 589 ''.join(traceback.format_stack()))
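
Editorial note (not part of the commit): getcachekey() and getlocalkey() above hash the file path with SHA-1 and fan the cache out by the first two hex characters of that hash. The following usage sketch assumes node.hex() behaves like binascii.hexlify(); the repository name and node id are made-up values.

    import hashlib
    import os
    from binascii import hexlify  # stands in here for mercurial.node.hex

    def cachekey(reponame, file, id):
        pathhash = hexlify(hashlib.sha1(file).digest())
        return os.path.join(reponame, pathhash[:2], pathhash[2:], id)

    # e.g. b'myrepo/<2 hex chars>/<38 hex chars>/<40-char hex node>'
    print(cachekey(b'myrepo', b'dir/file.py', b'a' * 40))
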
@@ -1,491 +1,492 b''
1 1 # shallowutil.py -- remotefilelog utilities
2 2 #
3 3 # Copyright 2014 Facebook, Inc.
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7 from __future__ import absolute_import
8 8
9 9 import collections
10 10 import errno
11 11 import hashlib
12 12 import os
13 13 import stat
14 14 import struct
15 15 import tempfile
16 16
17 17 from mercurial.i18n import _
18 18 from mercurial import (
19 19 error,
20 node,
20 21 pycompat,
21 22 revlog,
22 23 util,
23 24 )
24 25 from mercurial.utils import (
25 26 storageutil,
26 27 stringutil,
27 28 )
28 29 from . import constants
29 30
30 31 if not pycompat.iswindows:
31 32 import grp
32 33
33 34 def isenabled(repo):
34 35 """returns whether the repository is remotefilelog enabled or not"""
35 36 return constants.SHALLOWREPO_REQUIREMENT in repo.requirements
36 37
37 38 def getcachekey(reponame, file, id):
38 pathhash = hashlib.sha1(file).hexdigest()
39 pathhash = node.hex(hashlib.sha1(file).digest())
39 40 return os.path.join(reponame, pathhash[:2], pathhash[2:], id)
40 41
41 42 def getlocalkey(file, id):
42 pathhash = hashlib.sha1(file).hexdigest()
43 pathhash = node.hex(hashlib.sha1(file).digest())
43 44 return os.path.join(pathhash, id)
44 45
45 46 def getcachepath(ui, allowempty=False):
46 47 cachepath = ui.config("remotefilelog", "cachepath")
47 48 if not cachepath:
48 49 if allowempty:
49 50 return None
50 51 else:
51 52 raise error.Abort(_("could not find config option "
52 53 "remotefilelog.cachepath"))
53 54 return util.expandpath(cachepath)
54 55
55 56 def getcachepackpath(repo, category):
56 57 cachepath = getcachepath(repo.ui)
57 58 if category != constants.FILEPACK_CATEGORY:
58 59 return os.path.join(cachepath, repo.name, 'packs', category)
59 60 else:
60 61 return os.path.join(cachepath, repo.name, 'packs')
61 62
62 63 def getlocalpackpath(base, category):
63 64 return os.path.join(base, 'packs', category)
64 65
65 66 def createrevlogtext(text, copyfrom=None, copyrev=None):
66 67 """returns a string that matches the revlog contents in a
67 68 traditional revlog
68 69 """
69 70 meta = {}
70 71 if copyfrom or text.startswith('\1\n'):
71 72 if copyfrom:
72 73 meta['copy'] = copyfrom
73 74 meta['copyrev'] = copyrev
74 75 text = storageutil.packmeta(meta, text)
75 76
76 77 return text
77 78
78 79 def parsemeta(text):
79 80 """parse mercurial filelog metadata"""
80 81 meta, size = storageutil.parsemeta(text)
81 82 if text.startswith('\1\n'):
82 83 s = text.index('\1\n', 2)
83 84 text = text[s + 2:]
84 85 return meta or {}, text
85 86
86 87 def sumdicts(*dicts):
87 88 """Adds all the values of *dicts together into one dictionary. This assumes
88 89 the values in *dicts are all summable.
89 90
90 91 e.g. [{'a': 4', 'b': 2}, {'b': 3, 'c': 1}] -> {'a': 4, 'b': 5, 'c': 1}
91 92 """
92 93 result = collections.defaultdict(lambda: 0)
93 94 for dict in dicts:
94 95 for k, v in dict.iteritems():
95 96 result[k] += v
96 97 return result
97 98
98 99 def prefixkeys(dict, prefix):
99 100 """Returns ``dict`` with ``prefix`` prepended to all its keys."""
100 101 result = {}
101 102 for k, v in dict.iteritems():
102 103 result[prefix + k] = v
103 104 return result
104 105
105 106 def reportpackmetrics(ui, prefix, *stores):
106 107 dicts = [s.getmetrics() for s in stores]
107 108 dict = prefixkeys(sumdicts(*dicts), prefix + '_')
108 109 ui.log(prefix + "_packsizes", "", **pycompat.strkwargs(dict))
109 110
110 111 def _parsepackmeta(metabuf):
111 112 """parse datapack meta, bytes (<metadata-list>) -> dict
112 113
113 114 The dict contains raw content - both keys and values are strings.
114 115 Upper-level business may want to convert some of them to other types like
115 116 integers, on their own.
116 117
117 118 raise ValueError if the data is corrupted
118 119 """
119 120 metadict = {}
120 121 offset = 0
121 122 buflen = len(metabuf)
122 123 while buflen - offset >= 3:
123 124 key = metabuf[offset]
124 125 offset += 1
125 126 metalen = struct.unpack_from('!H', metabuf, offset)[0]
126 127 offset += 2
127 128 if offset + metalen > buflen:
128 129 raise ValueError('corrupted metadata: incomplete buffer')
129 130 value = metabuf[offset:offset + metalen]
130 131 metadict[key] = value
131 132 offset += metalen
132 133 if offset != buflen:
133 134 raise ValueError('corrupted metadata: redundant data')
134 135 return metadict
135 136
136 137 def _buildpackmeta(metadict):
137 138 """reverse of _parsepackmeta, dict -> bytes (<metadata-list>)
138 139
139 140 The dict contains raw content - both keys and values are strings.
140 141 Upper-level business may want to serialize some of other types (like
141 142 integers) to strings before calling this function.
142 143
143 144 raise ProgrammingError when metadata key is illegal, or ValueError if
144 145 length limit is exceeded
145 146 """
146 147 metabuf = ''
147 148 for k, v in sorted((metadict or {}).iteritems()):
148 149 if len(k) != 1:
149 150 raise error.ProgrammingError('packmeta: illegal key: %s' % k)
150 151 if len(v) > 0xfffe:
151 152 raise ValueError('metadata value is too long: 0x%x > 0xfffe'
152 153 % len(v))
153 154 metabuf += k
154 155 metabuf += struct.pack('!H', len(v))
155 156 metabuf += v
156 157 # len(metabuf) is guaranteed representable in 4 bytes, because there are
157 158 # only 256 keys, and for each value, len(value) <= 0xfffe.
158 159 return metabuf
159 160
160 161 _metaitemtypes = {
161 162 constants.METAKEYFLAG: (int, pycompat.long),
162 163 constants.METAKEYSIZE: (int, pycompat.long),
163 164 }
164 165
165 166 def buildpackmeta(metadict):
166 167 """like _buildpackmeta, but typechecks metadict and normalize it.
167 168
168 169 This means, METAKEYSIZE and METAKEYSIZE should have integers as values,
169 170 and METAKEYFLAG will be dropped if its value is 0.
170 171 """
171 172 newmeta = {}
172 173 for k, v in (metadict or {}).iteritems():
173 174 expectedtype = _metaitemtypes.get(k, (bytes,))
174 175 if not isinstance(v, expectedtype):
175 176 raise error.ProgrammingError('packmeta: wrong type of key %s' % k)
176 177 # normalize int to binary buffer
177 178 if int in expectedtype:
178 179 # optimization: remove flag if it's 0 to save space
179 180 if k == constants.METAKEYFLAG and v == 0:
180 181 continue
181 182 v = int2bin(v)
182 183 newmeta[k] = v
183 184 return _buildpackmeta(newmeta)
184 185
185 186 def parsepackmeta(metabuf):
186 187 """like _parsepackmeta, but convert fields to desired types automatically.
187 188
188 189 This means, METAKEYFLAG and METAKEYSIZE fields will be converted to
189 190 integers.
190 191 """
191 192 metadict = _parsepackmeta(metabuf)
192 193 for k, v in metadict.iteritems():
193 194 if k in _metaitemtypes and int in _metaitemtypes[k]:
194 195 metadict[k] = bin2int(v)
195 196 return metadict
196 197
197 198 def int2bin(n):
198 199 """convert a non-negative integer to raw binary buffer"""
199 200 buf = bytearray()
200 201 while n > 0:
201 202 buf.insert(0, n & 0xff)
202 203 n >>= 8
203 204 return bytes(buf)
204 205
205 206 def bin2int(buf):
206 207 """the reverse of int2bin, convert a binary buffer to an integer"""
207 208 x = 0
208 209 for b in bytearray(buf):
209 210 x <<= 8
210 211 x |= b
211 212 return x
212 213
213 214 def parsesizeflags(raw):
214 215 """given a remotefilelog blob, return (headersize, rawtextsize, flags)
215 216
216 217 see remotefilelogserver.createfileblob for the format.
217 218 raise RuntimeError if the content is illformed.
218 219 """
219 220 flags = revlog.REVIDX_DEFAULT_FLAGS
220 221 size = None
221 222 try:
222 223 index = raw.index('\0')
223 224 header = raw[:index]
224 225 if header.startswith('v'):
225 226 # v1 and above, header starts with 'v'
226 227 if header.startswith('v1\n'):
227 228 for s in header.split('\n'):
228 229 if s.startswith(constants.METAKEYSIZE):
229 230 size = int(s[len(constants.METAKEYSIZE):])
230 231 elif s.startswith(constants.METAKEYFLAG):
231 232 flags = int(s[len(constants.METAKEYFLAG):])
232 233 else:
233 234 raise RuntimeError('unsupported remotefilelog header: %s'
234 235 % header)
235 236 else:
236 237 # v0, str(int(size)) is the header
237 238 size = int(header)
238 239 except ValueError:
239 240 raise RuntimeError("unexpected remotefilelog header: illegal format")
240 241 if size is None:
241 242 raise RuntimeError("unexpected remotefilelog header: no size found")
242 243 return index + 1, size, flags
243 244
244 245 def buildfileblobheader(size, flags, version=None):
245 246 """return the header of a remotefilelog blob.
246 247
247 248 see remotefilelogserver.createfileblob for the format.
248 249 approximately the reverse of parsesizeflags.
249 250
250 251 version could be 0 or 1, or None (auto decide).
251 252 """
252 253 # choose v0 if flags is empty, otherwise v1
253 254 if version is None:
254 255 version = int(bool(flags))
255 256 if version == 1:
256 257 header = ('v1\n%s%d\n%s%d'
257 258 % (constants.METAKEYSIZE, size,
258 259 constants.METAKEYFLAG, flags))
259 260 elif version == 0:
260 261 if flags:
261 262 raise error.ProgrammingError('fileblob v0 does not support flag')
262 263 header = '%d' % size
263 264 else:
264 265 raise error.ProgrammingError('unknown fileblob version %d' % version)
265 266 return header
266 267
267 268 def ancestormap(raw):
268 269 offset, size, flags = parsesizeflags(raw)
269 270 start = offset + size
270 271
271 272 mapping = {}
272 273 while start < len(raw):
273 274 divider = raw.index('\0', start + 80)
274 275
275 276 currentnode = raw[start:(start + 20)]
276 277 p1 = raw[(start + 20):(start + 40)]
277 278 p2 = raw[(start + 40):(start + 60)]
278 279 linknode = raw[(start + 60):(start + 80)]
279 280 copyfrom = raw[(start + 80):divider]
280 281
281 282 mapping[currentnode] = (p1, p2, linknode, copyfrom)
282 283 start = divider + 1
283 284
284 285 return mapping
285 286
286 287 def readfile(path):
287 288 f = open(path, 'rb')
288 289 try:
289 290 result = f.read()
290 291
291 292 # we should never have empty files
292 293 if not result:
293 294 os.remove(path)
294 295 raise IOError("empty file: %s" % path)
295 296
296 297 return result
297 298 finally:
298 299 f.close()
299 300
300 301 def unlinkfile(filepath):
301 302 if pycompat.iswindows:
302 303 # On Windows, os.unlink cannot delete readonly files
303 304 os.chmod(filepath, stat.S_IWUSR)
304 305 os.unlink(filepath)
305 306
306 307 def renamefile(source, destination):
307 308 if pycompat.iswindows:
308 309 # On Windows, os.rename cannot rename readonly files
309 310 # and cannot overwrite destination if it exists
310 311 os.chmod(source, stat.S_IWUSR)
311 312 if os.path.isfile(destination):
312 313 os.chmod(destination, stat.S_IWUSR)
313 314 os.unlink(destination)
314 315
315 316 os.rename(source, destination)
316 317
317 318 def writefile(path, content, readonly=False):
318 319 dirname, filename = os.path.split(path)
319 320 if not os.path.exists(dirname):
320 321 try:
321 322 os.makedirs(dirname)
322 323 except OSError as ex:
323 324 if ex.errno != errno.EEXIST:
324 325 raise
325 326
326 327 fd, temp = tempfile.mkstemp(prefix='.%s-' % filename, dir=dirname)
327 328 os.close(fd)
328 329
329 330 try:
330 331 f = util.posixfile(temp, 'wb')
331 332 f.write(content)
332 333 f.close()
333 334
334 335 if readonly:
335 336 mode = 0o444
336 337 else:
337 338 # tempfiles are created with 0o600, so we need to manually set the
338 339 # mode.
339 340 oldumask = os.umask(0)
340 341 # there's no way to get the umask without modifying it, so set it
341 342 # back
342 343 os.umask(oldumask)
343 344 mode = ~oldumask
344 345
345 346 renamefile(temp, path)
346 347 os.chmod(path, mode)
347 348 except Exception:
348 349 try:
349 350 unlinkfile(temp)
350 351 except OSError:
351 352 pass
352 353 raise
353 354
354 355 def sortnodes(nodes, parentfunc):
355 356 """Topologically sorts the nodes, using the parentfunc to find
356 357 the parents of nodes."""
357 358 nodes = set(nodes)
358 359 childmap = {}
359 360 parentmap = {}
360 361 roots = []
361 362
362 363 # Build a child and parent map
363 364 for n in nodes:
364 365 parents = [p for p in parentfunc(n) if p in nodes]
365 366 parentmap[n] = set(parents)
366 367 for p in parents:
367 368 childmap.setdefault(p, set()).add(n)
368 369 if not parents:
369 370 roots.append(n)
370 371
371 372 roots.sort()
372 373 # Process roots, adding children to the queue as they become roots
373 374 results = []
374 375 while roots:
375 376 n = roots.pop(0)
376 377 results.append(n)
377 378 if n in childmap:
378 379 children = childmap[n]
379 380 for c in children:
380 381 childparents = parentmap[c]
381 382 childparents.remove(n)
382 383 if len(childparents) == 0:
383 384 # insert at the beginning, that way child nodes
384 385 # are likely to be output immediately after their
385 386 # parents. This gives better compression results.
386 387 roots.insert(0, c)
387 388
388 389 return results
389 390
390 391 def readexactly(stream, n):
391 392 '''read n bytes from stream.read and abort if less was available'''
392 393 s = stream.read(n)
393 394 if len(s) < n:
394 395 raise error.Abort(_("stream ended unexpectedly"
395 396 " (got %d bytes, expected %d)")
396 397 % (len(s), n))
397 398 return s
398 399
399 400 def readunpack(stream, fmt):
400 401 data = readexactly(stream, struct.calcsize(fmt))
401 402 return struct.unpack(fmt, data)
402 403
403 404 def readpath(stream):
404 405 rawlen = readexactly(stream, constants.FILENAMESIZE)
405 406 pathlen = struct.unpack(constants.FILENAMESTRUCT, rawlen)[0]
406 407 return readexactly(stream, pathlen)
407 408
408 409 def readnodelist(stream):
409 410 rawlen = readexactly(stream, constants.NODECOUNTSIZE)
410 411 nodecount = struct.unpack(constants.NODECOUNTSTRUCT, rawlen)[0]
411 412 for i in pycompat.xrange(nodecount):
412 413 yield readexactly(stream, constants.NODESIZE)
413 414
414 415 def readpathlist(stream):
415 416 rawlen = readexactly(stream, constants.PATHCOUNTSIZE)
416 417 pathcount = struct.unpack(constants.PATHCOUNTSTRUCT, rawlen)[0]
417 418 for i in pycompat.xrange(pathcount):
418 419 yield readpath(stream)
419 420
420 421 def getgid(groupname):
421 422 try:
422 423 gid = grp.getgrnam(groupname).gr_gid
423 424 return gid
424 425 except KeyError:
425 426 return None
426 427
427 428 def setstickygroupdir(path, gid, warn=None):
428 429 if gid is None:
429 430 return
430 431 try:
431 432 os.chown(path, -1, gid)
432 433 os.chmod(path, 0o2775)
433 434 except (IOError, OSError) as ex:
434 435 if warn:
435 436 warn(_('unable to chown/chmod on %s: %s\n') % (path, ex))
436 437
437 438 def mkstickygroupdir(ui, path):
438 439 """Creates the given directory (if it doesn't exist) and give it a
439 440 particular group with setgid enabled."""
440 441 gid = None
441 442 groupname = ui.config("remotefilelog", "cachegroup")
442 443 if groupname:
443 444 gid = getgid(groupname)
444 445 if gid is None:
445 446 ui.warn(_('unable to resolve group name: %s\n') % groupname)
446 447
447 448 # we use a single stat syscall to test the existence and mode / group bit
448 449 st = None
449 450 try:
450 451 st = os.stat(path)
451 452 except OSError:
452 453 pass
453 454
454 455 if st:
455 456 # exists
456 457 if (st.st_mode & 0o2775) != 0o2775 or st.st_gid != gid:
457 458 # permission needs to be fixed
458 459 setstickygroupdir(path, gid, ui.warn)
459 460 return
460 461
461 462 oldumask = os.umask(0o002)
462 463 try:
463 464 missingdirs = [path]
464 465 path = os.path.dirname(path)
465 466 while path and not os.path.exists(path):
466 467 missingdirs.append(path)
467 468 path = os.path.dirname(path)
468 469
469 470 for path in reversed(missingdirs):
470 471 try:
471 472 os.mkdir(path)
472 473 except OSError as ex:
473 474 if ex.errno != errno.EEXIST:
474 475 raise
475 476
476 477 for path in missingdirs:
477 478 setstickygroupdir(path, gid, ui.warn)
478 479 finally:
479 480 os.umask(oldumask)
480 481
481 482 def getusername(ui):
482 483 try:
483 484 return stringutil.shortuser(ui.username())
484 485 except Exception:
485 486 return 'unknown'
486 487
487 488 def getreponame(ui):
488 489 reponame = ui.config('paths', 'default')
489 490 if reponame:
490 491 return os.path.basename(reponame)
491 492 return "unknown"
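
Editorial note (not part of the commit): _buildpackmeta() and _parsepackmeta() above encode datapack metadata as a sequence of (one-byte key, big-endian 16-bit length, raw value) entries. Below is a hedged round-trip sketch of that encoding; the helper names and the sample keys are illustrative only.

    import struct

    def buildmeta(metadict):
        buf = b''
        for key, value in sorted(metadict.items()):
            buf += key + struct.pack('!H', len(value)) + value
        return buf

    def parsemeta(buf):
        meta, offset = {}, 0
        while offset < len(buf):
            key = buf[offset:offset + 1]
            (length,) = struct.unpack_from('!H', buf, offset + 1)
            meta[key] = buf[offset + 3:offset + 3 + length]
            offset += 3 + length
        return meta

    sample = {b'f': b'\x01', b's': b'\x04\xd2'}  # flag and size as raw bytes
    assert parsemeta(buildmeta(sample)) == sample
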