py3: use node.hex(h.digest()) instead of h.hexdigest()...
Pulkit Goyal
r40648:3fa4183e default
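The change replaces the hashlib hexdigest() calls in remotefilelog with node.hex() applied to the raw digest(). On Python 3, hexdigest() returns a unicode str, while these values end up in path components and cache keys that Mercurial handles as bytes; node.hex() (essentially binascii.hexlify) keeps the result as bytes. A minimal sketch of the difference, using binascii.hexlify as a stand-in for mercurial.node.hex:

    import hashlib
    from binascii import hexlify  # stand-in for mercurial.node.hex

    h = hashlib.sha1(b"path/to/file")

    # On Python 3, hexdigest() yields str, which does not mix with bytes paths.
    text_key = h.hexdigest()
    # node.hex(h.digest()) yields bytes, matching Mercurial's bytes-only convention.
    bytes_key = hexlify(h.digest())

    assert bytes_key.decode('ascii') == text_key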
debugcommands.py
@@ -1,377 +1,378 @@
1 # debugcommands.py - debug logic for remotefilelog
1 # debugcommands.py - debug logic for remotefilelog
2 #
2 #
3 # Copyright 2013 Facebook, Inc.
3 # Copyright 2013 Facebook, Inc.
4 #
4 #
5 # This software may be used and distributed according to the terms of the
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
6 # GNU General Public License version 2 or any later version.
7 from __future__ import absolute_import
7 from __future__ import absolute_import
8
8
9 import hashlib
9 import hashlib
10 import os
10 import os
11 import zlib
11 import zlib
12
12
13 from mercurial.node import bin, hex, nullid, short
13 from mercurial.node import bin, hex, nullid, short
14 from mercurial.i18n import _
14 from mercurial.i18n import _
15 from mercurial import (
15 from mercurial import (
16 error,
16 error,
17 filelog,
17 filelog,
18 node as nodemod,
18 revlog,
19 revlog,
19 )
20 )
20 from . import (
21 from . import (
21 constants,
22 constants,
22 datapack,
23 datapack,
23 extutil,
24 extutil,
24 fileserverclient,
25 fileserverclient,
25 historypack,
26 historypack,
26 repack,
27 repack,
27 shallowutil,
28 shallowutil,
28 )
29 )
29
30
30 def debugremotefilelog(ui, path, **opts):
31 def debugremotefilelog(ui, path, **opts):
31 decompress = opts.get(r'decompress')
32 decompress = opts.get(r'decompress')
32
33
33 size, firstnode, mapping = parsefileblob(path, decompress)
34 size, firstnode, mapping = parsefileblob(path, decompress)
34
35
35 ui.status(_("size: %s bytes\n") % (size))
36 ui.status(_("size: %s bytes\n") % (size))
36 ui.status(_("path: %s \n") % (path))
37 ui.status(_("path: %s \n") % (path))
37 ui.status(_("key: %s \n") % (short(firstnode)))
38 ui.status(_("key: %s \n") % (short(firstnode)))
38 ui.status(_("\n"))
39 ui.status(_("\n"))
39 ui.status(_("%12s => %12s %13s %13s %12s\n") %
40 ui.status(_("%12s => %12s %13s %13s %12s\n") %
40 ("node", "p1", "p2", "linknode", "copyfrom"))
41 ("node", "p1", "p2", "linknode", "copyfrom"))
41
42
42 queue = [firstnode]
43 queue = [firstnode]
43 while queue:
44 while queue:
44 node = queue.pop(0)
45 node = queue.pop(0)
45 p1, p2, linknode, copyfrom = mapping[node]
46 p1, p2, linknode, copyfrom = mapping[node]
46 ui.status(_("%s => %s %s %s %s\n") %
47 ui.status(_("%s => %s %s %s %s\n") %
47 (short(node), short(p1), short(p2), short(linknode), copyfrom))
48 (short(node), short(p1), short(p2), short(linknode), copyfrom))
48 if p1 != nullid:
49 if p1 != nullid:
49 queue.append(p1)
50 queue.append(p1)
50 if p2 != nullid:
51 if p2 != nullid:
51 queue.append(p2)
52 queue.append(p2)
52
53
53 def buildtemprevlog(repo, file):
54 def buildtemprevlog(repo, file):
54 # get filename key
55 # get filename key
55 filekey = hashlib.sha1(file).hexdigest()
56 filekey = nodemod.hex(hashlib.sha1(file).digest())
56 filedir = os.path.join(repo.path, 'store/data', filekey)
57 filedir = os.path.join(repo.path, 'store/data', filekey)
57
58
58 # sort all entries based on linkrev
59 # sort all entries based on linkrev
59 fctxs = []
60 fctxs = []
60 for filenode in os.listdir(filedir):
61 for filenode in os.listdir(filedir):
61 if '_old' not in filenode:
62 if '_old' not in filenode:
62 fctxs.append(repo.filectx(file, fileid=bin(filenode)))
63 fctxs.append(repo.filectx(file, fileid=bin(filenode)))
63
64
64 fctxs = sorted(fctxs, key=lambda x: x.linkrev())
65 fctxs = sorted(fctxs, key=lambda x: x.linkrev())
65
66
66 # add to revlog
67 # add to revlog
67 temppath = repo.sjoin('data/temprevlog.i')
68 temppath = repo.sjoin('data/temprevlog.i')
68 if os.path.exists(temppath):
69 if os.path.exists(temppath):
69 os.remove(temppath)
70 os.remove(temppath)
70 r = filelog.filelog(repo.svfs, 'temprevlog')
71 r = filelog.filelog(repo.svfs, 'temprevlog')
71
72
72 class faket(object):
73 class faket(object):
73 def add(self, a, b, c):
74 def add(self, a, b, c):
74 pass
75 pass
75 t = faket()
76 t = faket()
76 for fctx in fctxs:
77 for fctx in fctxs:
77 if fctx.node() not in repo:
78 if fctx.node() not in repo:
78 continue
79 continue
79
80
80 p = fctx.filelog().parents(fctx.filenode())
81 p = fctx.filelog().parents(fctx.filenode())
81 meta = {}
82 meta = {}
82 if fctx.renamed():
83 if fctx.renamed():
83 meta['copy'] = fctx.renamed()[0]
84 meta['copy'] = fctx.renamed()[0]
84 meta['copyrev'] = hex(fctx.renamed()[1])
85 meta['copyrev'] = hex(fctx.renamed()[1])
85
86
86 r.add(fctx.data(), meta, t, fctx.linkrev(), p[0], p[1])
87 r.add(fctx.data(), meta, t, fctx.linkrev(), p[0], p[1])
87
88
88 return r
89 return r
89
90
90 def debugindex(orig, ui, repo, file_=None, **opts):
91 def debugindex(orig, ui, repo, file_=None, **opts):
91 """dump the contents of an index file"""
92 """dump the contents of an index file"""
92 if (opts.get(r'changelog') or
93 if (opts.get(r'changelog') or
93 opts.get(r'manifest') or
94 opts.get(r'manifest') or
94 opts.get(r'dir') or
95 opts.get(r'dir') or
95 not shallowutil.isenabled(repo) or
96 not shallowutil.isenabled(repo) or
96 not repo.shallowmatch(file_)):
97 not repo.shallowmatch(file_)):
97 return orig(ui, repo, file_, **opts)
98 return orig(ui, repo, file_, **opts)
98
99
99 r = buildtemprevlog(repo, file_)
100 r = buildtemprevlog(repo, file_)
100
101
101 # debugindex like normal
102 # debugindex like normal
102 format = opts.get('format', 0)
103 format = opts.get('format', 0)
103 if format not in (0, 1):
104 if format not in (0, 1):
104 raise error.Abort(_("unknown format %d") % format)
105 raise error.Abort(_("unknown format %d") % format)
105
106
106 generaldelta = r.version & revlog.FLAG_GENERALDELTA
107 generaldelta = r.version & revlog.FLAG_GENERALDELTA
107 if generaldelta:
108 if generaldelta:
108 basehdr = ' delta'
109 basehdr = ' delta'
109 else:
110 else:
110 basehdr = ' base'
111 basehdr = ' base'
111
112
112 if format == 0:
113 if format == 0:
113 ui.write((" rev offset length " + basehdr + " linkrev"
114 ui.write((" rev offset length " + basehdr + " linkrev"
114 " nodeid p1 p2\n"))
115 " nodeid p1 p2\n"))
115 elif format == 1:
116 elif format == 1:
116 ui.write((" rev flag offset length"
117 ui.write((" rev flag offset length"
117 " size " + basehdr + " link p1 p2"
118 " size " + basehdr + " link p1 p2"
118 " nodeid\n"))
119 " nodeid\n"))
119
120
120 for i in r:
121 for i in r:
121 node = r.node(i)
122 node = r.node(i)
122 if generaldelta:
123 if generaldelta:
123 base = r.deltaparent(i)
124 base = r.deltaparent(i)
124 else:
125 else:
125 base = r.chainbase(i)
126 base = r.chainbase(i)
126 if format == 0:
127 if format == 0:
127 try:
128 try:
128 pp = r.parents(node)
129 pp = r.parents(node)
129 except Exception:
130 except Exception:
130 pp = [nullid, nullid]
131 pp = [nullid, nullid]
131 ui.write("% 6d % 9d % 7d % 6d % 7d %s %s %s\n" % (
132 ui.write("% 6d % 9d % 7d % 6d % 7d %s %s %s\n" % (
132 i, r.start(i), r.length(i), base, r.linkrev(i),
133 i, r.start(i), r.length(i), base, r.linkrev(i),
133 short(node), short(pp[0]), short(pp[1])))
134 short(node), short(pp[0]), short(pp[1])))
134 elif format == 1:
135 elif format == 1:
135 pr = r.parentrevs(i)
136 pr = r.parentrevs(i)
136 ui.write("% 6d %04x % 8d % 8d % 8d % 6d % 6d % 6d % 6d %s\n" % (
137 ui.write("% 6d %04x % 8d % 8d % 8d % 6d % 6d % 6d % 6d %s\n" % (
137 i, r.flags(i), r.start(i), r.length(i), r.rawsize(i),
138 i, r.flags(i), r.start(i), r.length(i), r.rawsize(i),
138 base, r.linkrev(i), pr[0], pr[1], short(node)))
139 base, r.linkrev(i), pr[0], pr[1], short(node)))
139
140
140 def debugindexdot(orig, ui, repo, file_):
141 def debugindexdot(orig, ui, repo, file_):
141 """dump an index DAG as a graphviz dot file"""
142 """dump an index DAG as a graphviz dot file"""
142 if not shallowutil.isenabled(repo):
143 if not shallowutil.isenabled(repo):
143 return orig(ui, repo, file_)
144 return orig(ui, repo, file_)
144
145
145 r = buildtemprevlog(repo, os.path.basename(file_)[:-2])
146 r = buildtemprevlog(repo, os.path.basename(file_)[:-2])
146
147
147 ui.write(("digraph G {\n"))
148 ui.write(("digraph G {\n"))
148 for i in r:
149 for i in r:
149 node = r.node(i)
150 node = r.node(i)
150 pp = r.parents(node)
151 pp = r.parents(node)
151 ui.write("\t%d -> %d\n" % (r.rev(pp[0]), i))
152 ui.write("\t%d -> %d\n" % (r.rev(pp[0]), i))
152 if pp[1] != nullid:
153 if pp[1] != nullid:
153 ui.write("\t%d -> %d\n" % (r.rev(pp[1]), i))
154 ui.write("\t%d -> %d\n" % (r.rev(pp[1]), i))
154 ui.write("}\n")
155 ui.write("}\n")
155
156
156 def verifyremotefilelog(ui, path, **opts):
157 def verifyremotefilelog(ui, path, **opts):
157 decompress = opts.get(r'decompress')
158 decompress = opts.get(r'decompress')
158
159
159 for root, dirs, files in os.walk(path):
160 for root, dirs, files in os.walk(path):
160 for file in files:
161 for file in files:
161 if file == "repos":
162 if file == "repos":
162 continue
163 continue
163 filepath = os.path.join(root, file)
164 filepath = os.path.join(root, file)
164 size, firstnode, mapping = parsefileblob(filepath, decompress)
165 size, firstnode, mapping = parsefileblob(filepath, decompress)
165 for p1, p2, linknode, copyfrom in mapping.itervalues():
166 for p1, p2, linknode, copyfrom in mapping.itervalues():
166 if linknode == nullid:
167 if linknode == nullid:
167 actualpath = os.path.relpath(root, path)
168 actualpath = os.path.relpath(root, path)
168 key = fileserverclient.getcachekey("reponame", actualpath,
169 key = fileserverclient.getcachekey("reponame", actualpath,
169 file)
170 file)
170 ui.status("%s %s\n" % (key, os.path.relpath(filepath,
171 ui.status("%s %s\n" % (key, os.path.relpath(filepath,
171 path)))
172 path)))
172
173
173 def _decompressblob(raw):
174 def _decompressblob(raw):
174 return zlib.decompress(raw)
175 return zlib.decompress(raw)
175
176
176 def parsefileblob(path, decompress):
177 def parsefileblob(path, decompress):
177 raw = None
178 raw = None
178 f = open(path, "r")
179 f = open(path, "r")
179 try:
180 try:
180 raw = f.read()
181 raw = f.read()
181 finally:
182 finally:
182 f.close()
183 f.close()
183
184
184 if decompress:
185 if decompress:
185 raw = _decompressblob(raw)
186 raw = _decompressblob(raw)
186
187
187 offset, size, flags = shallowutil.parsesizeflags(raw)
188 offset, size, flags = shallowutil.parsesizeflags(raw)
188 start = offset + size
189 start = offset + size
189
190
190 firstnode = None
191 firstnode = None
191
192
192 mapping = {}
193 mapping = {}
193 while start < len(raw):
194 while start < len(raw):
194 divider = raw.index('\0', start + 80)
195 divider = raw.index('\0', start + 80)
195
196
196 currentnode = raw[start:(start + 20)]
197 currentnode = raw[start:(start + 20)]
197 if not firstnode:
198 if not firstnode:
198 firstnode = currentnode
199 firstnode = currentnode
199
200
200 p1 = raw[(start + 20):(start + 40)]
201 p1 = raw[(start + 20):(start + 40)]
201 p2 = raw[(start + 40):(start + 60)]
202 p2 = raw[(start + 40):(start + 60)]
202 linknode = raw[(start + 60):(start + 80)]
203 linknode = raw[(start + 60):(start + 80)]
203 copyfrom = raw[(start + 80):divider]
204 copyfrom = raw[(start + 80):divider]
204
205
205 mapping[currentnode] = (p1, p2, linknode, copyfrom)
206 mapping[currentnode] = (p1, p2, linknode, copyfrom)
206 start = divider + 1
207 start = divider + 1
207
208
208 return size, firstnode, mapping
209 return size, firstnode, mapping
209
210
210 def debugdatapack(ui, *paths, **opts):
211 def debugdatapack(ui, *paths, **opts):
211 for path in paths:
212 for path in paths:
212 if '.data' in path:
213 if '.data' in path:
213 path = path[:path.index('.data')]
214 path = path[:path.index('.data')]
214 ui.write("%s:\n" % path)
215 ui.write("%s:\n" % path)
215 dpack = datapack.datapack(path)
216 dpack = datapack.datapack(path)
216 node = opts.get(r'node')
217 node = opts.get(r'node')
217 if node:
218 if node:
218 deltachain = dpack.getdeltachain('', bin(node))
219 deltachain = dpack.getdeltachain('', bin(node))
219 dumpdeltachain(ui, deltachain, **opts)
220 dumpdeltachain(ui, deltachain, **opts)
220 return
221 return
221
222
222 if opts.get(r'long'):
223 if opts.get(r'long'):
223 hashformatter = hex
224 hashformatter = hex
224 hashlen = 42
225 hashlen = 42
225 else:
226 else:
226 hashformatter = short
227 hashformatter = short
227 hashlen = 14
228 hashlen = 14
228
229
229 lastfilename = None
230 lastfilename = None
230 totaldeltasize = 0
231 totaldeltasize = 0
231 totalblobsize = 0
232 totalblobsize = 0
232 def printtotals():
233 def printtotals():
233 if lastfilename is not None:
234 if lastfilename is not None:
234 ui.write("\n")
235 ui.write("\n")
235 if not totaldeltasize or not totalblobsize:
236 if not totaldeltasize or not totalblobsize:
236 return
237 return
237 difference = totalblobsize - totaldeltasize
238 difference = totalblobsize - totaldeltasize
238 deltastr = "%0.1f%% %s" % (
239 deltastr = "%0.1f%% %s" % (
239 (100.0 * abs(difference) / totalblobsize),
240 (100.0 * abs(difference) / totalblobsize),
240 ("smaller" if difference > 0 else "bigger"))
241 ("smaller" if difference > 0 else "bigger"))
241
242
242 ui.write(("Total:%s%s %s (%s)\n") % (
243 ui.write(("Total:%s%s %s (%s)\n") % (
243 "".ljust(2 * hashlen - len("Total:")),
244 "".ljust(2 * hashlen - len("Total:")),
244 str(totaldeltasize).ljust(12),
245 str(totaldeltasize).ljust(12),
245 str(totalblobsize).ljust(9),
246 str(totalblobsize).ljust(9),
246 deltastr
247 deltastr
247 ))
248 ))
248
249
249 bases = {}
250 bases = {}
250 nodes = set()
251 nodes = set()
251 failures = 0
252 failures = 0
252 for filename, node, deltabase, deltalen in dpack.iterentries():
253 for filename, node, deltabase, deltalen in dpack.iterentries():
253 bases[node] = deltabase
254 bases[node] = deltabase
254 if node in nodes:
255 if node in nodes:
255 ui.write(("Bad entry: %s appears twice\n" % short(node)))
256 ui.write(("Bad entry: %s appears twice\n" % short(node)))
256 failures += 1
257 failures += 1
257 nodes.add(node)
258 nodes.add(node)
258 if filename != lastfilename:
259 if filename != lastfilename:
259 printtotals()
260 printtotals()
260 name = '(empty name)' if filename == '' else filename
261 name = '(empty name)' if filename == '' else filename
261 ui.write("%s:\n" % name)
262 ui.write("%s:\n" % name)
262 ui.write("%s%s%s%s\n" % (
263 ui.write("%s%s%s%s\n" % (
263 "Node".ljust(hashlen),
264 "Node".ljust(hashlen),
264 "Delta Base".ljust(hashlen),
265 "Delta Base".ljust(hashlen),
265 "Delta Length".ljust(14),
266 "Delta Length".ljust(14),
266 "Blob Size".ljust(9)))
267 "Blob Size".ljust(9)))
267 lastfilename = filename
268 lastfilename = filename
268 totalblobsize = 0
269 totalblobsize = 0
269 totaldeltasize = 0
270 totaldeltasize = 0
270
271
271 # Metadata could be missing, in which case it will be an empty dict.
272 # Metadata could be missing, in which case it will be an empty dict.
272 meta = dpack.getmeta(filename, node)
273 meta = dpack.getmeta(filename, node)
273 if constants.METAKEYSIZE in meta:
274 if constants.METAKEYSIZE in meta:
274 blobsize = meta[constants.METAKEYSIZE]
275 blobsize = meta[constants.METAKEYSIZE]
275 totaldeltasize += deltalen
276 totaldeltasize += deltalen
276 totalblobsize += blobsize
277 totalblobsize += blobsize
277 else:
278 else:
278 blobsize = "(missing)"
279 blobsize = "(missing)"
279 ui.write("%s %s %s%s\n" % (
280 ui.write("%s %s %s%s\n" % (
280 hashformatter(node),
281 hashformatter(node),
281 hashformatter(deltabase),
282 hashformatter(deltabase),
282 str(deltalen).ljust(14),
283 str(deltalen).ljust(14),
283 blobsize))
284 blobsize))
284
285
285 if filename is not None:
286 if filename is not None:
286 printtotals()
287 printtotals()
287
288
288 failures += _sanitycheck(ui, set(nodes), bases)
289 failures += _sanitycheck(ui, set(nodes), bases)
289 if failures > 1:
290 if failures > 1:
290 ui.warn(("%d failures\n" % failures))
291 ui.warn(("%d failures\n" % failures))
291 return 1
292 return 1
292
293
293 def _sanitycheck(ui, nodes, bases):
294 def _sanitycheck(ui, nodes, bases):
294 """
295 """
295 Does some basic sanity checking on a packfiles with ``nodes`` ``bases`` (a
296 Does some basic sanity checking on a packfiles with ``nodes`` ``bases`` (a
296 mapping of node->base):
297 mapping of node->base):
297
298
298 - Each deltabase must itself be a node elsewhere in the pack
299 - Each deltabase must itself be a node elsewhere in the pack
299 - There must be no cycles
300 - There must be no cycles
300 """
301 """
301 failures = 0
302 failures = 0
302 for node in nodes:
303 for node in nodes:
303 seen = set()
304 seen = set()
304 current = node
305 current = node
305 deltabase = bases[current]
306 deltabase = bases[current]
306
307
307 while deltabase != nullid:
308 while deltabase != nullid:
308 if deltabase not in nodes:
309 if deltabase not in nodes:
309 ui.warn(("Bad entry: %s has an unknown deltabase (%s)\n" %
310 ui.warn(("Bad entry: %s has an unknown deltabase (%s)\n" %
310 (short(node), short(deltabase))))
311 (short(node), short(deltabase))))
311 failures += 1
312 failures += 1
312 break
313 break
313
314
314 if deltabase in seen:
315 if deltabase in seen:
315 ui.warn(("Bad entry: %s has a cycle (at %s)\n" %
316 ui.warn(("Bad entry: %s has a cycle (at %s)\n" %
316 (short(node), short(deltabase))))
317 (short(node), short(deltabase))))
317 failures += 1
318 failures += 1
318 break
319 break
319
320
320 current = deltabase
321 current = deltabase
321 seen.add(current)
322 seen.add(current)
322 deltabase = bases[current]
323 deltabase = bases[current]
323 # Since ``node`` begins a valid chain, reset/memoize its base to nullid
324 # Since ``node`` begins a valid chain, reset/memoize its base to nullid
324 # so we don't traverse it again.
325 # so we don't traverse it again.
325 bases[node] = nullid
326 bases[node] = nullid
326 return failures
327 return failures
327
328
328 def dumpdeltachain(ui, deltachain, **opts):
329 def dumpdeltachain(ui, deltachain, **opts):
329 hashformatter = hex
330 hashformatter = hex
330 hashlen = 40
331 hashlen = 40
331
332
332 lastfilename = None
333 lastfilename = None
333 for filename, node, filename, deltabasenode, delta in deltachain:
334 for filename, node, filename, deltabasenode, delta in deltachain:
334 if filename != lastfilename:
335 if filename != lastfilename:
335 ui.write("\n%s\n" % filename)
336 ui.write("\n%s\n" % filename)
336 lastfilename = filename
337 lastfilename = filename
337 ui.write("%s %s %s %s\n" % (
338 ui.write("%s %s %s %s\n" % (
338 "Node".ljust(hashlen),
339 "Node".ljust(hashlen),
339 "Delta Base".ljust(hashlen),
340 "Delta Base".ljust(hashlen),
340 "Delta SHA1".ljust(hashlen),
341 "Delta SHA1".ljust(hashlen),
341 "Delta Length".ljust(6),
342 "Delta Length".ljust(6),
342 ))
343 ))
343
344
344 ui.write("%s %s %s %s\n" % (
345 ui.write("%s %s %s %s\n" % (
345 hashformatter(node),
346 hashformatter(node),
346 hashformatter(deltabasenode),
347 hashformatter(deltabasenode),
347 hashlib.sha1(delta).hexdigest(),
348 nodemod.hex(hashlib.sha1(delta).digest()),
348 len(delta)))
349 len(delta)))
349
350
350 def debughistorypack(ui, path):
351 def debughistorypack(ui, path):
351 if '.hist' in path:
352 if '.hist' in path:
352 path = path[:path.index('.hist')]
353 path = path[:path.index('.hist')]
353 hpack = historypack.historypack(path)
354 hpack = historypack.historypack(path)
354
355
355 lastfilename = None
356 lastfilename = None
356 for entry in hpack.iterentries():
357 for entry in hpack.iterentries():
357 filename, node, p1node, p2node, linknode, copyfrom = entry
358 filename, node, p1node, p2node, linknode, copyfrom = entry
358 if filename != lastfilename:
359 if filename != lastfilename:
359 ui.write("\n%s\n" % filename)
360 ui.write("\n%s\n" % filename)
360 ui.write("%s%s%s%s%s\n" % (
361 ui.write("%s%s%s%s%s\n" % (
361 "Node".ljust(14),
362 "Node".ljust(14),
362 "P1 Node".ljust(14),
363 "P1 Node".ljust(14),
363 "P2 Node".ljust(14),
364 "P2 Node".ljust(14),
364 "Link Node".ljust(14),
365 "Link Node".ljust(14),
365 "Copy From"))
366 "Copy From"))
366 lastfilename = filename
367 lastfilename = filename
367 ui.write("%s %s %s %s %s\n" % (short(node), short(p1node),
368 ui.write("%s %s %s %s %s\n" % (short(node), short(p1node),
368 short(p2node), short(linknode), copyfrom))
369 short(p2node), short(linknode), copyfrom))
369
370
370 def debugwaitonrepack(repo):
371 def debugwaitonrepack(repo):
371 with extutil.flock(repack.repacklockvfs(repo).join('repacklock'), ''):
372 with extutil.flock(repack.repacklockvfs(repo).join('repacklock'), ''):
372 return
373 return
373
374
374 def debugwaitonprefetch(repo):
375 def debugwaitonprefetch(repo):
375 with repo._lock(repo.svfs, "prefetchlock", True, None,
376 with repo._lock(repo.svfs, "prefetchlock", True, None,
376 None, _('prefetching in %s') % repo.origroot):
377 None, _('prefetching in %s') % repo.origroot):
377 pass
378 pass
fileserverclient.py
@@ -1,588 +1,589 @@
1 # fileserverclient.py - client for communicating with the cache process
1 # fileserverclient.py - client for communicating with the cache process
2 #
2 #
3 # Copyright 2013 Facebook, Inc.
3 # Copyright 2013 Facebook, Inc.
4 #
4 #
5 # This software may be used and distributed according to the terms of the
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
6 # GNU General Public License version 2 or any later version.
7
7
8 from __future__ import absolute_import
8 from __future__ import absolute_import
9
9
10 import hashlib
10 import hashlib
11 import io
11 import io
12 import os
12 import os
13 import threading
13 import threading
14 import time
14 import time
15 import zlib
15 import zlib
16
16
17 from mercurial.i18n import _
17 from mercurial.i18n import _
18 from mercurial.node import bin, hex, nullid
18 from mercurial.node import bin, hex, nullid
19 from mercurial import (
19 from mercurial import (
20 error,
20 error,
21 node,
21 pycompat,
22 pycompat,
22 revlog,
23 revlog,
23 sshpeer,
24 sshpeer,
24 util,
25 util,
25 wireprotov1peer,
26 wireprotov1peer,
26 )
27 )
27 from mercurial.utils import procutil
28 from mercurial.utils import procutil
28
29
29 from . import (
30 from . import (
30 constants,
31 constants,
31 contentstore,
32 contentstore,
32 metadatastore,
33 metadatastore,
33 )
34 )
34
35
35 _sshv1peer = sshpeer.sshv1peer
36 _sshv1peer = sshpeer.sshv1peer
36
37
37 # Statistics for debugging
38 # Statistics for debugging
38 fetchcost = 0
39 fetchcost = 0
39 fetches = 0
40 fetches = 0
40 fetched = 0
41 fetched = 0
41 fetchmisses = 0
42 fetchmisses = 0
42
43
43 _lfsmod = None
44 _lfsmod = None
44 _downloading = _('downloading')
45 _downloading = _('downloading')
45
46
46 def getcachekey(reponame, file, id):
47 def getcachekey(reponame, file, id):
47 pathhash = hashlib.sha1(file).hexdigest()
48 pathhash = node.hex(hashlib.sha1(file).digest())
48 return os.path.join(reponame, pathhash[:2], pathhash[2:], id)
49 return os.path.join(reponame, pathhash[:2], pathhash[2:], id)
49
50
50 def getlocalkey(file, id):
51 def getlocalkey(file, id):
51 pathhash = hashlib.sha1(file).hexdigest()
52 pathhash = node.hex(hashlib.sha1(file).digest())
52 return os.path.join(pathhash, id)
53 return os.path.join(pathhash, id)
53
54
54 def peersetup(ui, peer):
55 def peersetup(ui, peer):
55
56
56 class remotefilepeer(peer.__class__):
57 class remotefilepeer(peer.__class__):
57 @wireprotov1peer.batchable
58 @wireprotov1peer.batchable
58 def x_rfl_getfile(self, file, node):
59 def x_rfl_getfile(self, file, node):
59 if not self.capable('x_rfl_getfile'):
60 if not self.capable('x_rfl_getfile'):
60 raise error.Abort(
61 raise error.Abort(
61 'configured remotefile server does not support getfile')
62 'configured remotefile server does not support getfile')
62 f = wireprotov1peer.future()
63 f = wireprotov1peer.future()
63 yield {'file': file, 'node': node}, f
64 yield {'file': file, 'node': node}, f
64 code, data = f.value.split('\0', 1)
65 code, data = f.value.split('\0', 1)
65 if int(code):
66 if int(code):
66 raise error.LookupError(file, node, data)
67 raise error.LookupError(file, node, data)
67 yield data
68 yield data
68
69
69 @wireprotov1peer.batchable
70 @wireprotov1peer.batchable
70 def x_rfl_getflogheads(self, path):
71 def x_rfl_getflogheads(self, path):
71 if not self.capable('x_rfl_getflogheads'):
72 if not self.capable('x_rfl_getflogheads'):
72 raise error.Abort('configured remotefile server does not '
73 raise error.Abort('configured remotefile server does not '
73 'support getflogheads')
74 'support getflogheads')
74 f = wireprotov1peer.future()
75 f = wireprotov1peer.future()
75 yield {'path': path}, f
76 yield {'path': path}, f
76 heads = f.value.split('\n') if f.value else []
77 heads = f.value.split('\n') if f.value else []
77 yield heads
78 yield heads
78
79
79 def _updatecallstreamopts(self, command, opts):
80 def _updatecallstreamopts(self, command, opts):
80 if command != 'getbundle':
81 if command != 'getbundle':
81 return
82 return
82 if (constants.NETWORK_CAP_LEGACY_SSH_GETFILES
83 if (constants.NETWORK_CAP_LEGACY_SSH_GETFILES
83 not in self.capabilities()):
84 not in self.capabilities()):
84 return
85 return
85 if not util.safehasattr(self, '_localrepo'):
86 if not util.safehasattr(self, '_localrepo'):
86 return
87 return
87 if (constants.SHALLOWREPO_REQUIREMENT
88 if (constants.SHALLOWREPO_REQUIREMENT
88 not in self._localrepo.requirements):
89 not in self._localrepo.requirements):
89 return
90 return
90
91
91 bundlecaps = opts.get('bundlecaps')
92 bundlecaps = opts.get('bundlecaps')
92 if bundlecaps:
93 if bundlecaps:
93 bundlecaps = [bundlecaps]
94 bundlecaps = [bundlecaps]
94 else:
95 else:
95 bundlecaps = []
96 bundlecaps = []
96
97
97 # shallow, includepattern, and excludepattern are a hacky way of
98 # shallow, includepattern, and excludepattern are a hacky way of
98 # carrying over data from the local repo to this getbundle
99 # carrying over data from the local repo to this getbundle
99 # command. We need to do it this way because bundle1 getbundle
100 # command. We need to do it this way because bundle1 getbundle
100 # doesn't provide any other place we can hook in to manipulate
101 # doesn't provide any other place we can hook in to manipulate
101 # getbundle args before it goes across the wire. Once we get rid
102 # getbundle args before it goes across the wire. Once we get rid
102 # of bundle1, we can use bundle2's _pullbundle2extraprepare to
103 # of bundle1, we can use bundle2's _pullbundle2extraprepare to
103 # do this more cleanly.
104 # do this more cleanly.
104 bundlecaps.append(constants.BUNDLE2_CAPABLITY)
105 bundlecaps.append(constants.BUNDLE2_CAPABLITY)
105 if self._localrepo.includepattern:
106 if self._localrepo.includepattern:
106 patterns = '\0'.join(self._localrepo.includepattern)
107 patterns = '\0'.join(self._localrepo.includepattern)
107 includecap = "includepattern=" + patterns
108 includecap = "includepattern=" + patterns
108 bundlecaps.append(includecap)
109 bundlecaps.append(includecap)
109 if self._localrepo.excludepattern:
110 if self._localrepo.excludepattern:
110 patterns = '\0'.join(self._localrepo.excludepattern)
111 patterns = '\0'.join(self._localrepo.excludepattern)
111 excludecap = "excludepattern=" + patterns
112 excludecap = "excludepattern=" + patterns
112 bundlecaps.append(excludecap)
113 bundlecaps.append(excludecap)
113 opts['bundlecaps'] = ','.join(bundlecaps)
114 opts['bundlecaps'] = ','.join(bundlecaps)
114
115
115 def _sendrequest(self, command, args, **opts):
116 def _sendrequest(self, command, args, **opts):
116 self._updatecallstreamopts(command, args)
117 self._updatecallstreamopts(command, args)
117 return super(remotefilepeer, self)._sendrequest(command, args,
118 return super(remotefilepeer, self)._sendrequest(command, args,
118 **opts)
119 **opts)
119
120
120 def _callstream(self, command, **opts):
121 def _callstream(self, command, **opts):
121 supertype = super(remotefilepeer, self)
122 supertype = super(remotefilepeer, self)
122 if not util.safehasattr(supertype, '_sendrequest'):
123 if not util.safehasattr(supertype, '_sendrequest'):
123 self._updatecallstreamopts(command, pycompat.byteskwargs(opts))
124 self._updatecallstreamopts(command, pycompat.byteskwargs(opts))
124 return super(remotefilepeer, self)._callstream(command, **opts)
125 return super(remotefilepeer, self)._callstream(command, **opts)
125
126
126 peer.__class__ = remotefilepeer
127 peer.__class__ = remotefilepeer
127
128
128 class cacheconnection(object):
129 class cacheconnection(object):
129 """The connection for communicating with the remote cache. Performs
130 """The connection for communicating with the remote cache. Performs
130 gets and sets by communicating with an external process that has the
131 gets and sets by communicating with an external process that has the
131 cache-specific implementation.
132 cache-specific implementation.
132 """
133 """
133 def __init__(self):
134 def __init__(self):
134 self.pipeo = self.pipei = self.pipee = None
135 self.pipeo = self.pipei = self.pipee = None
135 self.subprocess = None
136 self.subprocess = None
136 self.connected = False
137 self.connected = False
137
138
138 def connect(self, cachecommand):
139 def connect(self, cachecommand):
139 if self.pipeo:
140 if self.pipeo:
140 raise error.Abort(_("cache connection already open"))
141 raise error.Abort(_("cache connection already open"))
141 self.pipei, self.pipeo, self.pipee, self.subprocess = \
142 self.pipei, self.pipeo, self.pipee, self.subprocess = \
142 procutil.popen4(cachecommand)
143 procutil.popen4(cachecommand)
143 self.connected = True
144 self.connected = True
144
145
145 def close(self):
146 def close(self):
146 def tryclose(pipe):
147 def tryclose(pipe):
147 try:
148 try:
148 pipe.close()
149 pipe.close()
149 except Exception:
150 except Exception:
150 pass
151 pass
151 if self.connected:
152 if self.connected:
152 try:
153 try:
153 self.pipei.write("exit\n")
154 self.pipei.write("exit\n")
154 except Exception:
155 except Exception:
155 pass
156 pass
156 tryclose(self.pipei)
157 tryclose(self.pipei)
157 self.pipei = None
158 self.pipei = None
158 tryclose(self.pipeo)
159 tryclose(self.pipeo)
159 self.pipeo = None
160 self.pipeo = None
160 tryclose(self.pipee)
161 tryclose(self.pipee)
161 self.pipee = None
162 self.pipee = None
162 try:
163 try:
163 # Wait for process to terminate, making sure to avoid deadlock.
164 # Wait for process to terminate, making sure to avoid deadlock.
164 # See https://docs.python.org/2/library/subprocess.html for
165 # See https://docs.python.org/2/library/subprocess.html for
165 # warnings about wait() and deadlocking.
166 # warnings about wait() and deadlocking.
166 self.subprocess.communicate()
167 self.subprocess.communicate()
167 except Exception:
168 except Exception:
168 pass
169 pass
169 self.subprocess = None
170 self.subprocess = None
170 self.connected = False
171 self.connected = False
171
172
172 def request(self, request, flush=True):
173 def request(self, request, flush=True):
173 if self.connected:
174 if self.connected:
174 try:
175 try:
175 self.pipei.write(request)
176 self.pipei.write(request)
176 if flush:
177 if flush:
177 self.pipei.flush()
178 self.pipei.flush()
178 except IOError:
179 except IOError:
179 self.close()
180 self.close()
180
181
181 def receiveline(self):
182 def receiveline(self):
182 if not self.connected:
183 if not self.connected:
183 return None
184 return None
184 try:
185 try:
185 result = self.pipeo.readline()[:-1]
186 result = self.pipeo.readline()[:-1]
186 if not result:
187 if not result:
187 self.close()
188 self.close()
188 except IOError:
189 except IOError:
189 self.close()
190 self.close()
190
191
191 return result
192 return result
192
193
193 def _getfilesbatch(
194 def _getfilesbatch(
194 remote, receivemissing, progresstick, missed, idmap, batchsize):
195 remote, receivemissing, progresstick, missed, idmap, batchsize):
195 # Over http(s), iterbatch is a streamy method and we can start
196 # Over http(s), iterbatch is a streamy method and we can start
196 # looking at results early. This means we send one (potentially
197 # looking at results early. This means we send one (potentially
197 # large) request, but then we show nice progress as we process
198 # large) request, but then we show nice progress as we process
198 # file results, rather than showing chunks of $batchsize in
199 # file results, rather than showing chunks of $batchsize in
199 # progress.
200 # progress.
200 #
201 #
201 # Over ssh, iterbatch isn't streamy because batch() wasn't
202 # Over ssh, iterbatch isn't streamy because batch() wasn't
202 # explicitly designed as a streaming method. In the future we
203 # explicitly designed as a streaming method. In the future we
203 # should probably introduce a streambatch() method upstream and
204 # should probably introduce a streambatch() method upstream and
204 # use that for this.
205 # use that for this.
205 with remote.commandexecutor() as e:
206 with remote.commandexecutor() as e:
206 futures = []
207 futures = []
207 for m in missed:
208 for m in missed:
208 futures.append(e.callcommand('x_rfl_getfile', {
209 futures.append(e.callcommand('x_rfl_getfile', {
209 'file': idmap[m],
210 'file': idmap[m],
210 'node': m[-40:]
211 'node': m[-40:]
211 }))
212 }))
212
213
213 for i, m in enumerate(missed):
214 for i, m in enumerate(missed):
214 r = futures[i].result()
215 r = futures[i].result()
215 futures[i] = None # release memory
216 futures[i] = None # release memory
216 file_ = idmap[m]
217 file_ = idmap[m]
217 node = m[-40:]
218 node = m[-40:]
218 receivemissing(io.BytesIO('%d\n%s' % (len(r), r)), file_, node)
219 receivemissing(io.BytesIO('%d\n%s' % (len(r), r)), file_, node)
219 progresstick()
220 progresstick()
220
221
221 def _getfiles_optimistic(
222 def _getfiles_optimistic(
222 remote, receivemissing, progresstick, missed, idmap, step):
223 remote, receivemissing, progresstick, missed, idmap, step):
223 remote._callstream("x_rfl_getfiles")
224 remote._callstream("x_rfl_getfiles")
224 i = 0
225 i = 0
225 pipeo = remote._pipeo
226 pipeo = remote._pipeo
226 pipei = remote._pipei
227 pipei = remote._pipei
227 while i < len(missed):
228 while i < len(missed):
228 # issue a batch of requests
229 # issue a batch of requests
229 start = i
230 start = i
230 end = min(len(missed), start + step)
231 end = min(len(missed), start + step)
231 i = end
232 i = end
232 for missingid in missed[start:end]:
233 for missingid in missed[start:end]:
233 # issue new request
234 # issue new request
234 versionid = missingid[-40:]
235 versionid = missingid[-40:]
235 file = idmap[missingid]
236 file = idmap[missingid]
236 sshrequest = "%s%s\n" % (versionid, file)
237 sshrequest = "%s%s\n" % (versionid, file)
237 pipeo.write(sshrequest)
238 pipeo.write(sshrequest)
238 pipeo.flush()
239 pipeo.flush()
239
240
240 # receive batch results
241 # receive batch results
241 for missingid in missed[start:end]:
242 for missingid in missed[start:end]:
242 versionid = missingid[-40:]
243 versionid = missingid[-40:]
243 file = idmap[missingid]
244 file = idmap[missingid]
244 receivemissing(pipei, file, versionid)
245 receivemissing(pipei, file, versionid)
245 progresstick()
246 progresstick()
246
247
247 # End the command
248 # End the command
248 pipeo.write('\n')
249 pipeo.write('\n')
249 pipeo.flush()
250 pipeo.flush()
250
251
251 def _getfiles_threaded(
252 def _getfiles_threaded(
252 remote, receivemissing, progresstick, missed, idmap, step):
253 remote, receivemissing, progresstick, missed, idmap, step):
253 remote._callstream("getfiles")
254 remote._callstream("getfiles")
254 pipeo = remote._pipeo
255 pipeo = remote._pipeo
255 pipei = remote._pipei
256 pipei = remote._pipei
256
257
257 def writer():
258 def writer():
258 for missingid in missed:
259 for missingid in missed:
259 versionid = missingid[-40:]
260 versionid = missingid[-40:]
260 file = idmap[missingid]
261 file = idmap[missingid]
261 sshrequest = "%s%s\n" % (versionid, file)
262 sshrequest = "%s%s\n" % (versionid, file)
262 pipeo.write(sshrequest)
263 pipeo.write(sshrequest)
263 pipeo.flush()
264 pipeo.flush()
264 writerthread = threading.Thread(target=writer)
265 writerthread = threading.Thread(target=writer)
265 writerthread.daemon = True
266 writerthread.daemon = True
266 writerthread.start()
267 writerthread.start()
267
268
268 for missingid in missed:
269 for missingid in missed:
269 versionid = missingid[-40:]
270 versionid = missingid[-40:]
270 file = idmap[missingid]
271 file = idmap[missingid]
271 receivemissing(pipei, file, versionid)
272 receivemissing(pipei, file, versionid)
272 progresstick()
273 progresstick()
273
274
274 writerthread.join()
275 writerthread.join()
275 # End the command
276 # End the command
276 pipeo.write('\n')
277 pipeo.write('\n')
277 pipeo.flush()
278 pipeo.flush()
278
279
279 class fileserverclient(object):
280 class fileserverclient(object):
280 """A client for requesting files from the remote file server.
281 """A client for requesting files from the remote file server.
281 """
282 """
282 def __init__(self, repo):
283 def __init__(self, repo):
283 ui = repo.ui
284 ui = repo.ui
284 self.repo = repo
285 self.repo = repo
285 self.ui = ui
286 self.ui = ui
286 self.cacheprocess = ui.config("remotefilelog", "cacheprocess")
287 self.cacheprocess = ui.config("remotefilelog", "cacheprocess")
287 if self.cacheprocess:
288 if self.cacheprocess:
288 self.cacheprocess = util.expandpath(self.cacheprocess)
289 self.cacheprocess = util.expandpath(self.cacheprocess)
289
290
290 # This option causes remotefilelog to pass the full file path to the
291 # This option causes remotefilelog to pass the full file path to the
291 # cacheprocess instead of a hashed key.
292 # cacheprocess instead of a hashed key.
292 self.cacheprocesspasspath = ui.configbool(
293 self.cacheprocesspasspath = ui.configbool(
293 "remotefilelog", "cacheprocess.includepath")
294 "remotefilelog", "cacheprocess.includepath")
294
295
295 self.debugoutput = ui.configbool("remotefilelog", "debug")
296 self.debugoutput = ui.configbool("remotefilelog", "debug")
296
297
297 self.remotecache = cacheconnection()
298 self.remotecache = cacheconnection()
298
299
299 def setstore(self, datastore, historystore, writedata, writehistory):
300 def setstore(self, datastore, historystore, writedata, writehistory):
300 self.datastore = datastore
301 self.datastore = datastore
301 self.historystore = historystore
302 self.historystore = historystore
302 self.writedata = writedata
303 self.writedata = writedata
303 self.writehistory = writehistory
304 self.writehistory = writehistory
304
305
305 def _connect(self):
306 def _connect(self):
306 return self.repo.connectionpool.get(self.repo.fallbackpath)
307 return self.repo.connectionpool.get(self.repo.fallbackpath)
307
308
308 def request(self, fileids):
309 def request(self, fileids):
309 """Takes a list of filename/node pairs and fetches them from the
310 """Takes a list of filename/node pairs and fetches them from the
310 server. Files are stored in the local cache.
311 server. Files are stored in the local cache.
311 A list of nodes that the server couldn't find is returned.
312 A list of nodes that the server couldn't find is returned.
312 If the connection fails, an exception is raised.
313 If the connection fails, an exception is raised.
313 """
314 """
314 if not self.remotecache.connected:
315 if not self.remotecache.connected:
315 self.connect()
316 self.connect()
316 cache = self.remotecache
317 cache = self.remotecache
317 writedata = self.writedata
318 writedata = self.writedata
318
319
319 repo = self.repo
320 repo = self.repo
320 count = len(fileids)
321 count = len(fileids)
321 request = "get\n%d\n" % count
322 request = "get\n%d\n" % count
322 idmap = {}
323 idmap = {}
323 reponame = repo.name
324 reponame = repo.name
324 for file, id in fileids:
325 for file, id in fileids:
325 fullid = getcachekey(reponame, file, id)
326 fullid = getcachekey(reponame, file, id)
326 if self.cacheprocesspasspath:
327 if self.cacheprocesspasspath:
327 request += file + '\0'
328 request += file + '\0'
328 request += fullid + "\n"
329 request += fullid + "\n"
329 idmap[fullid] = file
330 idmap[fullid] = file
330
331
331 cache.request(request)
332 cache.request(request)
332
333
333 total = count
334 total = count
334 self.ui.progress(_downloading, 0, total=count)
335 self.ui.progress(_downloading, 0, total=count)
335
336
336 missed = []
337 missed = []
337 count = 0
338 count = 0
338 while True:
339 while True:
339 missingid = cache.receiveline()
340 missingid = cache.receiveline()
340 if not missingid:
341 if not missingid:
341 missedset = set(missed)
342 missedset = set(missed)
342 for missingid in idmap.iterkeys():
343 for missingid in idmap.iterkeys():
343 if not missingid in missedset:
344 if not missingid in missedset:
344 missed.append(missingid)
345 missed.append(missingid)
345 self.ui.warn(_("warning: cache connection closed early - " +
346 self.ui.warn(_("warning: cache connection closed early - " +
346 "falling back to server\n"))
347 "falling back to server\n"))
347 break
348 break
348 if missingid == "0":
349 if missingid == "0":
349 break
350 break
350 if missingid.startswith("_hits_"):
351 if missingid.startswith("_hits_"):
351 # receive progress reports
352 # receive progress reports
352 parts = missingid.split("_")
353 parts = missingid.split("_")
353 count += int(parts[2])
354 count += int(parts[2])
354 self.ui.progress(_downloading, count, total=total)
355 self.ui.progress(_downloading, count, total=total)
355 continue
356 continue
356
357
357 missed.append(missingid)
358 missed.append(missingid)
358
359
359 global fetchmisses
360 global fetchmisses
360 fetchmisses += len(missed)
361 fetchmisses += len(missed)
361
362
362 count = [total - len(missed)]
363 count = [total - len(missed)]
363 fromcache = count[0]
364 fromcache = count[0]
364 self.ui.progress(_downloading, count[0], total=total)
365 self.ui.progress(_downloading, count[0], total=total)
365 self.ui.log("remotefilelog", "remote cache hit rate is %r of %r\n",
366 self.ui.log("remotefilelog", "remote cache hit rate is %r of %r\n",
366 count[0], total, hit=count[0], total=total)
367 count[0], total, hit=count[0], total=total)
367
368
368 oldumask = os.umask(0o002)
369 oldumask = os.umask(0o002)
369 try:
370 try:
370 # receive cache misses from master
371 # receive cache misses from master
371 if missed:
372 if missed:
372 def progresstick():
373 def progresstick():
373 count[0] += 1
374 count[0] += 1
374 self.ui.progress(_downloading, count[0], total=total)
375 self.ui.progress(_downloading, count[0], total=total)
375 # When verbose is true, sshpeer prints 'running ssh...'
376 # When verbose is true, sshpeer prints 'running ssh...'
376 # to stdout, which can interfere with some command
377 # to stdout, which can interfere with some command
377 # outputs
378 # outputs
378 verbose = self.ui.verbose
379 verbose = self.ui.verbose
379 self.ui.verbose = False
380 self.ui.verbose = False
380 try:
381 try:
381 with self._connect() as conn:
382 with self._connect() as conn:
382 remote = conn.peer
383 remote = conn.peer
383 if remote.capable(
384 if remote.capable(
384 constants.NETWORK_CAP_LEGACY_SSH_GETFILES):
385 constants.NETWORK_CAP_LEGACY_SSH_GETFILES):
385 if not isinstance(remote, _sshv1peer):
386 if not isinstance(remote, _sshv1peer):
386 raise error.Abort('remotefilelog requires ssh '
387 raise error.Abort('remotefilelog requires ssh '
387 'servers')
388 'servers')
388 step = self.ui.configint('remotefilelog',
389 step = self.ui.configint('remotefilelog',
389 'getfilesstep')
390 'getfilesstep')
390 getfilestype = self.ui.config('remotefilelog',
391 getfilestype = self.ui.config('remotefilelog',
391 'getfilestype')
392 'getfilestype')
392 if getfilestype == 'threaded':
393 if getfilestype == 'threaded':
393 _getfiles = _getfiles_threaded
394 _getfiles = _getfiles_threaded
394 else:
395 else:
395 _getfiles = _getfiles_optimistic
396 _getfiles = _getfiles_optimistic
396 _getfiles(remote, self.receivemissing, progresstick,
397 _getfiles(remote, self.receivemissing, progresstick,
397 missed, idmap, step)
398 missed, idmap, step)
398 elif remote.capable("x_rfl_getfile"):
399 elif remote.capable("x_rfl_getfile"):
399 if remote.capable('batch'):
400 if remote.capable('batch'):
400 batchdefault = 100
401 batchdefault = 100
401 else:
402 else:
402 batchdefault = 10
403 batchdefault = 10
403 batchsize = self.ui.configint(
404 batchsize = self.ui.configint(
404 'remotefilelog', 'batchsize', batchdefault)
405 'remotefilelog', 'batchsize', batchdefault)
405 _getfilesbatch(
406 _getfilesbatch(
406 remote, self.receivemissing, progresstick,
407 remote, self.receivemissing, progresstick,
407 missed, idmap, batchsize)
408 missed, idmap, batchsize)
408 else:
409 else:
409 raise error.Abort("configured remotefilelog server"
410 raise error.Abort("configured remotefilelog server"
410 " does not support remotefilelog")
411 " does not support remotefilelog")
411
412
412 self.ui.log("remotefilefetchlog",
413 self.ui.log("remotefilefetchlog",
413 "Success\n",
414 "Success\n",
414 fetched_files = count[0] - fromcache,
415 fetched_files = count[0] - fromcache,
415 total_to_fetch = total - fromcache)
416 total_to_fetch = total - fromcache)
416 except Exception:
417 except Exception:
417 self.ui.log("remotefilefetchlog",
418 self.ui.log("remotefilefetchlog",
418 "Fail\n",
419 "Fail\n",
419 fetched_files = count[0] - fromcache,
420 fetched_files = count[0] - fromcache,
420 total_to_fetch = total - fromcache)
421 total_to_fetch = total - fromcache)
421 raise
422 raise
422 finally:
423 finally:
423 self.ui.verbose = verbose
424 self.ui.verbose = verbose
424 # send to memcache
425 # send to memcache
425 count[0] = len(missed)
426 count[0] = len(missed)
426 request = "set\n%d\n%s\n" % (count[0], "\n".join(missed))
427 request = "set\n%d\n%s\n" % (count[0], "\n".join(missed))
427 cache.request(request)
428 cache.request(request)
428
429
429 self.ui.progress(_downloading, None)
430 self.ui.progress(_downloading, None)
430
431
431 # mark ourselves as a user of this cache
432 # mark ourselves as a user of this cache
432 writedata.markrepo(self.repo.path)
433 writedata.markrepo(self.repo.path)
433 finally:
434 finally:
434 os.umask(oldumask)
435 os.umask(oldumask)
435
436
436 def receivemissing(self, pipe, filename, node):
437 def receivemissing(self, pipe, filename, node):
437 line = pipe.readline()[:-1]
438 line = pipe.readline()[:-1]
438 if not line:
439 if not line:
439 raise error.ResponseError(_("error downloading file contents:"),
440 raise error.ResponseError(_("error downloading file contents:"),
440 _("connection closed early"))
441 _("connection closed early"))
441 size = int(line)
442 size = int(line)
442 data = pipe.read(size)
443 data = pipe.read(size)
443 if len(data) != size:
444 if len(data) != size:
444 raise error.ResponseError(_("error downloading file contents:"),
445 raise error.ResponseError(_("error downloading file contents:"),
445 _("only received %s of %s bytes")
446 _("only received %s of %s bytes")
446 % (len(data), size))
447 % (len(data), size))
447
448
448 self.writedata.addremotefilelognode(filename, bin(node),
449 self.writedata.addremotefilelognode(filename, bin(node),
449 zlib.decompress(data))
450 zlib.decompress(data))
450
451
451 def connect(self):
452 def connect(self):
452 if self.cacheprocess:
453 if self.cacheprocess:
453 cmd = "%s %s" % (self.cacheprocess, self.writedata._path)
454 cmd = "%s %s" % (self.cacheprocess, self.writedata._path)
454 self.remotecache.connect(cmd)
455 self.remotecache.connect(cmd)
455 else:
456 else:
456 # If no cache process is specified, we fake one that always
457 # If no cache process is specified, we fake one that always
457 # returns cache misses. This enables tests to run easily
458 # returns cache misses. This enables tests to run easily
458 # and may eventually allow us to be a drop in replacement
459 # and may eventually allow us to be a drop in replacement
459 # for the largefiles extension.
460 # for the largefiles extension.
460 class simplecache(object):
461 class simplecache(object):
461 def __init__(self):
462 def __init__(self):
462 self.missingids = []
463 self.missingids = []
463 self.connected = True
464 self.connected = True
464
465
465 def close(self):
466 def close(self):
466 pass
467 pass
467
468
468 def request(self, value, flush=True):
469 def request(self, value, flush=True):
469 lines = value.split("\n")
470 lines = value.split("\n")
470 if lines[0] != "get":
471 if lines[0] != "get":
471 return
472 return
472 self.missingids = lines[2:-1]
473 self.missingids = lines[2:-1]
473 self.missingids.append('0')
474 self.missingids.append('0')
474
475
475 def receiveline(self):
476 def receiveline(self):
476 if len(self.missingids) > 0:
477 if len(self.missingids) > 0:
477 return self.missingids.pop(0)
478 return self.missingids.pop(0)
478 return None
479 return None
479
480
480 self.remotecache = simplecache()
481 self.remotecache = simplecache()
481
482
482 def close(self):
483 def close(self):
483 if fetches:
484 if fetches:
484 msg = ("%s files fetched over %d fetches - " +
485 msg = ("%s files fetched over %d fetches - " +
485 "(%d misses, %0.2f%% hit ratio) over %0.2fs\n") % (
486 "(%d misses, %0.2f%% hit ratio) over %0.2fs\n") % (
486 fetched,
487 fetched,
487 fetches,
488 fetches,
488 fetchmisses,
489 fetchmisses,
489 float(fetched - fetchmisses) / float(fetched) * 100.0,
490 float(fetched - fetchmisses) / float(fetched) * 100.0,
490 fetchcost)
491 fetchcost)
491 if self.debugoutput:
492 if self.debugoutput:
492 self.ui.warn(msg)
493 self.ui.warn(msg)
493 self.ui.log("remotefilelog.prefetch", msg.replace("%", "%%"),
494 self.ui.log("remotefilelog.prefetch", msg.replace("%", "%%"),
494 remotefilelogfetched=fetched,
495 remotefilelogfetched=fetched,
495 remotefilelogfetches=fetches,
496 remotefilelogfetches=fetches,
496 remotefilelogfetchmisses=fetchmisses,
497 remotefilelogfetchmisses=fetchmisses,
497 remotefilelogfetchtime=fetchcost * 1000)
498 remotefilelogfetchtime=fetchcost * 1000)
498
499
499 if self.remotecache.connected:
500 if self.remotecache.connected:
500 self.remotecache.close()
501 self.remotecache.close()
501
502
502 def prefetch(self, fileids, force=False, fetchdata=True,
503 def prefetch(self, fileids, force=False, fetchdata=True,
503 fetchhistory=False):
504 fetchhistory=False):
504 """downloads the given file versions to the cache
505 """downloads the given file versions to the cache
505 """
506 """
506 repo = self.repo
507 repo = self.repo
507 idstocheck = []
508 idstocheck = []
508 for file, id in fileids:
509 for file, id in fileids:
509 # hack
510 # hack
510 # - we don't use .hgtags
511 # - we don't use .hgtags
511 # - workingctx produces ids with length 42,
512 # - workingctx produces ids with length 42,
512 # which we skip since they aren't in any cache
513 # which we skip since they aren't in any cache
513 if (file == '.hgtags' or len(id) == 42
514 if (file == '.hgtags' or len(id) == 42
514 or not repo.shallowmatch(file)):
515 or not repo.shallowmatch(file)):
515 continue
516 continue
516
517
517 idstocheck.append((file, bin(id)))
518 idstocheck.append((file, bin(id)))
518
519
519 datastore = self.datastore
520 datastore = self.datastore
520 historystore = self.historystore
521 historystore = self.historystore
521 if force:
522 if force:
522 datastore = contentstore.unioncontentstore(*repo.shareddatastores)
523 datastore = contentstore.unioncontentstore(*repo.shareddatastores)
523 historystore = metadatastore.unionmetadatastore(
524 historystore = metadatastore.unionmetadatastore(
524 *repo.sharedhistorystores)
525 *repo.sharedhistorystores)
525
526
526 missingids = set()
527 missingids = set()
527 if fetchdata:
528 if fetchdata:
528 missingids.update(datastore.getmissing(idstocheck))
529 missingids.update(datastore.getmissing(idstocheck))
529 if fetchhistory:
530 if fetchhistory:
530 missingids.update(historystore.getmissing(idstocheck))
531 missingids.update(historystore.getmissing(idstocheck))
531
532
532 # partition missing nodes into nullid and not-nullid so we can
533 # partition missing nodes into nullid and not-nullid so we can
533 # warn about this filtering potentially shadowing bugs.
534 # warn about this filtering potentially shadowing bugs.
534 nullids = len([None for unused, id in missingids if id == nullid])
535 nullids = len([None for unused, id in missingids if id == nullid])
535 if nullids:
536 if nullids:
536 missingids = [(f, id) for f, id in missingids if id != nullid]
537 missingids = [(f, id) for f, id in missingids if id != nullid]
537 repo.ui.develwarn(
538 repo.ui.develwarn(
538 ('remotefilelog not fetching %d null revs'
539 ('remotefilelog not fetching %d null revs'
539 ' - this is likely hiding bugs' % nullids),
540 ' - this is likely hiding bugs' % nullids),
540 config='remotefilelog-ext')
541 config='remotefilelog-ext')
541 if missingids:
542 if missingids:
542 global fetches, fetched, fetchcost
543 global fetches, fetched, fetchcost
543 fetches += 1
544 fetches += 1
544
545
545 # We want to be able to detect excess individual file downloads, so
546 # We want to be able to detect excess individual file downloads, so
546 # let's log that information for debugging.
547 # let's log that information for debugging.
547 if fetches >= 15 and fetches < 18:
548 if fetches >= 15 and fetches < 18:
548 if fetches == 15:
549 if fetches == 15:
549 fetchwarning = self.ui.config('remotefilelog',
550 fetchwarning = self.ui.config('remotefilelog',
550 'fetchwarning')
551 'fetchwarning')
551 if fetchwarning:
552 if fetchwarning:
552 self.ui.warn(fetchwarning + '\n')
553 self.ui.warn(fetchwarning + '\n')
553 self.logstacktrace()
554 self.logstacktrace()
554 missingids = [(file, hex(id)) for file, id in missingids]
555 missingids = [(file, hex(id)) for file, id in missingids]
555 fetched += len(missingids)
556 fetched += len(missingids)
556 start = time.time()
557 start = time.time()
557 missingids = self.request(missingids)
558 missingids = self.request(missingids)
558 if missingids:
559 if missingids:
559 raise error.Abort(_("unable to download %d files") %
560 raise error.Abort(_("unable to download %d files") %
560 len(missingids))
561 len(missingids))
561 fetchcost += time.time() - start
562 fetchcost += time.time() - start
562 self._lfsprefetch(fileids)
563 self._lfsprefetch(fileids)
563
564
564 def _lfsprefetch(self, fileids):
565 def _lfsprefetch(self, fileids):
565 if not _lfsmod or not util.safehasattr(
566 if not _lfsmod or not util.safehasattr(
566 self.repo.svfs, 'lfslocalblobstore'):
567 self.repo.svfs, 'lfslocalblobstore'):
567 return
568 return
568 if not _lfsmod.wrapper.candownload(self.repo):
569 if not _lfsmod.wrapper.candownload(self.repo):
569 return
570 return
570 pointers = []
571 pointers = []
571 store = self.repo.svfs.lfslocalblobstore
572 store = self.repo.svfs.lfslocalblobstore
572 for file, id in fileids:
573 for file, id in fileids:
573 node = bin(id)
574 node = bin(id)
574 rlog = self.repo.file(file)
575 rlog = self.repo.file(file)
575 if rlog.flags(node) & revlog.REVIDX_EXTSTORED:
576 if rlog.flags(node) & revlog.REVIDX_EXTSTORED:
576 text = rlog.revision(node, raw=True)
577 text = rlog.revision(node, raw=True)
577 p = _lfsmod.pointer.deserialize(text)
578 p = _lfsmod.pointer.deserialize(text)
578 oid = p.oid()
579 oid = p.oid()
579 if not store.has(oid):
580 if not store.has(oid):
580 pointers.append(p)
581 pointers.append(p)
581 if len(pointers) > 0:
582 if len(pointers) > 0:
582 self.repo.svfs.lfsremoteblobstore.readbatch(pointers, store)
583 self.repo.svfs.lfsremoteblobstore.readbatch(pointers, store)
583 assert all(store.has(p.oid()) for p in pointers)
584 assert all(store.has(p.oid()) for p in pointers)
584
585
585 def logstacktrace(self):
586 def logstacktrace(self):
586 import traceback
587 import traceback
587 self.ui.log('remotefilelog', 'excess remotefilelog fetching:\n%s\n',
588 self.ui.log('remotefilelog', 'excess remotefilelog fetching:\n%s\n',
588 ''.join(traceback.format_stack()))
589 ''.join(traceback.format_stack()))
shallowutil.py
@@ -1,491 +1,492 @@
1 # shallowutil.py -- remotefilelog utilities
1 # shallowutil.py -- remotefilelog utilities
2 #
2 #
3 # Copyright 2014 Facebook, Inc.
3 # Copyright 2014 Facebook, Inc.
4 #
4 #
5 # This software may be used and distributed according to the terms of the
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
6 # GNU General Public License version 2 or any later version.
7 from __future__ import absolute_import
7 from __future__ import absolute_import
8
8
9 import collections
9 import collections
10 import errno
10 import errno
11 import hashlib
11 import hashlib
12 import os
12 import os
13 import stat
13 import stat
14 import struct
14 import struct
15 import tempfile
15 import tempfile
16
16
17 from mercurial.i18n import _
17 from mercurial.i18n import _
18 from mercurial import (
18 from mercurial import (
19 error,
19 error,
20 node,
20 pycompat,
21 pycompat,
21 revlog,
22 revlog,
22 util,
23 util,
23 )
24 )
24 from mercurial.utils import (
25 from mercurial.utils import (
25 storageutil,
26 storageutil,
26 stringutil,
27 stringutil,
27 )
28 )
28 from . import constants
29 from . import constants
29
30
30 if not pycompat.iswindows:
31 if not pycompat.iswindows:
31 import grp
32 import grp
32
33
33 def isenabled(repo):
34 def isenabled(repo):
34 """returns whether the repository is remotefilelog enabled or not"""
35 """returns whether the repository is remotefilelog enabled or not"""
35 return constants.SHALLOWREPO_REQUIREMENT in repo.requirements
36 return constants.SHALLOWREPO_REQUIREMENT in repo.requirements
36
37
37 def getcachekey(reponame, file, id):
38 def getcachekey(reponame, file, id):
38 pathhash = hashlib.sha1(file).hexdigest()
39 pathhash = node.hex(hashlib.sha1(file).digest())
39 return os.path.join(reponame, pathhash[:2], pathhash[2:], id)
40 return os.path.join(reponame, pathhash[:2], pathhash[2:], id)
40
41
41 def getlocalkey(file, id):
42 def getlocalkey(file, id):
42 pathhash = hashlib.sha1(file).hexdigest()
43 pathhash = node.hex(hashlib.sha1(file).digest())
43 return os.path.join(pathhash, id)
44 return os.path.join(pathhash, id)
44
45
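getcachekey and getlocalkey are where this change lands: hashlib's hexdigest() returns a unicode str on Python 3, while these cache paths must stay bytes, so the code now hex-encodes the raw digest instead. A minimal standalone sketch of the same idea, with binascii.hexlify standing in for Mercurial's node.hex:

import hashlib
import os
from binascii import hexlify  # stands in for mercurial.node.hex in this sketch

def getlocalkey_sketch(file, id):
    # hexdigest() would give a unicode str on Python 3; hexlify(digest())
    # keeps the path hash as bytes, matching Mercurial's bytes-only convention.
    pathhash = hexlify(hashlib.sha1(file).digest())
    return os.path.join(pathhash, id)

print(getlocalkey_sketch(b'foo/bar.txt', b'abc123'))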
45 def getcachepath(ui, allowempty=False):
46 def getcachepath(ui, allowempty=False):
46 cachepath = ui.config("remotefilelog", "cachepath")
47 cachepath = ui.config("remotefilelog", "cachepath")
47 if not cachepath:
48 if not cachepath:
48 if allowempty:
49 if allowempty:
49 return None
50 return None
50 else:
51 else:
51 raise error.Abort(_("could not find config option "
52 raise error.Abort(_("could not find config option "
52 "remotefilelog.cachepath"))
53 "remotefilelog.cachepath"))
53 return util.expandpath(cachepath)
54 return util.expandpath(cachepath)
54
55
55 def getcachepackpath(repo, category):
56 def getcachepackpath(repo, category):
56 cachepath = getcachepath(repo.ui)
57 cachepath = getcachepath(repo.ui)
57 if category != constants.FILEPACK_CATEGORY:
58 if category != constants.FILEPACK_CATEGORY:
58 return os.path.join(cachepath, repo.name, 'packs', category)
59 return os.path.join(cachepath, repo.name, 'packs', category)
59 else:
60 else:
60 return os.path.join(cachepath, repo.name, 'packs')
61 return os.path.join(cachepath, repo.name, 'packs')
61
62
62 def getlocalpackpath(base, category):
63 def getlocalpackpath(base, category):
63 return os.path.join(base, 'packs', category)
64 return os.path.join(base, 'packs', category)
64
65
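getcachepath aborts unless remotefilelog.cachepath is configured, and the pack-path helpers hang per-repo 'packs' directories off that root. A hedged hgrc sketch (the path is illustrative only):

[remotefilelog]
cachepath = /var/cache/hgcache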
65 def createrevlogtext(text, copyfrom=None, copyrev=None):
66 def createrevlogtext(text, copyfrom=None, copyrev=None):
66 """returns a string that matches the revlog contents in a
67 """returns a string that matches the revlog contents in a
67 traditional revlog
68 traditional revlog
68 """
69 """
69 meta = {}
70 meta = {}
70 if copyfrom or text.startswith('\1\n'):
71 if copyfrom or text.startswith('\1\n'):
71 if copyfrom:
72 if copyfrom:
72 meta['copy'] = copyfrom
73 meta['copy'] = copyfrom
73 meta['copyrev'] = copyrev
74 meta['copyrev'] = copyrev
74 text = storageutil.packmeta(meta, text)
75 text = storageutil.packmeta(meta, text)
75
76
76 return text
77 return text
77
78
78 def parsemeta(text):
79 def parsemeta(text):
79 """parse mercurial filelog metadata"""
80 """parse mercurial filelog metadata"""
80 meta, size = storageutil.parsemeta(text)
81 meta, size = storageutil.parsemeta(text)
81 if text.startswith('\1\n'):
82 if text.startswith('\1\n'):
82 s = text.index('\1\n', 2)
83 s = text.index('\1\n', 2)
83 text = text[s + 2:]
84 text = text[s + 2:]
84 return meta or {}, text
85 return meta or {}, text
85
86
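createrevlogtext and parsemeta wrap and unwrap filelog copy metadata, which is framed between two '\1\n' markers ahead of the real text. A standalone sketch of that framing, independent of Mercurial's storageutil; the 'key: value' line format shown here is the conventional one and is for illustration:

def packmeta_sketch(meta, text):
    # '\x01\n' + "key: value\n" lines + '\x01\n' + the actual file text
    metatext = b''.join(b'%s: %s\n' % (k, v) for k, v in sorted(meta.items()))
    return b'\x01\n' + metatext + b'\x01\n' + text

def parsemeta_sketch(text):
    if not text.startswith(b'\x01\n'):
        return {}, text
    end = text.index(b'\x01\n', 2)
    meta = dict(l.split(b': ', 1) for l in text[2:end].splitlines())
    return meta, text[end + 2:]

blob = packmeta_sketch({b'copy': b'old/name', b'copyrev': b'0' * 40}, b'contents\n')
print(parsemeta_sketch(blob))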
86 def sumdicts(*dicts):
87 def sumdicts(*dicts):
87 """Adds all the values of *dicts together into one dictionary. This assumes
88 """Adds all the values of *dicts together into one dictionary. This assumes
88 the values in *dicts are all summable.
89 the values in *dicts are all summable.
89
90
90 e.g. [{'a': 4, 'b': 2}, {'b': 3, 'c': 1}] -> {'a': 4, 'b': 5, 'c': 1}
91 e.g. [{'a': 4, 'b': 2}, {'b': 3, 'c': 1}] -> {'a': 4, 'b': 5, 'c': 1}
91 """
92 """
92 result = collections.defaultdict(lambda: 0)
93 result = collections.defaultdict(lambda: 0)
93 for dict in dicts:
94 for dict in dicts:
94 for k, v in dict.iteritems():
95 for k, v in dict.iteritems():
95 result[k] += v
96 result[k] += v
96 return result
97 return result
97
98
98 def prefixkeys(dict, prefix):
99 def prefixkeys(dict, prefix):
99 """Returns ``dict`` with ``prefix`` prepended to all its keys."""
100 """Returns ``dict`` with ``prefix`` prepended to all its keys."""
100 result = {}
101 result = {}
101 for k, v in dict.iteritems():
102 for k, v in dict.iteritems():
102 result[prefix + k] = v
103 result[prefix + k] = v
103 return result
104 return result
104
105
105 def reportpackmetrics(ui, prefix, *stores):
106 def reportpackmetrics(ui, prefix, *stores):
106 dicts = [s.getmetrics() for s in stores]
107 dicts = [s.getmetrics() for s in stores]
107 dict = prefixkeys(sumdicts(*dicts), prefix + '_')
108 dict = prefixkeys(sumdicts(*dicts), prefix + '_')
108 ui.log(prefix + "_packsizes", "", **pycompat.strkwargs(dict))
109 ui.log(prefix + "_packsizes", "", **pycompat.strkwargs(dict))
109
110
110 def _parsepackmeta(metabuf):
111 def _parsepackmeta(metabuf):
111 """parse datapack meta, bytes (<metadata-list>) -> dict
112 """parse datapack meta, bytes (<metadata-list>) -> dict
112
113
113 The dict contains raw content - both keys and values are strings.
114 The dict contains raw content - both keys and values are strings.
114 Higher-level callers may want to convert some of them to other types, such as
115 Higher-level callers may want to convert some of them to other types, such as
115 integers, on their own.
116 integers, on their own.
116
117
117 raise ValueError if the data is corrupted
118 raise ValueError if the data is corrupted
118 """
119 """
119 metadict = {}
120 metadict = {}
120 offset = 0
121 offset = 0
121 buflen = len(metabuf)
122 buflen = len(metabuf)
122 while buflen - offset >= 3:
123 while buflen - offset >= 3:
123 key = metabuf[offset]
124 key = metabuf[offset]
124 offset += 1
125 offset += 1
125 metalen = struct.unpack_from('!H', metabuf, offset)[0]
126 metalen = struct.unpack_from('!H', metabuf, offset)[0]
126 offset += 2
127 offset += 2
127 if offset + metalen > buflen:
128 if offset + metalen > buflen:
128 raise ValueError('corrupted metadata: incomplete buffer')
129 raise ValueError('corrupted metadata: incomplete buffer')
129 value = metabuf[offset:offset + metalen]
130 value = metabuf[offset:offset + metalen]
130 metadict[key] = value
131 metadict[key] = value
131 offset += metalen
132 offset += metalen
132 if offset != buflen:
133 if offset != buflen:
133 raise ValueError('corrupted metadata: redundant data')
134 raise ValueError('corrupted metadata: redundant data')
134 return metadict
135 return metadict
135
136
136 def _buildpackmeta(metadict):
137 def _buildpackmeta(metadict):
137 """reverse of _parsepackmeta, dict -> bytes (<metadata-list>)
138 """reverse of _parsepackmeta, dict -> bytes (<metadata-list>)
138
139
139 The dict contains raw content - both keys and values are strings.
140 The dict contains raw content - both keys and values are strings.
140 Higher-level callers may want to serialize values of other types (like
141 Higher-level callers may want to serialize values of other types (like
141 integers) to strings before calling this function.
142 integers) to strings before calling this function.
142
143
143 raise ProgrammingError when metadata key is illegal, or ValueError if
144 raise ProgrammingError when metadata key is illegal, or ValueError if
144 length limit is exceeded
145 length limit is exceeded
145 """
146 """
146 metabuf = ''
147 metabuf = ''
147 for k, v in sorted((metadict or {}).iteritems()):
148 for k, v in sorted((metadict or {}).iteritems()):
148 if len(k) != 1:
149 if len(k) != 1:
149 raise error.ProgrammingError('packmeta: illegal key: %s' % k)
150 raise error.ProgrammingError('packmeta: illegal key: %s' % k)
150 if len(v) > 0xfffe:
151 if len(v) > 0xfffe:
151 raise ValueError('metadata value is too long: 0x%x > 0xfffe'
152 raise ValueError('metadata value is too long: 0x%x > 0xfffe'
152 % len(v))
153 % len(v))
153 metabuf += k
154 metabuf += k
154 metabuf += struct.pack('!H', len(v))
155 metabuf += struct.pack('!H', len(v))
155 metabuf += v
156 metabuf += v
156 # len(metabuf) is guaranteed representable in 4 bytes, because there are
157 # len(metabuf) is guaranteed representable in 4 bytes, because there are
157 # only 256 keys, and for each value, len(value) <= 0xfffe.
158 # only 256 keys, and for each value, len(value) <= 0xfffe.
158 return metabuf
159 return metabuf
159
160
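_buildpackmeta and _parsepackmeta implement a small type-length-value encoding: a one-byte key, a big-endian 16-bit length ('!H'), then the raw value, concatenated. A self-contained round-trip sketch of that layout (the keys used below are arbitrary placeholders, not the real constants):

import struct

def buildpackmeta_sketch(metadict):
    # 1-byte key + '!H' length + raw value, concatenated in sorted key order
    buf = b''
    for k, v in sorted(metadict.items()):
        assert len(k) == 1 and len(v) <= 0xfffe
        buf += k + struct.pack('!H', len(v)) + v
    return buf

def parsepackmeta_sketch(metabuf):
    out, offset = {}, 0
    while offset < len(metabuf):
        key = metabuf[offset:offset + 1]
        (metalen,) = struct.unpack_from('!H', metabuf, offset + 1)
        out[key] = metabuf[offset + 3:offset + 3 + metalen]
        offset += 3 + metalen
    return out

meta = {b's': b'1234', b'f': b'\x00'}
assert parsepackmeta_sketch(buildpackmeta_sketch(meta)) == meta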
160 _metaitemtypes = {
161 _metaitemtypes = {
161 constants.METAKEYFLAG: (int, pycompat.long),
162 constants.METAKEYFLAG: (int, pycompat.long),
162 constants.METAKEYSIZE: (int, pycompat.long),
163 constants.METAKEYSIZE: (int, pycompat.long),
163 }
164 }
164
165
165 def buildpackmeta(metadict):
166 def buildpackmeta(metadict):
166 """like _buildpackmeta, but typechecks metadict and normalize it.
167 """like _buildpackmeta, but typechecks metadict and normalize it.
167
168
168 This means, METAKEYFLAG and METAKEYSIZE should have integers as values,
169 This means, METAKEYFLAG and METAKEYSIZE should have integers as values,
169 and METAKEYFLAG will be dropped if its value is 0.
170 and METAKEYFLAG will be dropped if its value is 0.
170 """
171 """
171 newmeta = {}
172 newmeta = {}
172 for k, v in (metadict or {}).iteritems():
173 for k, v in (metadict or {}).iteritems():
173 expectedtype = _metaitemtypes.get(k, (bytes,))
174 expectedtype = _metaitemtypes.get(k, (bytes,))
174 if not isinstance(v, expectedtype):
175 if not isinstance(v, expectedtype):
175 raise error.ProgrammingError('packmeta: wrong type of key %s' % k)
176 raise error.ProgrammingError('packmeta: wrong type of key %s' % k)
176 # normalize int to binary buffer
177 # normalize int to binary buffer
177 if int in expectedtype:
178 if int in expectedtype:
178 # optimization: remove flag if it's 0 to save space
179 # optimization: remove flag if it's 0 to save space
179 if k == constants.METAKEYFLAG and v == 0:
180 if k == constants.METAKEYFLAG and v == 0:
180 continue
181 continue
181 v = int2bin(v)
182 v = int2bin(v)
182 newmeta[k] = v
183 newmeta[k] = v
183 return _buildpackmeta(newmeta)
184 return _buildpackmeta(newmeta)
184
185
185 def parsepackmeta(metabuf):
186 def parsepackmeta(metabuf):
186 """like _parsepackmeta, but convert fields to desired types automatically.
187 """like _parsepackmeta, but convert fields to desired types automatically.
187
188
188 This means, METAKEYFLAG and METAKEYSIZE fields will be converted to
189 This means, METAKEYFLAG and METAKEYSIZE fields will be converted to
189 integers.
190 integers.
190 """
191 """
191 metadict = _parsepackmeta(metabuf)
192 metadict = _parsepackmeta(metabuf)
192 for k, v in metadict.iteritems():
193 for k, v in metadict.iteritems():
193 if k in _metaitemtypes and int in _metaitemtypes[k]:
194 if k in _metaitemtypes and int in _metaitemtypes[k]:
194 metadict[k] = bin2int(v)
195 metadict[k] = bin2int(v)
195 return metadict
196 return metadict
196
197
197 def int2bin(n):
198 def int2bin(n):
198 """convert a non-negative integer to raw binary buffer"""
199 """convert a non-negative integer to raw binary buffer"""
199 buf = bytearray()
200 buf = bytearray()
200 while n > 0:
201 while n > 0:
201 buf.insert(0, n & 0xff)
202 buf.insert(0, n & 0xff)
202 n >>= 8
203 n >>= 8
203 return bytes(buf)
204 return bytes(buf)
204
205
205 def bin2int(buf):
206 def bin2int(buf):
206 """the reverse of int2bin, convert a binary buffer to an integer"""
207 """the reverse of int2bin, convert a binary buffer to an integer"""
207 x = 0
208 x = 0
208 for b in bytearray(buf):
209 for b in bytearray(buf):
209 x <<= 8
210 x <<= 8
210 x |= b
211 x |= b
211 return x
212 return x
212
213
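int2bin and bin2int are a variable-width big-endian integer codec (zero encodes to an empty buffer). In Python 3 terms the same behaviour falls out of int.to_bytes/int.from_bytes, which makes a convenient sanity check:

def int2bin_stdlib(n):
    # big-endian, shortest possible width; 0 encodes to b''
    return n.to_bytes((n.bit_length() + 7) // 8, 'big')

def bin2int_stdlib(buf):
    return int.from_bytes(buf, 'big')

for n in (0, 1, 255, 256, 0x123456):
    assert bin2int_stdlib(int2bin_stdlib(n)) == n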
213 def parsesizeflags(raw):
214 def parsesizeflags(raw):
214 """given a remotefilelog blob, return (headersize, rawtextsize, flags)
215 """given a remotefilelog blob, return (headersize, rawtextsize, flags)
215
216
216 see remotefilelogserver.createfileblob for the format.
217 see remotefilelogserver.createfileblob for the format.
217 raise RuntimeError if the content is illformed.
218 raise RuntimeError if the content is illformed.
218 """
219 """
219 flags = revlog.REVIDX_DEFAULT_FLAGS
220 flags = revlog.REVIDX_DEFAULT_FLAGS
220 size = None
221 size = None
221 try:
222 try:
222 index = raw.index('\0')
223 index = raw.index('\0')
223 header = raw[:index]
224 header = raw[:index]
224 if header.startswith('v'):
225 if header.startswith('v'):
225 # v1 and above, header starts with 'v'
226 # v1 and above, header starts with 'v'
226 if header.startswith('v1\n'):
227 if header.startswith('v1\n'):
227 for s in header.split('\n'):
228 for s in header.split('\n'):
228 if s.startswith(constants.METAKEYSIZE):
229 if s.startswith(constants.METAKEYSIZE):
229 size = int(s[len(constants.METAKEYSIZE):])
230 size = int(s[len(constants.METAKEYSIZE):])
230 elif s.startswith(constants.METAKEYFLAG):
231 elif s.startswith(constants.METAKEYFLAG):
231 flags = int(s[len(constants.METAKEYFLAG):])
232 flags = int(s[len(constants.METAKEYFLAG):])
232 else:
233 else:
233 raise RuntimeError('unsupported remotefilelog header: %s'
234 raise RuntimeError('unsupported remotefilelog header: %s'
234 % header)
235 % header)
235 else:
236 else:
236 # v0, str(int(size)) is the header
237 # v0, str(int(size)) is the header
237 size = int(header)
238 size = int(header)
238 except ValueError:
239 except ValueError:
239 raise RuntimeError("unexpected remotefilelog header: illegal format")
240 raise RuntimeError("unexpected remotefilelog header: illegal format")
240 if size is None:
241 if size is None:
241 raise RuntimeError("unexpected remotefilelog header: no size found")
242 raise RuntimeError("unexpected remotefilelog header: no size found")
242 return index + 1, size, flags
243 return index + 1, size, flags
243
244
244 def buildfileblobheader(size, flags, version=None):
245 def buildfileblobheader(size, flags, version=None):
245 """return the header of a remotefilelog blob.
246 """return the header of a remotefilelog blob.
246
247
247 see remotefilelogserver.createfileblob for the format.
248 see remotefilelogserver.createfileblob for the format.
248 approximately the reverse of parsesizeflags.
249 approximately the reverse of parsesizeflags.
249
250
250 version could be 0 or 1, or None (auto decide).
251 version could be 0 or 1, or None (auto decide).
251 """
252 """
252 # choose v0 if flags is empty, otherwise v1
253 # choose v0 if flags is empty, otherwise v1
253 if version is None:
254 if version is None:
254 version = int(bool(flags))
255 version = int(bool(flags))
255 if version == 1:
256 if version == 1:
256 header = ('v1\n%s%d\n%s%d'
257 header = ('v1\n%s%d\n%s%d'
257 % (constants.METAKEYSIZE, size,
258 % (constants.METAKEYSIZE, size,
258 constants.METAKEYFLAG, flags))
259 constants.METAKEYFLAG, flags))
259 elif version == 0:
260 elif version == 0:
260 if flags:
261 if flags:
261 raise error.ProgrammingError('fileblob v0 does not support flag')
262 raise error.ProgrammingError('fileblob v0 does not support flag')
262 header = '%d' % size
263 header = '%d' % size
263 else:
264 else:
264 raise error.ProgrammingError('unknown fileblob version %d' % version)
265 raise error.ProgrammingError('unknown fileblob version %d' % version)
265 return header
266 return header
266
267
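parsesizeflags and buildfileblobheader define the remotefilelog blob header: v0 is just the decimal size, v1 is 'v1\n' plus newline-separated key/value lines, and the header ends at the first NUL byte. A standalone round-trip sketch; the single-character keys below are placeholders standing in for constants.METAKEYSIZE and constants.METAKEYFLAG, not their real values:

METAKEYSIZE = b's'  # placeholder; the real key lives in remotefilelog's constants
METAKEYFLAG = b'f'  # placeholder

def buildheader_sketch(size, flags):
    if flags:
        return b'v1\n%s%d\n%s%d' % (METAKEYSIZE, size, METAKEYFLAG, flags)
    return b'%d' % size

def parseheader_sketch(raw):
    header, _, _rest = raw.partition(b'\0')
    size, flags = None, 0
    if header.startswith(b'v1\n'):
        for line in header.split(b'\n'):
            if line.startswith(METAKEYSIZE):
                size = int(line[len(METAKEYSIZE):])
            elif line.startswith(METAKEYFLAG):
                flags = int(line[len(METAKEYFLAG):])
    else:
        size = int(header)
    return len(header) + 1, size, flags

for size, flags in ((11, 0), (11, 4)):
    blob = buildheader_sketch(size, flags) + b'\0' + b'hello world'
    print(parseheader_sketch(blob))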
267 def ancestormap(raw):
268 def ancestormap(raw):
268 offset, size, flags = parsesizeflags(raw)
269 offset, size, flags = parsesizeflags(raw)
269 start = offset + size
270 start = offset + size
270
271
271 mapping = {}
272 mapping = {}
272 while start < len(raw):
273 while start < len(raw):
273 divider = raw.index('\0', start + 80)
274 divider = raw.index('\0', start + 80)
274
275
275 currentnode = raw[start:(start + 20)]
276 currentnode = raw[start:(start + 20)]
276 p1 = raw[(start + 20):(start + 40)]
277 p1 = raw[(start + 20):(start + 40)]
277 p2 = raw[(start + 40):(start + 60)]
278 p2 = raw[(start + 40):(start + 60)]
278 linknode = raw[(start + 60):(start + 80)]
279 linknode = raw[(start + 60):(start + 80)]
279 copyfrom = raw[(start + 80):divider]
280 copyfrom = raw[(start + 80):divider]
280
281
281 mapping[currentnode] = (p1, p2, linknode, copyfrom)
282 mapping[currentnode] = (p1, p2, linknode, copyfrom)
282 start = divider + 1
283 start = divider + 1
283
284
284 return mapping
285 return mapping
285
286
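ancestormap walks fixed-width records: four 20-byte hashes (node, p1, p2, linknode) followed by a NUL-terminated copyfrom path. A small sketch that builds one record by hand and slices it the same way:

nullid = b'\0' * 20

def buildancestorentry_sketch(node, p1, p2, linknode, copyfrom=b''):
    # 4 x 20-byte hashes, then the (possibly empty) copyfrom path, then NUL
    return node + p1 + p2 + linknode + copyfrom + b'\0'

entry = buildancestorentry_sketch(b'\x11' * 20, nullid, nullid, b'\x22' * 20,
                                  b'renamed/from/path')
divider = entry.index(b'\0', 80)
print(entry[0:20] == b'\x11' * 20, entry[80:divider])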
286 def readfile(path):
287 def readfile(path):
287 f = open(path, 'rb')
288 f = open(path, 'rb')
288 try:
289 try:
289 result = f.read()
290 result = f.read()
290
291
291 # we should never have empty files
292 # we should never have empty files
292 if not result:
293 if not result:
293 os.remove(path)
294 os.remove(path)
294 raise IOError("empty file: %s" % path)
295 raise IOError("empty file: %s" % path)
295
296
296 return result
297 return result
297 finally:
298 finally:
298 f.close()
299 f.close()
299
300
300 def unlinkfile(filepath):
301 def unlinkfile(filepath):
301 if pycompat.iswindows:
302 if pycompat.iswindows:
302 # On Windows, os.unlink cannot delete readonly files
303 # On Windows, os.unlink cannot delete readonly files
303 os.chmod(filepath, stat.S_IWUSR)
304 os.chmod(filepath, stat.S_IWUSR)
304 os.unlink(filepath)
305 os.unlink(filepath)
305
306
306 def renamefile(source, destination):
307 def renamefile(source, destination):
307 if pycompat.iswindows:
308 if pycompat.iswindows:
308 # On Windows, os.rename cannot rename readonly files
309 # On Windows, os.rename cannot rename readonly files
309 # and cannot overwrite destination if it exists
310 # and cannot overwrite destination if it exists
310 os.chmod(source, stat.S_IWUSR)
311 os.chmod(source, stat.S_IWUSR)
311 if os.path.isfile(destination):
312 if os.path.isfile(destination):
312 os.chmod(destination, stat.S_IWUSR)
313 os.chmod(destination, stat.S_IWUSR)
313 os.unlink(destination)
314 os.unlink(destination)
314
315
315 os.rename(source, destination)
316 os.rename(source, destination)
316
317
317 def writefile(path, content, readonly=False):
318 def writefile(path, content, readonly=False):
318 dirname, filename = os.path.split(path)
319 dirname, filename = os.path.split(path)
319 if not os.path.exists(dirname):
320 if not os.path.exists(dirname):
320 try:
321 try:
321 os.makedirs(dirname)
322 os.makedirs(dirname)
322 except OSError as ex:
323 except OSError as ex:
323 if ex.errno != errno.EEXIST:
324 if ex.errno != errno.EEXIST:
324 raise
325 raise
325
326
326 fd, temp = tempfile.mkstemp(prefix='.%s-' % filename, dir=dirname)
327 fd, temp = tempfile.mkstemp(prefix='.%s-' % filename, dir=dirname)
327 os.close(fd)
328 os.close(fd)
328
329
329 try:
330 try:
330 f = util.posixfile(temp, 'wb')
331 f = util.posixfile(temp, 'wb')
331 f.write(content)
332 f.write(content)
332 f.close()
333 f.close()
333
334
334 if readonly:
335 if readonly:
335 mode = 0o444
336 mode = 0o444
336 else:
337 else:
337 # tempfiles are created with 0o600, so we need to manually set the
338 # tempfiles are created with 0o600, so we need to manually set the
338 # mode.
339 # mode.
339 oldumask = os.umask(0)
340 oldumask = os.umask(0)
340 # there's no way to get the umask without modifying it, so set it
341 # there's no way to get the umask without modifying it, so set it
341 # back
342 # back
342 os.umask(oldumask)
343 os.umask(oldumask)
343 mode = ~oldumask
344 mode = ~oldumask
344
345
345 renamefile(temp, path)
346 renamefile(temp, path)
346 os.chmod(path, mode)
347 os.chmod(path, mode)
347 except Exception:
348 except Exception:
348 try:
349 try:
349 unlinkfile(temp)
350 unlinkfile(temp)
350 except OSError:
351 except OSError:
351 pass
352 pass
352 raise
353 raise
353
354
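writefile is the atomic-write helper for cache blobs: write to a sibling temp file, fix up the mode, then renamefile() over the destination so readers never see partial content. A compact Python 3 sketch of the same pattern, without the umask and read-only handling above:

import os
import tempfile

def writefile_sketch(path, content):
    # write a sibling temp file, then atomically move it over the destination
    dirname, name = os.path.split(path)
    fd, temp = tempfile.mkstemp(prefix='.%s-' % name, dir=dirname)
    try:
        with os.fdopen(fd, 'wb') as f:
            f.write(content)
        os.replace(temp, path)  # atomic on POSIX; also overwrites on Windows
    except Exception:
        os.unlink(temp)
        raise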
354 def sortnodes(nodes, parentfunc):
355 def sortnodes(nodes, parentfunc):
355 """Topologically sorts the nodes, using the parentfunc to find
356 """Topologically sorts the nodes, using the parentfunc to find
356 the parents of nodes."""
357 the parents of nodes."""
357 nodes = set(nodes)
358 nodes = set(nodes)
358 childmap = {}
359 childmap = {}
359 parentmap = {}
360 parentmap = {}
360 roots = []
361 roots = []
361
362
362 # Build a child and parent map
363 # Build a child and parent map
363 for n in nodes:
364 for n in nodes:
364 parents = [p for p in parentfunc(n) if p in nodes]
365 parents = [p for p in parentfunc(n) if p in nodes]
365 parentmap[n] = set(parents)
366 parentmap[n] = set(parents)
366 for p in parents:
367 for p in parents:
367 childmap.setdefault(p, set()).add(n)
368 childmap.setdefault(p, set()).add(n)
368 if not parents:
369 if not parents:
369 roots.append(n)
370 roots.append(n)
370
371
371 roots.sort()
372 roots.sort()
372 # Process roots, adding children to the queue as they become roots
373 # Process roots, adding children to the queue as they become roots
373 results = []
374 results = []
374 while roots:
375 while roots:
375 n = roots.pop(0)
376 n = roots.pop(0)
376 results.append(n)
377 results.append(n)
377 if n in childmap:
378 if n in childmap:
378 children = childmap[n]
379 children = childmap[n]
379 for c in children:
380 for c in children:
380 childparents = parentmap[c]
381 childparents = parentmap[c]
381 childparents.remove(n)
382 childparents.remove(n)
382 if len(childparents) == 0:
383 if len(childparents) == 0:
383 # insert at the beginning, that way child nodes
384 # insert at the beginning, that way child nodes
384 # are likely to be output immediately after their
385 # are likely to be output immediately after their
385 # parents. This gives better compression results.
386 # parents. This gives better compression results.
386 roots.insert(0, c)
387 roots.insert(0, c)
387
388
388 return results
389 return results
389
390
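sortnodes is a Kahn-style topological sort: roots are emitted in sorted order, and a child whose parents have all been emitted is pushed to the front of the queue so parents and children stay adjacent. Assuming the sortnodes defined above is in scope, a tiny DAG shows the effect:

# a -> b -> d and a -> c -> d; parentfunc maps a node to its parents
parents = {b'a': [], b'b': [b'a'], b'c': [b'a'], b'd': [b'b', b'c']}
print(sortnodes(parents.keys(), parents.get))
# b'a' is emitted first; b'b', b'c' and b'd' follow with parents before children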
390 def readexactly(stream, n):
391 def readexactly(stream, n):
391 '''read n bytes from stream.read and abort if less was available'''
392 '''read n bytes from stream.read and abort if less was available'''
392 s = stream.read(n)
393 s = stream.read(n)
393 if len(s) < n:
394 if len(s) < n:
394 raise error.Abort(_("stream ended unexpectedly"
395 raise error.Abort(_("stream ended unexpectedly"
395 " (got %d bytes, expected %d)")
396 " (got %d bytes, expected %d)")
396 % (len(s), n))
397 % (len(s), n))
397 return s
398 return s
398
399
399 def readunpack(stream, fmt):
400 def readunpack(stream, fmt):
400 data = readexactly(stream, struct.calcsize(fmt))
401 data = readexactly(stream, struct.calcsize(fmt))
401 return struct.unpack(fmt, data)
402 return struct.unpack(fmt, data)
402
403
403 def readpath(stream):
404 def readpath(stream):
404 rawlen = readexactly(stream, constants.FILENAMESIZE)
405 rawlen = readexactly(stream, constants.FILENAMESIZE)
405 pathlen = struct.unpack(constants.FILENAMESTRUCT, rawlen)[0]
406 pathlen = struct.unpack(constants.FILENAMESTRUCT, rawlen)[0]
406 return readexactly(stream, pathlen)
407 return readexactly(stream, pathlen)
407
408
408 def readnodelist(stream):
409 def readnodelist(stream):
409 rawlen = readexactly(stream, constants.NODECOUNTSIZE)
410 rawlen = readexactly(stream, constants.NODECOUNTSIZE)
410 nodecount = struct.unpack(constants.NODECOUNTSTRUCT, rawlen)[0]
411 nodecount = struct.unpack(constants.NODECOUNTSTRUCT, rawlen)[0]
411 for i in pycompat.xrange(nodecount):
412 for i in pycompat.xrange(nodecount):
412 yield readexactly(stream, constants.NODESIZE)
413 yield readexactly(stream, constants.NODESIZE)
413
414
414 def readpathlist(stream):
415 def readpathlist(stream):
415 rawlen = readexactly(stream, constants.PATHCOUNTSIZE)
416 rawlen = readexactly(stream, constants.PATHCOUNTSIZE)
416 pathcount = struct.unpack(constants.PATHCOUNTSTRUCT, rawlen)[0]
417 pathcount = struct.unpack(constants.PATHCOUNTSTRUCT, rawlen)[0]
417 for i in pycompat.xrange(pathcount):
418 for i in pycompat.xrange(pathcount):
418 yield readpath(stream)
419 yield readpath(stream)
419
420
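readexactly, readunpack, readpath and friends read a simple length-prefixed wire format: a fixed-size struct-packed count or length, then that many bytes or records. A standalone sketch of the path framing, using an assumed '!H' length prefix in place of constants.FILENAMESTRUCT:

import io
import struct

LENFMT = '!H'  # assumed stand-in for constants.FILENAMESTRUCT / FILENAMESIZE

def writepath_sketch(stream, path):
    stream.write(struct.pack(LENFMT, len(path)) + path)

def readpath_sketch(stream):
    rawlen = stream.read(struct.calcsize(LENFMT))
    (pathlen,) = struct.unpack(LENFMT, rawlen)
    return stream.read(pathlen)

buf = io.BytesIO()
for p in (b'foo/bar.txt', b'baz.c'):
    writepath_sketch(buf, p)
buf.seek(0)
print(readpath_sketch(buf), readpath_sketch(buf))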
420 def getgid(groupname):
421 def getgid(groupname):
421 try:
422 try:
422 gid = grp.getgrnam(groupname).gr_gid
423 gid = grp.getgrnam(groupname).gr_gid
423 return gid
424 return gid
424 except KeyError:
425 except KeyError:
425 return None
426 return None
426
427
427 def setstickygroupdir(path, gid, warn=None):
428 def setstickygroupdir(path, gid, warn=None):
428 if gid is None:
429 if gid is None:
429 return
430 return
430 try:
431 try:
431 os.chown(path, -1, gid)
432 os.chown(path, -1, gid)
432 os.chmod(path, 0o2775)
433 os.chmod(path, 0o2775)
433 except (IOError, OSError) as ex:
434 except (IOError, OSError) as ex:
434 if warn:
435 if warn:
435 warn(_('unable to chown/chmod on %s: %s\n') % (path, ex))
436 warn(_('unable to chown/chmod on %s: %s\n') % (path, ex))
436
437
437 def mkstickygroupdir(ui, path):
438 def mkstickygroupdir(ui, path):
438 """Creates the given directory (if it doesn't exist) and gives it a
439 """Creates the given directory (if it doesn't exist) and gives it a
439 particular group with setgid enabled."""
440 particular group with setgid enabled."""
440 gid = None
441 gid = None
441 groupname = ui.config("remotefilelog", "cachegroup")
442 groupname = ui.config("remotefilelog", "cachegroup")
442 if groupname:
443 if groupname:
443 gid = getgid(groupname)
444 gid = getgid(groupname)
444 if gid is None:
445 if gid is None:
445 ui.warn(_('unable to resolve group name: %s\n') % groupname)
446 ui.warn(_('unable to resolve group name: %s\n') % groupname)
446
447
447 # we use a single stat syscall to test the existence and mode / group bit
448 # we use a single stat syscall to test the existence and mode / group bit
448 st = None
449 st = None
449 try:
450 try:
450 st = os.stat(path)
451 st = os.stat(path)
451 except OSError:
452 except OSError:
452 pass
453 pass
453
454
454 if st:
455 if st:
455 # exists
456 # exists
456 if (st.st_mode & 0o2775) != 0o2775 or st.st_gid != gid:
457 if (st.st_mode & 0o2775) != 0o2775 or st.st_gid != gid:
457 # permission needs to be fixed
458 # permission needs to be fixed
458 setstickygroupdir(path, gid, ui.warn)
459 setstickygroupdir(path, gid, ui.warn)
459 return
460 return
460
461
461 oldumask = os.umask(0o002)
462 oldumask = os.umask(0o002)
462 try:
463 try:
463 missingdirs = [path]
464 missingdirs = [path]
464 path = os.path.dirname(path)
465 path = os.path.dirname(path)
465 while path and not os.path.exists(path):
466 while path and not os.path.exists(path):
466 missingdirs.append(path)
467 missingdirs.append(path)
467 path = os.path.dirname(path)
468 path = os.path.dirname(path)
468
469
469 for path in reversed(missingdirs):
470 for path in reversed(missingdirs):
470 try:
471 try:
471 os.mkdir(path)
472 os.mkdir(path)
472 except OSError as ex:
473 except OSError as ex:
473 if ex.errno != errno.EEXIST:
474 if ex.errno != errno.EEXIST:
474 raise
475 raise
475
476
476 for path in missingdirs:
477 for path in missingdirs:
477 setstickygroupdir(path, gid, ui.warn)
478 setstickygroupdir(path, gid, ui.warn)
478 finally:
479 finally:
479 os.umask(oldumask)
480 os.umask(oldumask)
480
481
481 def getusername(ui):
482 def getusername(ui):
482 try:
483 try:
483 return stringutil.shortuser(ui.username())
484 return stringutil.shortuser(ui.username())
484 except Exception:
485 except Exception:
485 return 'unknown'
486 return 'unknown'
486
487
487 def getreponame(ui):
488 def getreponame(ui):
488 reponame = ui.config('paths', 'default')
489 reponame = ui.config('paths', 'default')
489 if reponame:
490 if reponame:
490 return os.path.basename(reponame)
491 return os.path.basename(reponame)
491 return "unknown"
492 return "unknown"