py3: don't use dict.iterkeys()...
Pulkit Goyal - r40649:9769e0f6 default
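The change replaces dict.iterkeys() with plain dict iteration: iterkeys() exists only on Python 2 dictionaries, while iterating a dict directly (or passing it to sorted()) yields its keys on both Python 2 and Python 3, so the call can simply be dropped. A minimal sketch of the pattern, using made-up values rather than anything from the diff:

    entries = {'node-b': 2, 'node-a': 1}

    # Python 2 only -- AttributeError on Python 3:
    #     for node in sorted(entries.iterkeys()):
    #         ...

    # Works the same on Python 2 and 3; iterating a dict yields its keys:
    for node in sorted(entries):
        print(node)              # node-a, node-b

    for missingid in entries:    # iterkeys() on py2, a keys view on py3
        print(missingid)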
@@ -1,539 +1,539 b''
1 from __future__ import absolute_import
1 from __future__ import absolute_import
2
2
3 import collections
3 import collections
4 import errno
4 import errno
5 import hashlib
5 import hashlib
6 import mmap
6 import mmap
7 import os
7 import os
8 import struct
8 import struct
9 import time
9 import time
10
10
11 from mercurial.i18n import _
11 from mercurial.i18n import _
12 from mercurial import (
12 from mercurial import (
13 policy,
13 policy,
14 pycompat,
14 pycompat,
15 util,
15 util,
16 vfs as vfsmod,
16 vfs as vfsmod,
17 )
17 )
18 from . import shallowutil
18 from . import shallowutil
19
19
20 osutil = policy.importmod(r'osutil')
20 osutil = policy.importmod(r'osutil')
21
21
22 # The pack version supported by this implementation. This will need to be
22 # The pack version supported by this implementation. This will need to be
23 # rev'd whenever the byte format changes. Ex: changing the fanout prefix,
23 # rev'd whenever the byte format changes. Ex: changing the fanout prefix,
24 # changing any of the int sizes, changing the delta algorithm, etc.
24 # changing any of the int sizes, changing the delta algorithm, etc.
25 PACKVERSIONSIZE = 1
25 PACKVERSIONSIZE = 1
26 INDEXVERSIONSIZE = 2
26 INDEXVERSIONSIZE = 2
27
27
28 FANOUTSTART = INDEXVERSIONSIZE
28 FANOUTSTART = INDEXVERSIONSIZE
29
29
30 # Constant that indicates a fanout table entry hasn't been filled in. (This does
30 # Constant that indicates a fanout table entry hasn't been filled in. (This does
31 # not get serialized)
31 # not get serialized)
32 EMPTYFANOUT = -1
32 EMPTYFANOUT = -1
33
33
34 # The fanout prefix is the number of bytes that can be addressed by the fanout
34 # The fanout prefix is the number of bytes that can be addressed by the fanout
35 # table. Example: a fanout prefix of 1 means we use the first byte of a hash to
35 # table. Example: a fanout prefix of 1 means we use the first byte of a hash to
36 # look in the fanout table (which will be 2^8 entries long).
36 # look in the fanout table (which will be 2^8 entries long).
37 SMALLFANOUTPREFIX = 1
37 SMALLFANOUTPREFIX = 1
38 LARGEFANOUTPREFIX = 2
38 LARGEFANOUTPREFIX = 2
39
39
40 # The number of entries in the index at which point we switch to a large fanout.
40 # The number of entries in the index at which point we switch to a large fanout.
41 # It is chosen to balance the linear scan through a sparse fanout, with the
41 # It is chosen to balance the linear scan through a sparse fanout, with the
42 # size of the bisect in the actual index.
42 # size of the bisect in the actual index.
43 # 2^16 / 8 was chosen because it trades off (1 step fanout scan + 5 step
43 # 2^16 / 8 was chosen because it trades off (1 step fanout scan + 5 step
44 # bisect) with (8 step fanout scan + 1 step bisect)
44 # bisect) with (8 step fanout scan + 1 step bisect)
45 # 5 step bisect = log(2^16 / 8 / 255) # fanout
45 # 5 step bisect = log(2^16 / 8 / 255) # fanout
46 # 8 step fanout scan = 2^16 / (2^16 / 8) # fanout space divided by entries
46 # 8 step fanout scan = 2^16 / (2^16 / 8) # fanout space divided by entries
47 SMALLFANOUTCUTOFF = 2**16 / 8
47 SMALLFANOUTCUTOFF = 2**16 / 8
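In concrete numbers: 2^16 / 8 = 8192 entries. At that size a small (2^8-slot) fanout averages about 8192 / 256 = 32 entries per slot, so the bisect within a slot costs roughly log2(32) = 5 steps; spread across the 2^16-slot large fanout, 8192 entries leave about 2^16 / 8192 = 8 slots between occupied ones, hence the ~8 step linear scan. (Note that on Python 3 the expression 2**16 / 8 evaluates to the float 8192.0; 2**16 // 8 would keep it an integer, though the size comparison against len(self.entries) below works either way.)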
48
48
49 # The amount of time to wait between checking for new packs. This prevents an
49 # The amount of time to wait between checking for new packs. This prevents an
50 # exception when data is moved to a new pack after the process has already
50 # exception when data is moved to a new pack after the process has already
51 # loaded the pack list.
51 # loaded the pack list.
52 REFRESHRATE = 0.1
52 REFRESHRATE = 0.1
53
53
54 if pycompat.isposix:
54 if pycompat.isposix:
55 # With glibc 2.7+ the 'e' flag uses O_CLOEXEC when opening.
55 # With glibc 2.7+ the 'e' flag uses O_CLOEXEC when opening.
56 # The 'e' flag will be ignored on older versions of glibc.
56 # The 'e' flag will be ignored on older versions of glibc.
57 PACKOPENMODE = 'rbe'
57 PACKOPENMODE = 'rbe'
58 else:
58 else:
59 PACKOPENMODE = 'rb'
59 PACKOPENMODE = 'rb'
60
60
61 class _cachebackedpacks(object):
61 class _cachebackedpacks(object):
62 def __init__(self, packs, cachesize):
62 def __init__(self, packs, cachesize):
63 self._packs = set(packs)
63 self._packs = set(packs)
64 self._lrucache = util.lrucachedict(cachesize)
64 self._lrucache = util.lrucachedict(cachesize)
65 self._lastpack = None
65 self._lastpack = None
66
66
67 # Avoid cold start of the cache by populating the most recent packs
67 # Avoid cold start of the cache by populating the most recent packs
68 # in the cache.
68 # in the cache.
69 for i in reversed(range(min(cachesize, len(packs)))):
69 for i in reversed(range(min(cachesize, len(packs)))):
70 self._movetofront(packs[i])
70 self._movetofront(packs[i])
71
71
72 def _movetofront(self, pack):
72 def _movetofront(self, pack):
73 # This effectively makes pack the first entry in the cache.
73 # This effectively makes pack the first entry in the cache.
74 self._lrucache[pack] = True
74 self._lrucache[pack] = True
75
75
76 def _registerlastpackusage(self):
76 def _registerlastpackusage(self):
77 if self._lastpack is not None:
77 if self._lastpack is not None:
78 self._movetofront(self._lastpack)
78 self._movetofront(self._lastpack)
79 self._lastpack = None
79 self._lastpack = None
80
80
81 def add(self, pack):
81 def add(self, pack):
82 self._registerlastpackusage()
82 self._registerlastpackusage()
83
83
84 # This method will mostly be called when packs are not in cache.
84 # This method will mostly be called when packs are not in cache.
85 # Therefore, add the pack to the cache.
85 # Therefore, add the pack to the cache.
86 self._movetofront(pack)
86 self._movetofront(pack)
87 self._packs.add(pack)
87 self._packs.add(pack)
88
88
89 def __iter__(self):
89 def __iter__(self):
90 self._registerlastpackusage()
90 self._registerlastpackusage()
91
91
92 # Cache iteration is based on LRU.
92 # Cache iteration is based on LRU.
93 for pack in self._lrucache:
93 for pack in self._lrucache:
94 self._lastpack = pack
94 self._lastpack = pack
95 yield pack
95 yield pack
96
96
97 cachedpacks = set(pack for pack in self._lrucache)
97 cachedpacks = set(pack for pack in self._lrucache)
98 # Yield packs that are not in the cache.
98 # Yield packs that are not in the cache.
99 for pack in self._packs - cachedpacks:
99 for pack in self._packs - cachedpacks:
100 self._lastpack = pack
100 self._lastpack = pack
101 yield pack
101 yield pack
102
102
103 # Data not found in any pack.
103 # Data not found in any pack.
104 self._lastpack = None
104 self._lastpack = None
105
105
106 class basepackstore(object):
106 class basepackstore(object):
107 # Default cache size limit for the pack files.
107 # Default cache size limit for the pack files.
108 DEFAULTCACHESIZE = 100
108 DEFAULTCACHESIZE = 100
109
109
110 def __init__(self, ui, path):
110 def __init__(self, ui, path):
111 self.ui = ui
111 self.ui = ui
112 self.path = path
112 self.path = path
113
113
114 # lastrefresh is 0 so we'll immediately check for new packs on the first
114 # lastrefresh is 0 so we'll immediately check for new packs on the first
115 # failure.
115 # failure.
116 self.lastrefresh = 0
116 self.lastrefresh = 0
117
117
118 packs = []
118 packs = []
119 for filepath, __, __ in self._getavailablepackfilessorted():
119 for filepath, __, __ in self._getavailablepackfilessorted():
120 try:
120 try:
121 pack = self.getpack(filepath)
121 pack = self.getpack(filepath)
122 except Exception as ex:
122 except Exception as ex:
123 # An exception may be thrown if the pack file is corrupted
123 # An exception may be thrown if the pack file is corrupted
124 # somehow. Log a warning but keep going in this case, just
124 # somehow. Log a warning but keep going in this case, just
125 # skipping this pack file.
125 # skipping this pack file.
126 #
126 #
127 # If this is an ENOENT error then don't even bother logging.
127 # If this is an ENOENT error then don't even bother logging.
128 # Someone could have removed the file since we retrieved the
128 # Someone could have removed the file since we retrieved the
129 # list of paths.
129 # list of paths.
130 if getattr(ex, 'errno', None) != errno.ENOENT:
130 if getattr(ex, 'errno', None) != errno.ENOENT:
131 ui.warn(_('unable to load pack %s: %s\n') % (filepath, ex))
131 ui.warn(_('unable to load pack %s: %s\n') % (filepath, ex))
132 continue
132 continue
133 packs.append(pack)
133 packs.append(pack)
134
134
135 self.packs = _cachebackedpacks(packs, self.DEFAULTCACHESIZE)
135 self.packs = _cachebackedpacks(packs, self.DEFAULTCACHESIZE)
136
136
137 def _getavailablepackfiles(self):
137 def _getavailablepackfiles(self):
138 """For each pack file (a index/data file combo), yields:
138 """For each pack file (a index/data file combo), yields:
139 (full path without extension, mtime, size)
139 (full path without extension, mtime, size)
140
140
141 mtime will be the mtime of the index/data file (whichever is newer)
141 mtime will be the mtime of the index/data file (whichever is newer)
142 size is the combined size of index/data file
142 size is the combined size of index/data file
143 """
143 """
144 indexsuffixlen = len(self.INDEXSUFFIX)
144 indexsuffixlen = len(self.INDEXSUFFIX)
145 packsuffixlen = len(self.PACKSUFFIX)
145 packsuffixlen = len(self.PACKSUFFIX)
146
146
147 ids = set()
147 ids = set()
148 sizes = collections.defaultdict(lambda: 0)
148 sizes = collections.defaultdict(lambda: 0)
149 mtimes = collections.defaultdict(lambda: [])
149 mtimes = collections.defaultdict(lambda: [])
150 try:
150 try:
151 for filename, type, stat in osutil.listdir(self.path, stat=True):
151 for filename, type, stat in osutil.listdir(self.path, stat=True):
152 id = None
152 id = None
153 if filename[-indexsuffixlen:] == self.INDEXSUFFIX:
153 if filename[-indexsuffixlen:] == self.INDEXSUFFIX:
154 id = filename[:-indexsuffixlen]
154 id = filename[:-indexsuffixlen]
155 elif filename[-packsuffixlen:] == self.PACKSUFFIX:
155 elif filename[-packsuffixlen:] == self.PACKSUFFIX:
156 id = filename[:-packsuffixlen]
156 id = filename[:-packsuffixlen]
157
157
158 # Since we expect to have two files corresponding to each ID
158 # Since we expect to have two files corresponding to each ID
159 # (the index file and the pack file), we can yield once we see
159 # (the index file and the pack file), we can yield once we see
160 # it twice.
160 # it twice.
161 if id:
161 if id:
162 sizes[id] += stat.st_size # Sum both files' sizes together
162 sizes[id] += stat.st_size # Sum both files' sizes together
163 mtimes[id].append(stat.st_mtime)
163 mtimes[id].append(stat.st_mtime)
164 if id in ids:
164 if id in ids:
165 yield (os.path.join(self.path, id), max(mtimes[id]),
165 yield (os.path.join(self.path, id), max(mtimes[id]),
166 sizes[id])
166 sizes[id])
167 else:
167 else:
168 ids.add(id)
168 ids.add(id)
169 except OSError as ex:
169 except OSError as ex:
170 if ex.errno != errno.ENOENT:
170 if ex.errno != errno.ENOENT:
171 raise
171 raise
172
172
173 def _getavailablepackfilessorted(self):
173 def _getavailablepackfilessorted(self):
174 """Like `_getavailablepackfiles`, but also sorts the files by mtime,
174 """Like `_getavailablepackfiles`, but also sorts the files by mtime,
175 yielding newest files first.
175 yielding newest files first.
176
176
177 This is desirable, since it is more likely newer packfiles have more
177 This is desirable, since it is more likely newer packfiles have more
178 desirable data.
178 desirable data.
179 """
179 """
180 files = []
180 files = []
181 for path, mtime, size in self._getavailablepackfiles():
181 for path, mtime, size in self._getavailablepackfiles():
182 files.append((mtime, size, path))
182 files.append((mtime, size, path))
183 files = sorted(files, reverse=True)
183 files = sorted(files, reverse=True)
184 for mtime, size, path in files:
184 for mtime, size, path in files:
185 yield path, mtime, size
185 yield path, mtime, size
186
186
187 def gettotalsizeandcount(self):
187 def gettotalsizeandcount(self):
188 """Returns the total disk size (in bytes) of all the pack files in
188 """Returns the total disk size (in bytes) of all the pack files in
189 this store, and the count of pack files.
189 this store, and the count of pack files.
190
190
191 (This might be smaller than the total size of the ``self.path``
191 (This might be smaller than the total size of the ``self.path``
192 directory, since this only considers fully-written pack files, and not
192 directory, since this only considers fully-written pack files, and not
193 temporary files or other detritus on the directory.)
193 temporary files or other detritus on the directory.)
194 """
194 """
195 totalsize = 0
195 totalsize = 0
196 count = 0
196 count = 0
197 for __, __, size in self._getavailablepackfiles():
197 for __, __, size in self._getavailablepackfiles():
198 totalsize += size
198 totalsize += size
199 count += 1
199 count += 1
200 return totalsize, count
200 return totalsize, count
201
201
202 def getmetrics(self):
202 def getmetrics(self):
203 """Returns metrics on the state of this store."""
203 """Returns metrics on the state of this store."""
204 size, count = self.gettotalsizeandcount()
204 size, count = self.gettotalsizeandcount()
205 return {
205 return {
206 'numpacks': count,
206 'numpacks': count,
207 'totalpacksize': size,
207 'totalpacksize': size,
208 }
208 }
209
209
210 def getpack(self, path):
210 def getpack(self, path):
211 raise NotImplementedError()
211 raise NotImplementedError()
212
212
213 def getmissing(self, keys):
213 def getmissing(self, keys):
214 missing = keys
214 missing = keys
215 for pack in self.packs:
215 for pack in self.packs:
216 missing = pack.getmissing(missing)
216 missing = pack.getmissing(missing)
217
217
218 # Ensures better performance of the cache by keeping the most
218 # Ensures better performance of the cache by keeping the most
219 # recently accessed pack at the beginning in subsequent iterations.
219 # recently accessed pack at the beginning in subsequent iterations.
220 if not missing:
220 if not missing:
221 return missing
221 return missing
222
222
223 if missing:
223 if missing:
224 for pack in self.refresh():
224 for pack in self.refresh():
225 missing = pack.getmissing(missing)
225 missing = pack.getmissing(missing)
226
226
227 return missing
227 return missing
228
228
229 def markledger(self, ledger, options=None):
229 def markledger(self, ledger, options=None):
230 for pack in self.packs:
230 for pack in self.packs:
231 pack.markledger(ledger)
231 pack.markledger(ledger)
232
232
233 def markforrefresh(self):
233 def markforrefresh(self):
234 """Tells the store that there may be new pack files, so the next time it
234 """Tells the store that there may be new pack files, so the next time it
235 has a lookup miss it should check for new files."""
235 has a lookup miss it should check for new files."""
236 self.lastrefresh = 0
236 self.lastrefresh = 0
237
237
238 def refresh(self):
238 def refresh(self):
239 """Checks for any new packs on disk, adds them to the main pack list,
239 """Checks for any new packs on disk, adds them to the main pack list,
240 and returns a list of just the new packs."""
240 and returns a list of just the new packs."""
241 now = time.time()
241 now = time.time()
242
242
243 # If we experience a lot of misses (like in the case of getmissing() on
243 # If we experience a lot of misses (like in the case of getmissing() on
244 # new objects), let's only actually check disk for new stuff every once
244 # new objects), let's only actually check disk for new stuff every once
245 # in a while. Generally this code path should only ever matter when a
245 # in a while. Generally this code path should only ever matter when a
246 # repack is going on in the background, and it should be pretty rare
246 # repack is going on in the background, and it should be pretty rare
247 # for that to happen twice in quick succession.
247 # for that to happen twice in quick succession.
248 newpacks = []
248 newpacks = []
249 if now > self.lastrefresh + REFRESHRATE:
249 if now > self.lastrefresh + REFRESHRATE:
250 self.lastrefresh = now
250 self.lastrefresh = now
251 previous = set(p.path for p in self.packs)
251 previous = set(p.path for p in self.packs)
252 for filepath, __, __ in self._getavailablepackfilessorted():
252 for filepath, __, __ in self._getavailablepackfilessorted():
253 if filepath not in previous:
253 if filepath not in previous:
254 newpack = self.getpack(filepath)
254 newpack = self.getpack(filepath)
255 newpacks.append(newpack)
255 newpacks.append(newpack)
256 self.packs.add(newpack)
256 self.packs.add(newpack)
257
257
258 return newpacks
258 return newpacks
259
259
260 class versionmixin(object):
260 class versionmixin(object):
261 # Mix-in for classes with multiple supported versions
261 # Mix-in for classes with multiple supported versions
262 VERSION = None
262 VERSION = None
263 SUPPORTED_VERSIONS = [2]
263 SUPPORTED_VERSIONS = [2]
264
264
265 def _checkversion(self, version):
265 def _checkversion(self, version):
266 if version in self.SUPPORTED_VERSIONS:
266 if version in self.SUPPORTED_VERSIONS:
267 if self.VERSION is None:
267 if self.VERSION is None:
268 # only affect this instance
268 # only affect this instance
269 self.VERSION = version
269 self.VERSION = version
270 elif self.VERSION != version:
270 elif self.VERSION != version:
271 raise RuntimeError('inconsistent version: %s' % version)
271 raise RuntimeError('inconsistent version: %s' % version)
272 else:
272 else:
273 raise RuntimeError('unsupported version: %s' % version)
273 raise RuntimeError('unsupported version: %s' % version)
274
274
275 class basepack(versionmixin):
275 class basepack(versionmixin):
276 # The maximum amount we should read via mmap before remapping so the old
276 # The maximum amount we should read via mmap before remapping so the old
277 # pages can be released (100MB)
277 # pages can be released (100MB)
278 MAXPAGEDIN = 100 * 1024**2
278 MAXPAGEDIN = 100 * 1024**2
279
279
280 SUPPORTED_VERSIONS = [2]
280 SUPPORTED_VERSIONS = [2]
281
281
282 def __init__(self, path):
282 def __init__(self, path):
283 self.path = path
283 self.path = path
284 self.packpath = path + self.PACKSUFFIX
284 self.packpath = path + self.PACKSUFFIX
285 self.indexpath = path + self.INDEXSUFFIX
285 self.indexpath = path + self.INDEXSUFFIX
286
286
287 self.indexsize = os.stat(self.indexpath).st_size
287 self.indexsize = os.stat(self.indexpath).st_size
288 self.datasize = os.stat(self.packpath).st_size
288 self.datasize = os.stat(self.packpath).st_size
289
289
290 self._index = None
290 self._index = None
291 self._data = None
291 self._data = None
292 self.freememory() # initialize the mmap
292 self.freememory() # initialize the mmap
293
293
294 version = struct.unpack('!B', self._data[:PACKVERSIONSIZE])[0]
294 version = struct.unpack('!B', self._data[:PACKVERSIONSIZE])[0]
295 self._checkversion(version)
295 self._checkversion(version)
296
296
297 version, config = struct.unpack('!BB', self._index[:INDEXVERSIONSIZE])
297 version, config = struct.unpack('!BB', self._index[:INDEXVERSIONSIZE])
298 self._checkversion(version)
298 self._checkversion(version)
299
299
300 if 0b10000000 & config:
300 if 0b10000000 & config:
301 self.params = indexparams(LARGEFANOUTPREFIX, version)
301 self.params = indexparams(LARGEFANOUTPREFIX, version)
302 else:
302 else:
303 self.params = indexparams(SMALLFANOUTPREFIX, version)
303 self.params = indexparams(SMALLFANOUTPREFIX, version)
304
304
305 @util.propertycache
305 @util.propertycache
306 def _fanouttable(self):
306 def _fanouttable(self):
307 params = self.params
307 params = self.params
308 rawfanout = self._index[FANOUTSTART:FANOUTSTART + params.fanoutsize]
308 rawfanout = self._index[FANOUTSTART:FANOUTSTART + params.fanoutsize]
309 fanouttable = []
309 fanouttable = []
310 for i in pycompat.xrange(0, params.fanoutcount):
310 for i in pycompat.xrange(0, params.fanoutcount):
311 loc = i * 4
311 loc = i * 4
312 fanoutentry = struct.unpack('!I', rawfanout[loc:loc + 4])[0]
312 fanoutentry = struct.unpack('!I', rawfanout[loc:loc + 4])[0]
313 fanouttable.append(fanoutentry)
313 fanouttable.append(fanoutentry)
314 return fanouttable
314 return fanouttable
315
315
316 @util.propertycache
316 @util.propertycache
317 def _indexend(self):
317 def _indexend(self):
318 nodecount = struct.unpack_from('!Q', self._index,
318 nodecount = struct.unpack_from('!Q', self._index,
319 self.params.indexstart - 8)[0]
319 self.params.indexstart - 8)[0]
320 return self.params.indexstart + nodecount * self.INDEXENTRYLENGTH
320 return self.params.indexstart + nodecount * self.INDEXENTRYLENGTH
321
321
322 def freememory(self):
322 def freememory(self):
323 """Unmap and remap the memory to free it up after known expensive
323 """Unmap and remap the memory to free it up after known expensive
324 operations. Return True if self._data and self._index were reloaded.
324 operations. Return True if self._data and self._index were reloaded.
325 """
325 """
326 if self._index:
326 if self._index:
327 if self._pagedin < self.MAXPAGEDIN:
327 if self._pagedin < self.MAXPAGEDIN:
328 return False
328 return False
329
329
330 self._index.close()
330 self._index.close()
331 self._data.close()
331 self._data.close()
332
332
333 # TODO: use an opener/vfs to access these paths
333 # TODO: use an opener/vfs to access these paths
334 with open(self.indexpath, PACKOPENMODE) as indexfp:
334 with open(self.indexpath, PACKOPENMODE) as indexfp:
335 # memory-map the file, size 0 means whole file
335 # memory-map the file, size 0 means whole file
336 self._index = mmap.mmap(indexfp.fileno(), 0,
336 self._index = mmap.mmap(indexfp.fileno(), 0,
337 access=mmap.ACCESS_READ)
337 access=mmap.ACCESS_READ)
338 with open(self.packpath, PACKOPENMODE) as datafp:
338 with open(self.packpath, PACKOPENMODE) as datafp:
339 self._data = mmap.mmap(datafp.fileno(), 0, access=mmap.ACCESS_READ)
339 self._data = mmap.mmap(datafp.fileno(), 0, access=mmap.ACCESS_READ)
340
340
341 self._pagedin = 0
341 self._pagedin = 0
342 return True
342 return True
343
343
344 def getmissing(self, keys):
344 def getmissing(self, keys):
345 raise NotImplementedError()
345 raise NotImplementedError()
346
346
347 def markledger(self, ledger, options=None):
347 def markledger(self, ledger, options=None):
348 raise NotImplementedError()
348 raise NotImplementedError()
349
349
350 def cleanup(self, ledger):
350 def cleanup(self, ledger):
351 raise NotImplementedError()
351 raise NotImplementedError()
352
352
353 def __iter__(self):
353 def __iter__(self):
354 raise NotImplementedError()
354 raise NotImplementedError()
355
355
356 def iterentries(self):
356 def iterentries(self):
357 raise NotImplementedError()
357 raise NotImplementedError()
358
358
359 class mutablebasepack(versionmixin):
359 class mutablebasepack(versionmixin):
360
360
361 def __init__(self, ui, packdir, version=2):
361 def __init__(self, ui, packdir, version=2):
362 self._checkversion(version)
362 self._checkversion(version)
363 # TODO(augie): make this configurable
363 # TODO(augie): make this configurable
364 self._compressor = 'GZ'
364 self._compressor = 'GZ'
365 opener = vfsmod.vfs(packdir)
365 opener = vfsmod.vfs(packdir)
366 opener.createmode = 0o444
366 opener.createmode = 0o444
367 self.opener = opener
367 self.opener = opener
368
368
369 self.entries = {}
369 self.entries = {}
370
370
371 shallowutil.mkstickygroupdir(ui, packdir)
371 shallowutil.mkstickygroupdir(ui, packdir)
372 self.packfp, self.packpath = opener.mkstemp(
372 self.packfp, self.packpath = opener.mkstemp(
373 suffix=self.PACKSUFFIX + '-tmp')
373 suffix=self.PACKSUFFIX + '-tmp')
374 self.idxfp, self.idxpath = opener.mkstemp(
374 self.idxfp, self.idxpath = opener.mkstemp(
375 suffix=self.INDEXSUFFIX + '-tmp')
375 suffix=self.INDEXSUFFIX + '-tmp')
376 self.packfp = os.fdopen(self.packfp, r'w+')
376 self.packfp = os.fdopen(self.packfp, r'w+')
377 self.idxfp = os.fdopen(self.idxfp, r'w+')
377 self.idxfp = os.fdopen(self.idxfp, r'w+')
378 self.sha = hashlib.sha1()
378 self.sha = hashlib.sha1()
379 self._closed = False
379 self._closed = False
380
380
381 # The opener provides no way of doing permission fixup on files created
381 # The opener provides no way of doing permission fixup on files created
382 # via mkstemp, so we must fix it ourselves. We can probably fix this
382 # via mkstemp, so we must fix it ourselves. We can probably fix this
383 # upstream in vfs.mkstemp so we don't need to use the private method.
383 # upstream in vfs.mkstemp so we don't need to use the private method.
384 opener._fixfilemode(opener.join(self.packpath))
384 opener._fixfilemode(opener.join(self.packpath))
385 opener._fixfilemode(opener.join(self.idxpath))
385 opener._fixfilemode(opener.join(self.idxpath))
386
386
387 # Write header
387 # Write header
388 # TODO: make it extensible (ex: allow specifying compression algorithm,
388 # TODO: make it extensible (ex: allow specifying compression algorithm,
389 # a flexible key/value header, delta algorithm, fanout size, etc)
389 # a flexible key/value header, delta algorithm, fanout size, etc)
390 versionbuf = struct.pack('!B', self.VERSION) # unsigned 1 byte int
390 versionbuf = struct.pack('!B', self.VERSION) # unsigned 1 byte int
391 self.writeraw(versionbuf)
391 self.writeraw(versionbuf)
392
392
393 def __enter__(self):
393 def __enter__(self):
394 return self
394 return self
395
395
396 def __exit__(self, exc_type, exc_value, traceback):
396 def __exit__(self, exc_type, exc_value, traceback):
397 if exc_type is None:
397 if exc_type is None:
398 self.close()
398 self.close()
399 else:
399 else:
400 self.abort()
400 self.abort()
401
401
402 def abort(self):
402 def abort(self):
403 # Unclean exit
403 # Unclean exit
404 self._cleantemppacks()
404 self._cleantemppacks()
405
405
406 def writeraw(self, data):
406 def writeraw(self, data):
407 self.packfp.write(data)
407 self.packfp.write(data)
408 self.sha.update(data)
408 self.sha.update(data)
409
409
410 def close(self, ledger=None):
410 def close(self, ledger=None):
411 if self._closed:
411 if self._closed:
412 return
412 return
413
413
414 try:
414 try:
415 sha = self.sha.hexdigest()
415 sha = self.sha.hexdigest()
416 self.packfp.close()
416 self.packfp.close()
417 self.writeindex()
417 self.writeindex()
418
418
419 if len(self.entries) == 0:
419 if len(self.entries) == 0:
420 # Empty pack
420 # Empty pack
421 self._cleantemppacks()
421 self._cleantemppacks()
422 self._closed = True
422 self._closed = True
423 return None
423 return None
424
424
425 self.opener.rename(self.packpath, sha + self.PACKSUFFIX)
425 self.opener.rename(self.packpath, sha + self.PACKSUFFIX)
426 try:
426 try:
427 self.opener.rename(self.idxpath, sha + self.INDEXSUFFIX)
427 self.opener.rename(self.idxpath, sha + self.INDEXSUFFIX)
428 except Exception as ex:
428 except Exception as ex:
429 try:
429 try:
430 self.opener.unlink(sha + self.PACKSUFFIX)
430 self.opener.unlink(sha + self.PACKSUFFIX)
431 except Exception:
431 except Exception:
432 pass
432 pass
433 # Throw exception 'ex' explicitly since a normal 'raise' would
433 # Throw exception 'ex' explicitly since a normal 'raise' would
434 # potentially throw an exception from the unlink cleanup.
434 # potentially throw an exception from the unlink cleanup.
435 raise ex
435 raise ex
436 except Exception:
436 except Exception:
437 # Clean up temp packs in all exception cases
437 # Clean up temp packs in all exception cases
438 self._cleantemppacks()
438 self._cleantemppacks()
439 raise
439 raise
440
440
441 self._closed = True
441 self._closed = True
442 result = self.opener.join(sha)
442 result = self.opener.join(sha)
443 if ledger:
443 if ledger:
444 ledger.addcreated(result)
444 ledger.addcreated(result)
445 return result
445 return result
446
446
447 def _cleantemppacks(self):
447 def _cleantemppacks(self):
448 try:
448 try:
449 self.opener.unlink(self.packpath)
449 self.opener.unlink(self.packpath)
450 except Exception:
450 except Exception:
451 pass
451 pass
452 try:
452 try:
453 self.opener.unlink(self.idxpath)
453 self.opener.unlink(self.idxpath)
454 except Exception:
454 except Exception:
455 pass
455 pass
456
456
457 def writeindex(self):
457 def writeindex(self):
458 rawindex = ''
458 rawindex = ''
459
459
460 largefanout = len(self.entries) > SMALLFANOUTCUTOFF
460 largefanout = len(self.entries) > SMALLFANOUTCUTOFF
461 if largefanout:
461 if largefanout:
462 params = indexparams(LARGEFANOUTPREFIX, self.VERSION)
462 params = indexparams(LARGEFANOUTPREFIX, self.VERSION)
463 else:
463 else:
464 params = indexparams(SMALLFANOUTPREFIX, self.VERSION)
464 params = indexparams(SMALLFANOUTPREFIX, self.VERSION)
465
465
466 fanouttable = [EMPTYFANOUT] * params.fanoutcount
466 fanouttable = [EMPTYFANOUT] * params.fanoutcount
467
467
468 # Precompute the location of each entry
468 # Precompute the location of each entry
469 locations = {}
469 locations = {}
470 count = 0
470 count = 0
471 for node in sorted(self.entries.iterkeys()):
471 for node in sorted(self.entries):
472 location = count * self.INDEXENTRYLENGTH
472 location = count * self.INDEXENTRYLENGTH
473 locations[node] = location
473 locations[node] = location
474 count += 1
474 count += 1
475
475
476 # Must use [0] on the unpack result since it's always a tuple.
476 # Must use [0] on the unpack result since it's always a tuple.
477 fanoutkey = struct.unpack(params.fanoutstruct,
477 fanoutkey = struct.unpack(params.fanoutstruct,
478 node[:params.fanoutprefix])[0]
478 node[:params.fanoutprefix])[0]
479 if fanouttable[fanoutkey] == EMPTYFANOUT:
479 if fanouttable[fanoutkey] == EMPTYFANOUT:
480 fanouttable[fanoutkey] = location
480 fanouttable[fanoutkey] = location
481
481
482 rawfanouttable = ''
482 rawfanouttable = ''
483 last = 0
483 last = 0
484 for offset in fanouttable:
484 for offset in fanouttable:
485 offset = offset if offset != EMPTYFANOUT else last
485 offset = offset if offset != EMPTYFANOUT else last
486 last = offset
486 last = offset
487 rawfanouttable += struct.pack('!I', offset)
487 rawfanouttable += struct.pack('!I', offset)
488
488
489 rawentrieslength = struct.pack('!Q', len(self.entries))
489 rawentrieslength = struct.pack('!Q', len(self.entries))
490
490
491 # The index offset is its location in the file: right after the 2 byte
491 # The index offset is its location in the file: right after the 2 byte
492 # header and the fanouttable.
492 # header and the fanouttable.
493 rawindex = self.createindex(locations, 2 + len(rawfanouttable))
493 rawindex = self.createindex(locations, 2 + len(rawfanouttable))
494
494
495 self._writeheader(params)
495 self._writeheader(params)
496 self.idxfp.write(rawfanouttable)
496 self.idxfp.write(rawfanouttable)
497 self.idxfp.write(rawentrieslength)
497 self.idxfp.write(rawentrieslength)
498 self.idxfp.write(rawindex)
498 self.idxfp.write(rawindex)
499 self.idxfp.close()
499 self.idxfp.close()
500
500
501 def createindex(self, nodelocations):
501 def createindex(self, nodelocations):
502 raise NotImplementedError()
502 raise NotImplementedError()
503
503
504 def _writeheader(self, indexparams):
504 def _writeheader(self, indexparams):
505 # Index header
505 # Index header
506 # <version: 1 byte>
506 # <version: 1 byte>
507 # <large fanout: 1 bit> # 1 means 2^16, 0 means 2^8
507 # <large fanout: 1 bit> # 1 means 2^16, 0 means 2^8
508 # <unused: 7 bit> # future use (compression, delta format, etc)
508 # <unused: 7 bit> # future use (compression, delta format, etc)
509 config = 0
509 config = 0
510 if indexparams.fanoutprefix == LARGEFANOUTPREFIX:
510 if indexparams.fanoutprefix == LARGEFANOUTPREFIX:
511 config = 0b10000000
511 config = 0b10000000
512 self.idxfp.write(struct.pack('!BB', self.VERSION, config))
512 self.idxfp.write(struct.pack('!BB', self.VERSION, config))
513
513
514 class indexparams(object):
514 class indexparams(object):
515 __slots__ = (r'fanoutprefix', r'fanoutstruct', r'fanoutcount',
515 __slots__ = (r'fanoutprefix', r'fanoutstruct', r'fanoutcount',
516 r'fanoutsize', r'indexstart')
516 r'fanoutsize', r'indexstart')
517
517
518 def __init__(self, prefixsize, version):
518 def __init__(self, prefixsize, version):
519 self.fanoutprefix = prefixsize
519 self.fanoutprefix = prefixsize
520
520
521 # The struct pack format for fanout table location (i.e. the format that
521 # The struct pack format for fanout table location (i.e. the format that
522 # converts the node prefix into an integer location in the fanout
522 # converts the node prefix into an integer location in the fanout
523 # table).
523 # table).
524 if prefixsize == SMALLFANOUTPREFIX:
524 if prefixsize == SMALLFANOUTPREFIX:
525 self.fanoutstruct = '!B'
525 self.fanoutstruct = '!B'
526 elif prefixsize == LARGEFANOUTPREFIX:
526 elif prefixsize == LARGEFANOUTPREFIX:
527 self.fanoutstruct = '!H'
527 self.fanoutstruct = '!H'
528 else:
528 else:
529 raise ValueError("invalid fanout prefix size: %s" % prefixsize)
529 raise ValueError("invalid fanout prefix size: %s" % prefixsize)
530
530
531 # The number of fanout table entries
531 # The number of fanout table entries
532 self.fanoutcount = 2**(prefixsize * 8)
532 self.fanoutcount = 2**(prefixsize * 8)
533
533
534 # The total bytes used by the fanout table
534 # The total bytes used by the fanout table
535 self.fanoutsize = self.fanoutcount * 4
535 self.fanoutsize = self.fanoutcount * 4
536
536
537 self.indexstart = FANOUTSTART + self.fanoutsize
537 self.indexstart = FANOUTSTART + self.fanoutsize
538 # Skip the index length
538 # Skip the index length
539 self.indexstart += 8
539 self.indexstart += 8
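The fanout lookup described in the comments above boils down to reading the first one or two bytes of a node as a big-endian integer, which is exactly what writeindex() and _fanouttable compute with struct.unpack. A minimal, self-contained sketch of that key computation (the node value is made up):

    import struct

    SMALLFANOUTPREFIX = 1    # key on the first byte      -> 2**8 fanout slots
    LARGEFANOUTPREFIX = 2    # key on the first two bytes -> 2**16 fanout slots

    def fanoutkey(node, prefixsize):
        # Same unpack as basepack: '!B' for a 1-byte prefix, '!H' for 2 bytes.
        fmt = '!B' if prefixsize == SMALLFANOUTPREFIX else '!H'
        return struct.unpack(fmt, node[:prefixsize])[0]

    node = b'\xab\xcd' + b'\x00' * 18            # made-up 20-byte node hash

    print(fanoutkey(node, SMALLFANOUTPREFIX))    # 171    (0xab)
    print(fanoutkey(node, LARGEFANOUTPREFIX))    # 43981  (0xabcd)

Each fanout slot holds the offset of the first index entry whose node starts with that prefix; empty slots repeat the previous offset, which is what the EMPTYFANOUT / last loop in writeindex() writes out.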
@@ -1,589 +1,589 b''
1 # fileserverclient.py - client for communicating with the cache process
1 # fileserverclient.py - client for communicating with the cache process
2 #
2 #
3 # Copyright 2013 Facebook, Inc.
3 # Copyright 2013 Facebook, Inc.
4 #
4 #
5 # This software may be used and distributed according to the terms of the
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
6 # GNU General Public License version 2 or any later version.
7
7
8 from __future__ import absolute_import
8 from __future__ import absolute_import
9
9
10 import hashlib
10 import hashlib
11 import io
11 import io
12 import os
12 import os
13 import threading
13 import threading
14 import time
14 import time
15 import zlib
15 import zlib
16
16
17 from mercurial.i18n import _
17 from mercurial.i18n import _
18 from mercurial.node import bin, hex, nullid
18 from mercurial.node import bin, hex, nullid
19 from mercurial import (
19 from mercurial import (
20 error,
20 error,
21 node,
21 node,
22 pycompat,
22 pycompat,
23 revlog,
23 revlog,
24 sshpeer,
24 sshpeer,
25 util,
25 util,
26 wireprotov1peer,
26 wireprotov1peer,
27 )
27 )
28 from mercurial.utils import procutil
28 from mercurial.utils import procutil
29
29
30 from . import (
30 from . import (
31 constants,
31 constants,
32 contentstore,
32 contentstore,
33 metadatastore,
33 metadatastore,
34 )
34 )
35
35
36 _sshv1peer = sshpeer.sshv1peer
36 _sshv1peer = sshpeer.sshv1peer
37
37
38 # Statistics for debugging
38 # Statistics for debugging
39 fetchcost = 0
39 fetchcost = 0
40 fetches = 0
40 fetches = 0
41 fetched = 0
41 fetched = 0
42 fetchmisses = 0
42 fetchmisses = 0
43
43
44 _lfsmod = None
44 _lfsmod = None
45 _downloading = _('downloading')
45 _downloading = _('downloading')
46
46
47 def getcachekey(reponame, file, id):
47 def getcachekey(reponame, file, id):
48 pathhash = node.hex(hashlib.sha1(file).digest())
48 pathhash = node.hex(hashlib.sha1(file).digest())
49 return os.path.join(reponame, pathhash[:2], pathhash[2:], id)
49 return os.path.join(reponame, pathhash[:2], pathhash[2:], id)
50
50
51 def getlocalkey(file, id):
51 def getlocalkey(file, id):
52 pathhash = node.hex(hashlib.sha1(file).digest())
52 pathhash = node.hex(hashlib.sha1(file).digest())
53 return os.path.join(pathhash, id)
53 return os.path.join(pathhash, id)
54
54
55 def peersetup(ui, peer):
55 def peersetup(ui, peer):
56
56
57 class remotefilepeer(peer.__class__):
57 class remotefilepeer(peer.__class__):
58 @wireprotov1peer.batchable
58 @wireprotov1peer.batchable
59 def x_rfl_getfile(self, file, node):
59 def x_rfl_getfile(self, file, node):
60 if not self.capable('x_rfl_getfile'):
60 if not self.capable('x_rfl_getfile'):
61 raise error.Abort(
61 raise error.Abort(
62 'configured remotefile server does not support getfile')
62 'configured remotefile server does not support getfile')
63 f = wireprotov1peer.future()
63 f = wireprotov1peer.future()
64 yield {'file': file, 'node': node}, f
64 yield {'file': file, 'node': node}, f
65 code, data = f.value.split('\0', 1)
65 code, data = f.value.split('\0', 1)
66 if int(code):
66 if int(code):
67 raise error.LookupError(file, node, data)
67 raise error.LookupError(file, node, data)
68 yield data
68 yield data
69
69
70 @wireprotov1peer.batchable
70 @wireprotov1peer.batchable
71 def x_rfl_getflogheads(self, path):
71 def x_rfl_getflogheads(self, path):
72 if not self.capable('x_rfl_getflogheads'):
72 if not self.capable('x_rfl_getflogheads'):
73 raise error.Abort('configured remotefile server does not '
73 raise error.Abort('configured remotefile server does not '
74 'support getflogheads')
74 'support getflogheads')
75 f = wireprotov1peer.future()
75 f = wireprotov1peer.future()
76 yield {'path': path}, f
76 yield {'path': path}, f
77 heads = f.value.split('\n') if f.value else []
77 heads = f.value.split('\n') if f.value else []
78 yield heads
78 yield heads
79
79
80 def _updatecallstreamopts(self, command, opts):
80 def _updatecallstreamopts(self, command, opts):
81 if command != 'getbundle':
81 if command != 'getbundle':
82 return
82 return
83 if (constants.NETWORK_CAP_LEGACY_SSH_GETFILES
83 if (constants.NETWORK_CAP_LEGACY_SSH_GETFILES
84 not in self.capabilities()):
84 not in self.capabilities()):
85 return
85 return
86 if not util.safehasattr(self, '_localrepo'):
86 if not util.safehasattr(self, '_localrepo'):
87 return
87 return
88 if (constants.SHALLOWREPO_REQUIREMENT
88 if (constants.SHALLOWREPO_REQUIREMENT
89 not in self._localrepo.requirements):
89 not in self._localrepo.requirements):
90 return
90 return
91
91
92 bundlecaps = opts.get('bundlecaps')
92 bundlecaps = opts.get('bundlecaps')
93 if bundlecaps:
93 if bundlecaps:
94 bundlecaps = [bundlecaps]
94 bundlecaps = [bundlecaps]
95 else:
95 else:
96 bundlecaps = []
96 bundlecaps = []
97
97
98 # shallow, includepattern, and excludepattern are a hacky way of
98 # shallow, includepattern, and excludepattern are a hacky way of
99 # carrying over data from the local repo to this getbundle
99 # carrying over data from the local repo to this getbundle
100 # command. We need to do it this way because bundle1 getbundle
100 # command. We need to do it this way because bundle1 getbundle
101 # doesn't provide any other place we can hook in to manipulate
101 # doesn't provide any other place we can hook in to manipulate
102 # getbundle args before it goes across the wire. Once we get rid
102 # getbundle args before it goes across the wire. Once we get rid
103 # of bundle1, we can use bundle2's _pullbundle2extraprepare to
103 # of bundle1, we can use bundle2's _pullbundle2extraprepare to
104 # do this more cleanly.
104 # do this more cleanly.
105 bundlecaps.append(constants.BUNDLE2_CAPABLITY)
105 bundlecaps.append(constants.BUNDLE2_CAPABLITY)
106 if self._localrepo.includepattern:
106 if self._localrepo.includepattern:
107 patterns = '\0'.join(self._localrepo.includepattern)
107 patterns = '\0'.join(self._localrepo.includepattern)
108 includecap = "includepattern=" + patterns
108 includecap = "includepattern=" + patterns
109 bundlecaps.append(includecap)
109 bundlecaps.append(includecap)
110 if self._localrepo.excludepattern:
110 if self._localrepo.excludepattern:
111 patterns = '\0'.join(self._localrepo.excludepattern)
111 patterns = '\0'.join(self._localrepo.excludepattern)
112 excludecap = "excludepattern=" + patterns
112 excludecap = "excludepattern=" + patterns
113 bundlecaps.append(excludecap)
113 bundlecaps.append(excludecap)
114 opts['bundlecaps'] = ','.join(bundlecaps)
114 opts['bundlecaps'] = ','.join(bundlecaps)
115
115
116 def _sendrequest(self, command, args, **opts):
116 def _sendrequest(self, command, args, **opts):
117 self._updatecallstreamopts(command, args)
117 self._updatecallstreamopts(command, args)
118 return super(remotefilepeer, self)._sendrequest(command, args,
118 return super(remotefilepeer, self)._sendrequest(command, args,
119 **opts)
119 **opts)
120
120
121 def _callstream(self, command, **opts):
121 def _callstream(self, command, **opts):
122 supertype = super(remotefilepeer, self)
122 supertype = super(remotefilepeer, self)
123 if not util.safehasattr(supertype, '_sendrequest'):
123 if not util.safehasattr(supertype, '_sendrequest'):
124 self._updatecallstreamopts(command, pycompat.byteskwargs(opts))
124 self._updatecallstreamopts(command, pycompat.byteskwargs(opts))
125 return super(remotefilepeer, self)._callstream(command, **opts)
125 return super(remotefilepeer, self)._callstream(command, **opts)
126
126
127 peer.__class__ = remotefilepeer
127 peer.__class__ = remotefilepeer
128
128
129 class cacheconnection(object):
129 class cacheconnection(object):
130 """The connection for communicating with the remote cache. Performs
130 """The connection for communicating with the remote cache. Performs
131 gets and sets by communicating with an external process that has the
131 gets and sets by communicating with an external process that has the
132 cache-specific implementation.
132 cache-specific implementation.
133 """
133 """
134 def __init__(self):
134 def __init__(self):
135 self.pipeo = self.pipei = self.pipee = None
135 self.pipeo = self.pipei = self.pipee = None
136 self.subprocess = None
136 self.subprocess = None
137 self.connected = False
137 self.connected = False
138
138
139 def connect(self, cachecommand):
139 def connect(self, cachecommand):
140 if self.pipeo:
140 if self.pipeo:
141 raise error.Abort(_("cache connection already open"))
141 raise error.Abort(_("cache connection already open"))
142 self.pipei, self.pipeo, self.pipee, self.subprocess = \
142 self.pipei, self.pipeo, self.pipee, self.subprocess = \
143 procutil.popen4(cachecommand)
143 procutil.popen4(cachecommand)
144 self.connected = True
144 self.connected = True
145
145
146 def close(self):
146 def close(self):
147 def tryclose(pipe):
147 def tryclose(pipe):
148 try:
148 try:
149 pipe.close()
149 pipe.close()
150 except Exception:
150 except Exception:
151 pass
151 pass
152 if self.connected:
152 if self.connected:
153 try:
153 try:
154 self.pipei.write("exit\n")
154 self.pipei.write("exit\n")
155 except Exception:
155 except Exception:
156 pass
156 pass
157 tryclose(self.pipei)
157 tryclose(self.pipei)
158 self.pipei = None
158 self.pipei = None
159 tryclose(self.pipeo)
159 tryclose(self.pipeo)
160 self.pipeo = None
160 self.pipeo = None
161 tryclose(self.pipee)
161 tryclose(self.pipee)
162 self.pipee = None
162 self.pipee = None
163 try:
163 try:
164 # Wait for process to terminate, making sure to avoid deadlock.
164 # Wait for process to terminate, making sure to avoid deadlock.
165 # See https://docs.python.org/2/library/subprocess.html for
165 # See https://docs.python.org/2/library/subprocess.html for
166 # warnings about wait() and deadlocking.
166 # warnings about wait() and deadlocking.
167 self.subprocess.communicate()
167 self.subprocess.communicate()
168 except Exception:
168 except Exception:
169 pass
169 pass
170 self.subprocess = None
170 self.subprocess = None
171 self.connected = False
171 self.connected = False
172
172
173 def request(self, request, flush=True):
173 def request(self, request, flush=True):
174 if self.connected:
174 if self.connected:
175 try:
175 try:
176 self.pipei.write(request)
176 self.pipei.write(request)
177 if flush:
177 if flush:
178 self.pipei.flush()
178 self.pipei.flush()
179 except IOError:
179 except IOError:
180 self.close()
180 self.close()
181
181
182 def receiveline(self):
182 def receiveline(self):
183 if not self.connected:
183 if not self.connected:
184 return None
184 return None
185 try:
185 try:
186 result = self.pipeo.readline()[:-1]
186 result = self.pipeo.readline()[:-1]
187 if not result:
187 if not result:
188 self.close()
188 self.close()
189 except IOError:
189 except IOError:
190 self.close()
190 self.close()
191
191
192 return result
192 return result
193
193
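The docstring above calls this an external process with the cache-specific implementation; the wire format it speaks is visible in fileserverclient.request() further down: a command line ('get', 'set' or 'exit'), a count line, then one cache key per line, with the process answering a 'get' by printing the keys it is missing and ending the reply with a line containing just '0' (lines starting with '_hits_' report progress). A hypothetical minimal cacheprocess that satisfies that contract by treating every key as a miss could look like this:

    #!/usr/bin/env python
    # Hypothetical stand-in cacheprocess: every 'get' key is reported as a
    # miss, 'set' is accepted and ignored, 'exit' (or EOF) terminates.
    from __future__ import absolute_import
    import sys

    def main():
        while True:
            cmd = sys.stdin.readline().strip()
            if not cmd or cmd == 'exit':
                break
            count = int(sys.stdin.readline())
            keys = [sys.stdin.readline().rstrip('\n') for _ in range(count)]
            if cmd == 'get':
                for key in keys:
                    sys.stdout.write(key + '\n')  # a real cache would only echo misses
                sys.stdout.write('0\n')           # '0' ends the reply to a 'get'
                sys.stdout.flush()
            # 'set' lists keys the client fetched from the server itself; a real
            # cache would store the corresponding file data here.

    if __name__ == '__main__':
        main()

Pointing the remotefilelog.cacheprocess config at a script like this exercises the fallback path where every file is fetched from the server instead of the shared cache.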
194 def _getfilesbatch(
194 def _getfilesbatch(
195 remote, receivemissing, progresstick, missed, idmap, batchsize):
195 remote, receivemissing, progresstick, missed, idmap, batchsize):
196 # Over http(s), iterbatch is a streamy method and we can start
196 # Over http(s), iterbatch is a streamy method and we can start
197 # looking at results early. This means we send one (potentially
197 # looking at results early. This means we send one (potentially
198 # large) request, but then we show nice progress as we process
198 # large) request, but then we show nice progress as we process
199 # file results, rather than showing chunks of $batchsize in
199 # file results, rather than showing chunks of $batchsize in
200 # progress.
200 # progress.
201 #
201 #
202 # Over ssh, iterbatch isn't streamy because batch() wasn't
202 # Over ssh, iterbatch isn't streamy because batch() wasn't
203 # explicitly designed as a streaming method. In the future we
203 # explicitly designed as a streaming method. In the future we
204 # should probably introduce a streambatch() method upstream and
204 # should probably introduce a streambatch() method upstream and
205 # use that for this.
205 # use that for this.
206 with remote.commandexecutor() as e:
206 with remote.commandexecutor() as e:
207 futures = []
207 futures = []
208 for m in missed:
208 for m in missed:
209 futures.append(e.callcommand('x_rfl_getfile', {
209 futures.append(e.callcommand('x_rfl_getfile', {
210 'file': idmap[m],
210 'file': idmap[m],
211 'node': m[-40:]
211 'node': m[-40:]
212 }))
212 }))
213
213
214 for i, m in enumerate(missed):
214 for i, m in enumerate(missed):
215 r = futures[i].result()
215 r = futures[i].result()
216 futures[i] = None # release memory
216 futures[i] = None # release memory
217 file_ = idmap[m]
217 file_ = idmap[m]
218 node = m[-40:]
218 node = m[-40:]
219 receivemissing(io.BytesIO('%d\n%s' % (len(r), r)), file_, node)
219 receivemissing(io.BytesIO('%d\n%s' % (len(r), r)), file_, node)
220 progresstick()
220 progresstick()
221
221
222 def _getfiles_optimistic(
222 def _getfiles_optimistic(
223 remote, receivemissing, progresstick, missed, idmap, step):
223 remote, receivemissing, progresstick, missed, idmap, step):
224 remote._callstream("x_rfl_getfiles")
224 remote._callstream("x_rfl_getfiles")
225 i = 0
225 i = 0
226 pipeo = remote._pipeo
226 pipeo = remote._pipeo
227 pipei = remote._pipei
227 pipei = remote._pipei
228 while i < len(missed):
228 while i < len(missed):
229 # issue a batch of requests
229 # issue a batch of requests
230 start = i
230 start = i
231 end = min(len(missed), start + step)
231 end = min(len(missed), start + step)
232 i = end
232 i = end
233 for missingid in missed[start:end]:
233 for missingid in missed[start:end]:
234 # issue new request
234 # issue new request
235 versionid = missingid[-40:]
235 versionid = missingid[-40:]
236 file = idmap[missingid]
236 file = idmap[missingid]
237 sshrequest = "%s%s\n" % (versionid, file)
237 sshrequest = "%s%s\n" % (versionid, file)
238 pipeo.write(sshrequest)
238 pipeo.write(sshrequest)
239 pipeo.flush()
239 pipeo.flush()
240
240
241 # receive batch results
241 # receive batch results
242 for missingid in missed[start:end]:
242 for missingid in missed[start:end]:
243 versionid = missingid[-40:]
243 versionid = missingid[-40:]
244 file = idmap[missingid]
244 file = idmap[missingid]
245 receivemissing(pipei, file, versionid)
245 receivemissing(pipei, file, versionid)
246 progresstick()
246 progresstick()
247
247
248 # End the command
248 # End the command
249 pipeo.write('\n')
249 pipeo.write('\n')
250 pipeo.flush()
250 pipeo.flush()
251
251
252 def _getfiles_threaded(
252 def _getfiles_threaded(
253 remote, receivemissing, progresstick, missed, idmap, step):
253 remote, receivemissing, progresstick, missed, idmap, step):
254 remote._callstream("getfiles")
254 remote._callstream("getfiles")
255 pipeo = remote._pipeo
255 pipeo = remote._pipeo
256 pipei = remote._pipei
256 pipei = remote._pipei
257
257
258 def writer():
258 def writer():
259 for missingid in missed:
259 for missingid in missed:
260 versionid = missingid[-40:]
260 versionid = missingid[-40:]
261 file = idmap[missingid]
261 file = idmap[missingid]
262 sshrequest = "%s%s\n" % (versionid, file)
262 sshrequest = "%s%s\n" % (versionid, file)
263 pipeo.write(sshrequest)
263 pipeo.write(sshrequest)
264 pipeo.flush()
264 pipeo.flush()
265 writerthread = threading.Thread(target=writer)
265 writerthread = threading.Thread(target=writer)
266 writerthread.daemon = True
266 writerthread.daemon = True
267 writerthread.start()
267 writerthread.start()
268
268
269 for missingid in missed:
269 for missingid in missed:
270 versionid = missingid[-40:]
270 versionid = missingid[-40:]
271 file = idmap[missingid]
271 file = idmap[missingid]
272 receivemissing(pipei, file, versionid)
272 receivemissing(pipei, file, versionid)
273 progresstick()
273 progresstick()
274
274
275 writerthread.join()
275 writerthread.join()
276 # End the command
276 # End the command
277 pipeo.write('\n')
277 pipeo.write('\n')
278 pipeo.flush()
278 pipeo.flush()
279
279
280 class fileserverclient(object):
280 class fileserverclient(object):
281 """A client for requesting files from the remote file server.
281 """A client for requesting files from the remote file server.
282 """
282 """
283 def __init__(self, repo):
283 def __init__(self, repo):
284 ui = repo.ui
284 ui = repo.ui
285 self.repo = repo
285 self.repo = repo
286 self.ui = ui
286 self.ui = ui
287 self.cacheprocess = ui.config("remotefilelog", "cacheprocess")
287 self.cacheprocess = ui.config("remotefilelog", "cacheprocess")
288 if self.cacheprocess:
288 if self.cacheprocess:
289 self.cacheprocess = util.expandpath(self.cacheprocess)
289 self.cacheprocess = util.expandpath(self.cacheprocess)
290
290
291 # This option causes remotefilelog to pass the full file path to the
291 # This option causes remotefilelog to pass the full file path to the
292 # cacheprocess instead of a hashed key.
292 # cacheprocess instead of a hashed key.
293 self.cacheprocesspasspath = ui.configbool(
293 self.cacheprocesspasspath = ui.configbool(
294 "remotefilelog", "cacheprocess.includepath")
294 "remotefilelog", "cacheprocess.includepath")
295
295
296 self.debugoutput = ui.configbool("remotefilelog", "debug")
296 self.debugoutput = ui.configbool("remotefilelog", "debug")
297
297
298 self.remotecache = cacheconnection()
298 self.remotecache = cacheconnection()
299
299
300 def setstore(self, datastore, historystore, writedata, writehistory):
300 def setstore(self, datastore, historystore, writedata, writehistory):
301 self.datastore = datastore
301 self.datastore = datastore
302 self.historystore = historystore
302 self.historystore = historystore
303 self.writedata = writedata
303 self.writedata = writedata
304 self.writehistory = writehistory
304 self.writehistory = writehistory
305
305
306 def _connect(self):
306 def _connect(self):
307 return self.repo.connectionpool.get(self.repo.fallbackpath)
307 return self.repo.connectionpool.get(self.repo.fallbackpath)
308
308
309 def request(self, fileids):
309 def request(self, fileids):
310 """Takes a list of filename/node pairs and fetches them from the
310 """Takes a list of filename/node pairs and fetches them from the
311 server. Files are stored in the local cache.
311 server. Files are stored in the local cache.
312 A list of nodes that the server couldn't find is returned.
312 A list of nodes that the server couldn't find is returned.
313 If the connection fails, an exception is raised.
313 If the connection fails, an exception is raised.
314 """
314 """
315 if not self.remotecache.connected:
315 if not self.remotecache.connected:
316 self.connect()
316 self.connect()
317 cache = self.remotecache
317 cache = self.remotecache
318 writedata = self.writedata
318 writedata = self.writedata
319
319
320 repo = self.repo
320 repo = self.repo
321 count = len(fileids)
321 count = len(fileids)
322 request = "get\n%d\n" % count
322 request = "get\n%d\n" % count
323 idmap = {}
323 idmap = {}
324 reponame = repo.name
324 reponame = repo.name
325 for file, id in fileids:
325 for file, id in fileids:
326 fullid = getcachekey(reponame, file, id)
326 fullid = getcachekey(reponame, file, id)
327 if self.cacheprocesspasspath:
327 if self.cacheprocesspasspath:
328 request += file + '\0'
328 request += file + '\0'
329 request += fullid + "\n"
329 request += fullid + "\n"
330 idmap[fullid] = file
330 idmap[fullid] = file
331
331
332 cache.request(request)
332 cache.request(request)
333
333
334 total = count
334 total = count
335 self.ui.progress(_downloading, 0, total=count)
335 self.ui.progress(_downloading, 0, total=count)
336
336
337 missed = []
337 missed = []
338 count = 0
338 count = 0
339 while True:
339 while True:
340 missingid = cache.receiveline()
340 missingid = cache.receiveline()
341 if not missingid:
341 if not missingid:
342 missedset = set(missed)
342 missedset = set(missed)
343 for missingid in idmap.iterkeys():
343 for missingid in idmap:
344 if not missingid in missedset:
344 if not missingid in missedset:
345 missed.append(missingid)
345 missed.append(missingid)
346 self.ui.warn(_("warning: cache connection closed early - " +
346 self.ui.warn(_("warning: cache connection closed early - " +
347 "falling back to server\n"))
347 "falling back to server\n"))
348 break
348 break
349 if missingid == "0":
349 if missingid == "0":
350 break
350 break
351 if missingid.startswith("_hits_"):
351 if missingid.startswith("_hits_"):
352 # receive progress reports
352 # receive progress reports
353 parts = missingid.split("_")
353 parts = missingid.split("_")
354 count += int(parts[2])
354 count += int(parts[2])
355 self.ui.progress(_downloading, count, total=total)
355 self.ui.progress(_downloading, count, total=total)
356 continue
356 continue
357
357
358 missed.append(missingid)
358 missed.append(missingid)
359
359
360 global fetchmisses
360 global fetchmisses
361 fetchmisses += len(missed)
361 fetchmisses += len(missed)
362
362
363 count = [total - len(missed)]
363 count = [total - len(missed)]
364 fromcache = count[0]
364 fromcache = count[0]
365 self.ui.progress(_downloading, count[0], total=total)
365 self.ui.progress(_downloading, count[0], total=total)
366 self.ui.log("remotefilelog", "remote cache hit rate is %r of %r\n",
366 self.ui.log("remotefilelog", "remote cache hit rate is %r of %r\n",
367 count[0], total, hit=count[0], total=total)
367 count[0], total, hit=count[0], total=total)
368
368
369 oldumask = os.umask(0o002)
369 oldumask = os.umask(0o002)
370 try:
370 try:
371 # receive cache misses from master
371 # receive cache misses from master
372 if missed:
372 if missed:
373 def progresstick():
373 def progresstick():
374 count[0] += 1
374 count[0] += 1
375 self.ui.progress(_downloading, count[0], total=total)
375 self.ui.progress(_downloading, count[0], total=total)
376 # When verbose is true, sshpeer prints 'running ssh...'
376 # When verbose is true, sshpeer prints 'running ssh...'
377 # to stdout, which can interfere with some command
377 # to stdout, which can interfere with some command
378 # outputs
378 # outputs
379 verbose = self.ui.verbose
379 verbose = self.ui.verbose
380 self.ui.verbose = False
380 self.ui.verbose = False
381 try:
381 try:
382 with self._connect() as conn:
382 with self._connect() as conn:
383 remote = conn.peer
383 remote = conn.peer
384 if remote.capable(
384 if remote.capable(
385 constants.NETWORK_CAP_LEGACY_SSH_GETFILES):
385 constants.NETWORK_CAP_LEGACY_SSH_GETFILES):
386 if not isinstance(remote, _sshv1peer):
386 if not isinstance(remote, _sshv1peer):
387 raise error.Abort('remotefilelog requires ssh '
387 raise error.Abort('remotefilelog requires ssh '
388 'servers')
388 'servers')
389 step = self.ui.configint('remotefilelog',
389 step = self.ui.configint('remotefilelog',
390 'getfilesstep')
390 'getfilesstep')
391 getfilestype = self.ui.config('remotefilelog',
391 getfilestype = self.ui.config('remotefilelog',
392 'getfilestype')
392 'getfilestype')
393 if getfilestype == 'threaded':
393 if getfilestype == 'threaded':
394 _getfiles = _getfiles_threaded
394 _getfiles = _getfiles_threaded
395 else:
395 else:
396 _getfiles = _getfiles_optimistic
396 _getfiles = _getfiles_optimistic
397 _getfiles(remote, self.receivemissing, progresstick,
397 _getfiles(remote, self.receivemissing, progresstick,
398 missed, idmap, step)
398 missed, idmap, step)
399 elif remote.capable("x_rfl_getfile"):
399 elif remote.capable("x_rfl_getfile"):
400 if remote.capable('batch'):
400 if remote.capable('batch'):
401 batchdefault = 100
401 batchdefault = 100
402 else:
402 else:
403 batchdefault = 10
403 batchdefault = 10
404 batchsize = self.ui.configint(
404 batchsize = self.ui.configint(
405 'remotefilelog', 'batchsize', batchdefault)
405 'remotefilelog', 'batchsize', batchdefault)
406 _getfilesbatch(
406 _getfilesbatch(
407 remote, self.receivemissing, progresstick,
407 remote, self.receivemissing, progresstick,
408 missed, idmap, batchsize)
408 missed, idmap, batchsize)
409 else:
409 else:
410 raise error.Abort("configured remotefilelog server"
410 raise error.Abort("configured remotefilelog server"
411 " does not support remotefilelog")
411 " does not support remotefilelog")
412
412
413 self.ui.log("remotefilefetchlog",
413 self.ui.log("remotefilefetchlog",
414 "Success\n",
414 "Success\n",
415 fetched_files = count[0] - fromcache,
415 fetched_files = count[0] - fromcache,
416 total_to_fetch = total - fromcache)
416 total_to_fetch = total - fromcache)
417 except Exception:
417 except Exception:
418 self.ui.log("remotefilefetchlog",
418 self.ui.log("remotefilefetchlog",
419 "Fail\n",
419 "Fail\n",
420 fetched_files = count[0] - fromcache,
420 fetched_files = count[0] - fromcache,
421 total_to_fetch = total - fromcache)
421 total_to_fetch = total - fromcache)
422 raise
422 raise
423 finally:
423 finally:
424 self.ui.verbose = verbose
424 self.ui.verbose = verbose
425 # send to memcache
425 # send to memcache
426 count[0] = len(missed)
426 count[0] = len(missed)
427 request = "set\n%d\n%s\n" % (count[0], "\n".join(missed))
427 request = "set\n%d\n%s\n" % (count[0], "\n".join(missed))
428 cache.request(request)
428 cache.request(request)
429
429
430 self.ui.progress(_downloading, None)
430 self.ui.progress(_downloading, None)
431
431
432 # mark ourselves as a user of this cache
432 # mark ourselves as a user of this cache
433 writedata.markrepo(self.repo.path)
433 writedata.markrepo(self.repo.path)
434 finally:
434 finally:
435 os.umask(oldumask)
435 os.umask(oldumask)
436
436
437 def receivemissing(self, pipe, filename, node):
437 def receivemissing(self, pipe, filename, node):
438 line = pipe.readline()[:-1]
438 line = pipe.readline()[:-1]
439 if not line:
439 if not line:
440 raise error.ResponseError(_("error downloading file contents:"),
440 raise error.ResponseError(_("error downloading file contents:"),
441 _("connection closed early"))
441 _("connection closed early"))
442 size = int(line)
442 size = int(line)
443 data = pipe.read(size)
443 data = pipe.read(size)
444 if len(data) != size:
444 if len(data) != size:
445 raise error.ResponseError(_("error downloading file contents:"),
445 raise error.ResponseError(_("error downloading file contents:"),
446 _("only received %s of %s bytes")
446 _("only received %s of %s bytes")
447 % (len(data), size))
447 % (len(data), size))
448
448
449 self.writedata.addremotefilelognode(filename, bin(node),
449 self.writedata.addremotefilelognode(filename, bin(node),
450 zlib.decompress(data))
450 zlib.decompress(data))
451
451
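For illustration, the framing receivemissing() expects on the pipe is minimal: for each requested file the peer writes a decimal byte count terminated by a newline, followed by exactly that many bytes of zlib-compressed blob data. A rough sketch of that framing against an in-memory pipe (not the real ssh peer; the encoder is only a hypothetical stand-in for the server side):

import io
import zlib

def encodeblob(blob):
    # server side: length line, then the zlib-compressed payload
    payload = zlib.compress(blob)
    return b"%d\n" % len(payload) + payload

def decodeblob(pipe):
    # client side: mirrors the reads performed by receivemissing() above
    size = int(pipe.readline()[:-1])
    data = pipe.read(size)
    if len(data) != size:
        raise ValueError("connection closed early")
    return zlib.decompress(data)

pipe = io.BytesIO(encodeblob(b"file contents"))
assert decodeblob(pipe) == b"file contents"
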
452 def connect(self):
452 def connect(self):
453 if self.cacheprocess:
453 if self.cacheprocess:
454 cmd = "%s %s" % (self.cacheprocess, self.writedata._path)
454 cmd = "%s %s" % (self.cacheprocess, self.writedata._path)
455 self.remotecache.connect(cmd)
455 self.remotecache.connect(cmd)
456 else:
456 else:
457 # If no cache process is specified, we fake one that always
457 # If no cache process is specified, we fake one that always
458 # returns cache misses. This enables tests to run easily
458 # returns cache misses. This enables tests to run easily
459 # and may eventually allow us to be a drop in replacement
459 # and may eventually allow us to be a drop in replacement
460 # for the largefiles extension.
460 # for the largefiles extension.
461 class simplecache(object):
461 class simplecache(object):
462 def __init__(self):
462 def __init__(self):
463 self.missingids = []
463 self.missingids = []
464 self.connected = True
464 self.connected = True
465
465
466 def close(self):
466 def close(self):
467 pass
467 pass
468
468
469 def request(self, value, flush=True):
469 def request(self, value, flush=True):
470 lines = value.split("\n")
470 lines = value.split("\n")
471 if lines[0] != "get":
471 if lines[0] != "get":
472 return
472 return
473 self.missingids = lines[2:-1]
473 self.missingids = lines[2:-1]
474 self.missingids.append('0')
474 self.missingids.append('0')
475
475
476 def receiveline(self):
476 def receiveline(self):
477 if len(self.missingids) > 0:
477 if len(self.missingids) > 0:
478 return self.missingids.pop(0)
478 return self.missingids.pop(0)
479 return None
479 return None
480
480
481 self.remotecache = simplecache()
481 self.remotecache = simplecache()
482
482
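The cache-daemon protocol this fake stands in for is line oriented: the client sends a request of the shape "get\n<count>\n<key1>\n...\n" (mirroring the "set" request assembled further up in this file) and then reads keys back one per line until a literal "0"; every key echoed back is a miss. A self-contained copy of the fake, shown only to make that round trip concrete:

class fakecache(object):
    # stand-alone rewrite of the simplecache above, for illustration only
    def __init__(self):
        self.missingids = []
    def request(self, value):
        lines = value.split("\n")
        if lines[0] == "get":
            # everything after the count line is a key; terminate with "0"
            self.missingids = lines[2:-1] + ['0']
    def receiveline(self):
        return self.missingids.pop(0) if self.missingids else None

cache = fakecache()
keys = ['a' * 40, 'b' * 40]
cache.request("get\n%d\n%s\n" % (len(keys), "\n".join(keys)))
misses = []
while True:
    line = cache.receiveline()
    if line == '0':        # "0" terminates the reply
        break
    misses.append(line)
assert misses == keys      # the fake reports every key as a cache miss
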
483 def close(self):
483 def close(self):
484 if fetches:
484 if fetches:
485 msg = ("%s files fetched over %d fetches - " +
485 msg = ("%s files fetched over %d fetches - " +
486 "(%d misses, %0.2f%% hit ratio) over %0.2fs\n") % (
486 "(%d misses, %0.2f%% hit ratio) over %0.2fs\n") % (
487 fetched,
487 fetched,
488 fetches,
488 fetches,
489 fetchmisses,
489 fetchmisses,
490 float(fetched - fetchmisses) / float(fetched) * 100.0,
490 float(fetched - fetchmisses) / float(fetched) * 100.0,
491 fetchcost)
491 fetchcost)
492 if self.debugoutput:
492 if self.debugoutput:
493 self.ui.warn(msg)
493 self.ui.warn(msg)
494 self.ui.log("remotefilelog.prefetch", msg.replace("%", "%%"),
494 self.ui.log("remotefilelog.prefetch", msg.replace("%", "%%"),
495 remotefilelogfetched=fetched,
495 remotefilelogfetched=fetched,
496 remotefilelogfetches=fetches,
496 remotefilelogfetches=fetches,
497 remotefilelogfetchmisses=fetchmisses,
497 remotefilelogfetchmisses=fetchmisses,
498 remotefilelogfetchtime=fetchcost * 1000)
498 remotefilelogfetchtime=fetchcost * 1000)
499
499
500 if self.remotecache.connected:
500 if self.remotecache.connected:
501 self.remotecache.close()
501 self.remotecache.close()
502
502
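As a quick sanity check of the arithmetic in that message: 1000 files fetched over 4 fetches with 250 cache misses is a 75.00% hit ratio. A hypothetical rendering of the same format string:

fetched, fetches, fetchmisses, fetchcost = 1000, 4, 250, 2.5
ratio = float(fetched - fetchmisses) / float(fetched) * 100.0
assert ratio == 75.0
print(("%s files fetched over %d fetches - "
       "(%d misses, %0.2f%% hit ratio) over %0.2fs")
      % (fetched, fetches, fetchmisses, ratio, fetchcost))
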
503 def prefetch(self, fileids, force=False, fetchdata=True,
503 def prefetch(self, fileids, force=False, fetchdata=True,
504 fetchhistory=False):
504 fetchhistory=False):
505 """downloads the given file versions to the cache
505 """downloads the given file versions to the cache
506 """
506 """
507 repo = self.repo
507 repo = self.repo
508 idstocheck = []
508 idstocheck = []
509 for file, id in fileids:
509 for file, id in fileids:
510 # hack
510 # hack
511 # - we don't use .hgtags
511 # - we don't use .hgtags
512 # - workingctx produces ids with length 42,
512 # - workingctx produces ids with length 42,
513 # which we skip since they aren't in any cache
513 # which we skip since they aren't in any cache
514 if (file == '.hgtags' or len(id) == 42
514 if (file == '.hgtags' or len(id) == 42
515 or not repo.shallowmatch(file)):
515 or not repo.shallowmatch(file)):
516 continue
516 continue
517
517
518 idstocheck.append((file, bin(id)))
518 idstocheck.append((file, bin(id)))
519
519
520 datastore = self.datastore
520 datastore = self.datastore
521 historystore = self.historystore
521 historystore = self.historystore
522 if force:
522 if force:
523 datastore = contentstore.unioncontentstore(*repo.shareddatastores)
523 datastore = contentstore.unioncontentstore(*repo.shareddatastores)
524 historystore = metadatastore.unionmetadatastore(
524 historystore = metadatastore.unionmetadatastore(
525 *repo.sharedhistorystores)
525 *repo.sharedhistorystores)
526
526
527 missingids = set()
527 missingids = set()
528 if fetchdata:
528 if fetchdata:
529 missingids.update(datastore.getmissing(idstocheck))
529 missingids.update(datastore.getmissing(idstocheck))
530 if fetchhistory:
530 if fetchhistory:
531 missingids.update(historystore.getmissing(idstocheck))
531 missingids.update(historystore.getmissing(idstocheck))
532
532
533 # partition missing nodes into nullid and not-nullid so we can
533 # partition missing nodes into nullid and not-nullid so we can
534 # warn about this filtering potentially shadowing bugs.
534 # warn about this filtering potentially shadowing bugs.
535 nullids = len([None for unused, id in missingids if id == nullid])
535 nullids = len([None for unused, id in missingids if id == nullid])
536 if nullids:
536 if nullids:
537 missingids = [(f, id) for f, id in missingids if id != nullid]
537 missingids = [(f, id) for f, id in missingids if id != nullid]
538 repo.ui.develwarn(
538 repo.ui.develwarn(
539 ('remotefilelog not fetching %d null revs'
539 ('remotefilelog not fetching %d null revs'
540 ' - this is likely hiding bugs' % nullids),
540 ' - this is likely hiding bugs' % nullids),
541 config='remotefilelog-ext')
541 config='remotefilelog-ext')
542 if missingids:
542 if missingids:
543 global fetches, fetched, fetchcost
543 global fetches, fetched, fetchcost
544 fetches += 1
544 fetches += 1
545
545
546 # We want to be able to detect excess individual file downloads, so
546 # We want to be able to detect excess individual file downloads, so
547 # let's log that information for debugging.
547 # let's log that information for debugging.
548 if fetches >= 15 and fetches < 18:
548 if fetches >= 15 and fetches < 18:
549 if fetches == 15:
549 if fetches == 15:
550 fetchwarning = self.ui.config('remotefilelog',
550 fetchwarning = self.ui.config('remotefilelog',
551 'fetchwarning')
551 'fetchwarning')
552 if fetchwarning:
552 if fetchwarning:
553 self.ui.warn(fetchwarning + '\n')
553 self.ui.warn(fetchwarning + '\n')
554 self.logstacktrace()
554 self.logstacktrace()
555 missingids = [(file, hex(id)) for file, id in missingids]
555 missingids = [(file, hex(id)) for file, id in missingids]
556 fetched += len(missingids)
556 fetched += len(missingids)
557 start = time.time()
557 start = time.time()
558 missingids = self.request(missingids)
558 missingids = self.request(missingids)
559 if missingids:
559 if missingids:
560 raise error.Abort(_("unable to download %d files") %
560 raise error.Abort(_("unable to download %d files") %
561 len(missingids))
561 len(missingids))
562 fetchcost += time.time() - start
562 fetchcost += time.time() - start
563 self._lfsprefetch(fileids)
563 self._lfsprefetch(fileids)
564
564
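To restate the skip conditions at the top of prefetch(): ids of length 42 come from workingctx and are never in any cache, .hgtags is deliberately left to the normal filelog path, and files outside the shallow pattern are not handled by remotefilelog. A minimal predicate capturing the same checks, with shallowmatch stubbed out as an assumption:

def wantsprefetch(file, id, shallowmatch=lambda f: True):
    # mirrors the filtering at the top of prefetch() above
    if file == '.hgtags':        # kept as a normal filelog
        return False
    if len(id) == 42:            # workingctx pseudo-node, never cached
        return False
    return shallowmatch(file)

assert not wantsprefetch('.hgtags', '0' * 40)
assert not wantsprefetch('somefile', '0' * 42)
assert wantsprefetch('somefile', '0' * 40)
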
565 def _lfsprefetch(self, fileids):
565 def _lfsprefetch(self, fileids):
566 if not _lfsmod or not util.safehasattr(
566 if not _lfsmod or not util.safehasattr(
567 self.repo.svfs, 'lfslocalblobstore'):
567 self.repo.svfs, 'lfslocalblobstore'):
568 return
568 return
569 if not _lfsmod.wrapper.candownload(self.repo):
569 if not _lfsmod.wrapper.candownload(self.repo):
570 return
570 return
571 pointers = []
571 pointers = []
572 store = self.repo.svfs.lfslocalblobstore
572 store = self.repo.svfs.lfslocalblobstore
573 for file, id in fileids:
573 for file, id in fileids:
574 node = bin(id)
574 node = bin(id)
575 rlog = self.repo.file(file)
575 rlog = self.repo.file(file)
576 if rlog.flags(node) & revlog.REVIDX_EXTSTORED:
576 if rlog.flags(node) & revlog.REVIDX_EXTSTORED:
577 text = rlog.revision(node, raw=True)
577 text = rlog.revision(node, raw=True)
578 p = _lfsmod.pointer.deserialize(text)
578 p = _lfsmod.pointer.deserialize(text)
579 oid = p.oid()
579 oid = p.oid()
580 if not store.has(oid):
580 if not store.has(oid):
581 pointers.append(p)
581 pointers.append(p)
582 if len(pointers) > 0:
582 if len(pointers) > 0:
583 self.repo.svfs.lfsremoteblobstore.readbatch(pointers, store)
583 self.repo.svfs.lfsremoteblobstore.readbatch(pointers, store)
584 assert all(store.has(p.oid()) for p in pointers)
584 assert all(store.has(p.oid()) for p in pointers)
585
585
586 def logstacktrace(self):
586 def logstacktrace(self):
587 import traceback
587 import traceback
588 self.ui.log('remotefilelog', 'excess remotefilelog fetching:\n%s\n',
588 self.ui.log('remotefilelog', 'excess remotefilelog fetching:\n%s\n',
589 ''.join(traceback.format_stack()))
589 ''.join(traceback.format_stack()))
@@ -1,452 +1,452 b''
1 # remotefilelog.py - filelog implementation where filelog history is stored
1 # remotefilelog.py - filelog implementation where filelog history is stored
2 # remotely
2 # remotely
3 #
3 #
4 # Copyright 2013 Facebook, Inc.
4 # Copyright 2013 Facebook, Inc.
5 #
5 #
6 # This software may be used and distributed according to the terms of the
6 # This software may be used and distributed according to the terms of the
7 # GNU General Public License version 2 or any later version.
7 # GNU General Public License version 2 or any later version.
8 from __future__ import absolute_import
8 from __future__ import absolute_import
9
9
10 import collections
10 import collections
11 import os
11 import os
12
12
13 from mercurial.node import bin, nullid
13 from mercurial.node import bin, nullid
14 from mercurial.i18n import _
14 from mercurial.i18n import _
15 from mercurial import (
15 from mercurial import (
16 ancestor,
16 ancestor,
17 error,
17 error,
18 mdiff,
18 mdiff,
19 revlog,
19 revlog,
20 )
20 )
21 from mercurial.utils import storageutil
21 from mercurial.utils import storageutil
22
22
23 from . import (
23 from . import (
24 constants,
24 constants,
25 fileserverclient,
25 fileserverclient,
26 shallowutil,
26 shallowutil,
27 )
27 )
28
28
29 class remotefilelognodemap(object):
29 class remotefilelognodemap(object):
30 def __init__(self, filename, store):
30 def __init__(self, filename, store):
31 self._filename = filename
31 self._filename = filename
32 self._store = store
32 self._store = store
33
33
34 def __contains__(self, node):
34 def __contains__(self, node):
35 missing = self._store.getmissing([(self._filename, node)])
35 missing = self._store.getmissing([(self._filename, node)])
36 return not bool(missing)
36 return not bool(missing)
37
37
38 def __get__(self, node):
38 def __get__(self, node):
39 if node not in self:
39 if node not in self:
40 raise KeyError(node)
40 raise KeyError(node)
41 return node
41 return node
42
42
43 class remotefilelog(object):
43 class remotefilelog(object):
44
44
45 _generaldelta = True
45 _generaldelta = True
46
46
47 def __init__(self, opener, path, repo):
47 def __init__(self, opener, path, repo):
48 self.opener = opener
48 self.opener = opener
49 self.filename = path
49 self.filename = path
50 self.repo = repo
50 self.repo = repo
51 self.nodemap = remotefilelognodemap(self.filename, repo.contentstore)
51 self.nodemap = remotefilelognodemap(self.filename, repo.contentstore)
52
52
53 self.version = 1
53 self.version = 1
54
54
55 def read(self, node):
55 def read(self, node):
56 """returns the file contents at this node"""
56 """returns the file contents at this node"""
57 t = self.revision(node)
57 t = self.revision(node)
58 if not t.startswith('\1\n'):
58 if not t.startswith('\1\n'):
59 return t
59 return t
60 s = t.index('\1\n', 2)
60 s = t.index('\1\n', 2)
61 return t[s + 2:]
61 return t[s + 2:]
62
62
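read() is peeling off the standard filelog metadata header: a revision that starts with '\1\n' carries 'key: value' copy metadata up to the next '\1\n', and only what follows is the real file content. A small stand-alone sketch of that header convention (illustrative only, not the extension's API):

def splitmeta(t):
    # same '\1\n ... \1\n' convention handled by read() above
    if not t.startswith('\1\n'):
        return {}, t
    s = t.index('\1\n', 2)
    metablock, body = t[2:s], t[s + 2:]
    meta = dict(line.split(': ', 1) for line in metablock.splitlines())
    return meta, body

text = '\1\ncopy: old.txt\ncopyrev: ' + 'a' * 40 + '\n\1\nreal contents\n'
meta, body = splitmeta(text)
assert meta['copy'] == 'old.txt'
assert body == 'real contents\n'
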
63 def add(self, text, meta, transaction, linknode, p1=None, p2=None):
63 def add(self, text, meta, transaction, linknode, p1=None, p2=None):
64 hashtext = text
64 hashtext = text
65
65
66 # hash with the metadata, like in vanilla filelogs
66 # hash with the metadata, like in vanilla filelogs
67 hashtext = shallowutil.createrevlogtext(text, meta.get('copy'),
67 hashtext = shallowutil.createrevlogtext(text, meta.get('copy'),
68 meta.get('copyrev'))
68 meta.get('copyrev'))
69 node = storageutil.hashrevisionsha1(hashtext, p1, p2)
69 node = storageutil.hashrevisionsha1(hashtext, p1, p2)
70 return self.addrevision(hashtext, transaction, linknode, p1, p2,
70 return self.addrevision(hashtext, transaction, linknode, p1, p2,
71 node=node)
71 node=node)
72
72
73 def _createfileblob(self, text, meta, flags, p1, p2, node, linknode):
73 def _createfileblob(self, text, meta, flags, p1, p2, node, linknode):
74 # text passed to "_createfileblob" does not include filelog metadata
74 # text passed to "_createfileblob" does not include filelog metadata
75 header = shallowutil.buildfileblobheader(len(text), flags)
75 header = shallowutil.buildfileblobheader(len(text), flags)
76 data = "%s\0%s" % (header, text)
76 data = "%s\0%s" % (header, text)
77
77
78 realp1 = p1
78 realp1 = p1
79 copyfrom = ""
79 copyfrom = ""
80 if meta and 'copy' in meta:
80 if meta and 'copy' in meta:
81 copyfrom = meta['copy']
81 copyfrom = meta['copy']
82 realp1 = bin(meta['copyrev'])
82 realp1 = bin(meta['copyrev'])
83
83
84 data += "%s%s%s%s%s\0" % (node, realp1, p2, linknode, copyfrom)
84 data += "%s%s%s%s%s\0" % (node, realp1, p2, linknode, copyfrom)
85
85
86 visited = set()
86 visited = set()
87
87
88 pancestors = {}
88 pancestors = {}
89 queue = []
89 queue = []
90 if realp1 != nullid:
90 if realp1 != nullid:
91 p1flog = self
91 p1flog = self
92 if copyfrom:
92 if copyfrom:
93 p1flog = remotefilelog(self.opener, copyfrom, self.repo)
93 p1flog = remotefilelog(self.opener, copyfrom, self.repo)
94
94
95 pancestors.update(p1flog.ancestormap(realp1))
95 pancestors.update(p1flog.ancestormap(realp1))
96 queue.append(realp1)
96 queue.append(realp1)
97 visited.add(realp1)
97 visited.add(realp1)
98 if p2 != nullid:
98 if p2 != nullid:
99 pancestors.update(self.ancestormap(p2))
99 pancestors.update(self.ancestormap(p2))
100 queue.append(p2)
100 queue.append(p2)
101 visited.add(p2)
101 visited.add(p2)
102
102
103 ancestortext = ""
103 ancestortext = ""
104
104
105 # add the ancestors in topological order
105 # add the ancestors in topological order
106 while queue:
106 while queue:
107 c = queue.pop(0)
107 c = queue.pop(0)
108 pa1, pa2, ancestorlinknode, pacopyfrom = pancestors[c]
108 pa1, pa2, ancestorlinknode, pacopyfrom = pancestors[c]
109
109
110 pacopyfrom = pacopyfrom or ''
110 pacopyfrom = pacopyfrom or ''
111 ancestortext += "%s%s%s%s%s\0" % (
111 ancestortext += "%s%s%s%s%s\0" % (
112 c, pa1, pa2, ancestorlinknode, pacopyfrom)
112 c, pa1, pa2, ancestorlinknode, pacopyfrom)
113
113
114 if pa1 != nullid and pa1 not in visited:
114 if pa1 != nullid and pa1 not in visited:
115 queue.append(pa1)
115 queue.append(pa1)
116 visited.add(pa1)
116 visited.add(pa1)
117 if pa2 != nullid and pa2 not in visited:
117 if pa2 != nullid and pa2 not in visited:
118 queue.append(pa2)
118 queue.append(pa2)
119 visited.add(pa2)
119 visited.add(pa2)
120
120
121 data += ancestortext
121 data += ancestortext
122
122
123 return data
123 return data
124
124
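The blob assembled here has a fixed layout: a size/flags header, a NUL byte, the raw text, then one record for the node itself and each ancestor, consisting of four 20-byte binary nodes (node, p1, p2, linknode) followed by a NUL-terminated copyfrom path, in topological order. A hypothetical parser for that ancestor section, assuming the caller has already located where the text ends:

def parseancestors(ancestortext):
    # each record: 20-byte node | p1 | p2 | linknode | copyfrom | NUL
    offset, amap = 0, {}
    while offset < len(ancestortext):
        node = ancestortext[offset:offset + 20]
        p1 = ancestortext[offset + 20:offset + 40]
        p2 = ancestortext[offset + 40:offset + 60]
        linknode = ancestortext[offset + 60:offset + 80]
        end = ancestortext.index(b'\0', offset + 80)
        copyfrom = ancestortext[offset + 80:end]
        amap[node] = (p1, p2, linknode, copyfrom)
        offset = end + 1
    return amap

nullid = b'\0' * 20
record = b'N' * 20 + nullid + nullid + b'L' * 20 + b'\0'
assert parseancestors(record)[b'N' * 20] == (nullid, nullid, b'L' * 20, b'')
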
125 def addrevision(self, text, transaction, linknode, p1, p2, cachedelta=None,
125 def addrevision(self, text, transaction, linknode, p1, p2, cachedelta=None,
126 node=None, flags=revlog.REVIDX_DEFAULT_FLAGS):
126 node=None, flags=revlog.REVIDX_DEFAULT_FLAGS):
127 # text passed to "addrevision" includes hg filelog metadata header
127 # text passed to "addrevision" includes hg filelog metadata header
128 if node is None:
128 if node is None:
129 node = storageutil.hashrevisionsha1(text, p1, p2)
129 node = storageutil.hashrevisionsha1(text, p1, p2)
130
130
131 meta, metaoffset = storageutil.parsemeta(text)
131 meta, metaoffset = storageutil.parsemeta(text)
132 rawtext, validatehash = self._processflags(text, flags, 'write')
132 rawtext, validatehash = self._processflags(text, flags, 'write')
133 return self.addrawrevision(rawtext, transaction, linknode, p1, p2,
133 return self.addrawrevision(rawtext, transaction, linknode, p1, p2,
134 node, flags, cachedelta,
134 node, flags, cachedelta,
135 _metatuple=(meta, metaoffset))
135 _metatuple=(meta, metaoffset))
136
136
137 def addrawrevision(self, rawtext, transaction, linknode, p1, p2, node,
137 def addrawrevision(self, rawtext, transaction, linknode, p1, p2, node,
138 flags, cachedelta=None, _metatuple=None):
138 flags, cachedelta=None, _metatuple=None):
139 if _metatuple:
139 if _metatuple:
140 # _metatuple: used by "addrevision" internally by remotefilelog
140 # _metatuple: used by "addrevision" internally by remotefilelog
141 # meta was parsed confidently
141 # meta was parsed confidently
142 meta, metaoffset = _metatuple
142 meta, metaoffset = _metatuple
143 else:
143 else:
144 # not from self.addrevision, but something else (repo._filecommit)
144 # not from self.addrevision, but something else (repo._filecommit)
145 # calls addrawrevision directly. remotefilelog needs to get and
145 # calls addrawrevision directly. remotefilelog needs to get and
146 # strip filelog metadata.
146 # strip filelog metadata.
147 # we don't have confidence about whether rawtext contains filelog
147 # we don't have confidence about whether rawtext contains filelog
148 # metadata or not (flag processor could replace it), so we just
148 # metadata or not (flag processor could replace it), so we just
149 # parse it as best-effort.
149 # parse it as best-effort.
150 # in LFS (flags != 0)'s case, the best way is to call LFS code to
150 # in LFS (flags != 0)'s case, the best way is to call LFS code to
151 # get the meta information, instead of storageutil.parsemeta.
151 # get the meta information, instead of storageutil.parsemeta.
152 meta, metaoffset = storageutil.parsemeta(rawtext)
152 meta, metaoffset = storageutil.parsemeta(rawtext)
153 if flags != 0:
153 if flags != 0:
154 # when flags != 0, be conservative and do not mangle rawtext, since
154 # when flags != 0, be conservative and do not mangle rawtext, since
155 # a read flag processor expects the text not being mangled at all.
155 # a read flag processor expects the text not being mangled at all.
156 metaoffset = 0
156 metaoffset = 0
157 if metaoffset:
157 if metaoffset:
158 # remotefilelog fileblob stores copy metadata in its ancestortext,
158 # remotefilelog fileblob stores copy metadata in its ancestortext,
159 # not its main blob. so we need to remove filelog metadata
159 # not its main blob. so we need to remove filelog metadata
160 # (containing copy information) from text.
160 # (containing copy information) from text.
161 blobtext = rawtext[metaoffset:]
161 blobtext = rawtext[metaoffset:]
162 else:
162 else:
163 blobtext = rawtext
163 blobtext = rawtext
164 data = self._createfileblob(blobtext, meta, flags, p1, p2, node,
164 data = self._createfileblob(blobtext, meta, flags, p1, p2, node,
165 linknode)
165 linknode)
166 self.repo.contentstore.addremotefilelognode(self.filename, node, data)
166 self.repo.contentstore.addremotefilelognode(self.filename, node, data)
167
167
168 return node
168 return node
169
169
170 def renamed(self, node):
170 def renamed(self, node):
171 ancestors = self.repo.metadatastore.getancestors(self.filename, node)
171 ancestors = self.repo.metadatastore.getancestors(self.filename, node)
172 p1, p2, linknode, copyfrom = ancestors[node]
172 p1, p2, linknode, copyfrom = ancestors[node]
173 if copyfrom:
173 if copyfrom:
174 return (copyfrom, p1)
174 return (copyfrom, p1)
175
175
176 return False
176 return False
177
177
178 def size(self, node):
178 def size(self, node):
179 """return the size of a given revision"""
179 """return the size of a given revision"""
180 return len(self.read(node))
180 return len(self.read(node))
181
181
182 rawsize = size
182 rawsize = size
183
183
184 def cmp(self, node, text):
184 def cmp(self, node, text):
185 """compare text with a given file revision
185 """compare text with a given file revision
186
186
187 returns True if text is different than what is stored.
187 returns True if text is different than what is stored.
188 """
188 """
189
189
190 if node == nullid:
190 if node == nullid:
191 return True
191 return True
192
192
193 nodetext = self.read(node)
193 nodetext = self.read(node)
194 return nodetext != text
194 return nodetext != text
195
195
196 def __nonzero__(self):
196 def __nonzero__(self):
197 return True
197 return True
198
198
199 def __len__(self):
199 def __len__(self):
200 if self.filename == '.hgtags':
200 if self.filename == '.hgtags':
201 # The length of .hgtags is used to fast path tag checking.
201 # The length of .hgtags is used to fast path tag checking.
202 # remotefilelog doesn't support .hgtags since the entire .hgtags
202 # remotefilelog doesn't support .hgtags since the entire .hgtags
203 # history is needed. Use the excludepattern setting to make
203 # history is needed. Use the excludepattern setting to make
204 # .hgtags a normal filelog.
204 # .hgtags a normal filelog.
205 return 0
205 return 0
206
206
207 raise RuntimeError("len not supported")
207 raise RuntimeError("len not supported")
208
208
209 def empty(self):
209 def empty(self):
210 return False
210 return False
211
211
212 def flags(self, node):
212 def flags(self, node):
213 if isinstance(node, int):
213 if isinstance(node, int):
214 raise error.ProgrammingError(
214 raise error.ProgrammingError(
215 'remotefilelog does not accept integer rev for flags')
215 'remotefilelog does not accept integer rev for flags')
216 store = self.repo.contentstore
216 store = self.repo.contentstore
217 return store.getmeta(self.filename, node).get(constants.METAKEYFLAG, 0)
217 return store.getmeta(self.filename, node).get(constants.METAKEYFLAG, 0)
218
218
219 def parents(self, node):
219 def parents(self, node):
220 if node == nullid:
220 if node == nullid:
221 return nullid, nullid
221 return nullid, nullid
222
222
223 ancestormap = self.repo.metadatastore.getancestors(self.filename, node)
223 ancestormap = self.repo.metadatastore.getancestors(self.filename, node)
224 p1, p2, linknode, copyfrom = ancestormap[node]
224 p1, p2, linknode, copyfrom = ancestormap[node]
225 if copyfrom:
225 if copyfrom:
226 p1 = nullid
226 p1 = nullid
227
227
228 return p1, p2
228 return p1, p2
229
229
230 def parentrevs(self, rev):
230 def parentrevs(self, rev):
231 # TODO(augie): this is a node and should be a rev, but for now
231 # TODO(augie): this is a node and should be a rev, but for now
232 # nothing in core seems to actually break.
232 # nothing in core seems to actually break.
233 return self.parents(rev)
233 return self.parents(rev)
234
234
235 def linknode(self, node):
235 def linknode(self, node):
236 ancestormap = self.repo.metadatastore.getancestors(self.filename, node)
236 ancestormap = self.repo.metadatastore.getancestors(self.filename, node)
237 p1, p2, linknode, copyfrom = ancestormap[node]
237 p1, p2, linknode, copyfrom = ancestormap[node]
238 return linknode
238 return linknode
239
239
240 def linkrev(self, node):
240 def linkrev(self, node):
241 return self.repo.unfiltered().changelog.rev(self.linknode(node))
241 return self.repo.unfiltered().changelog.rev(self.linknode(node))
242
242
243 def emitrevisions(self, nodes, nodesorder=None, revisiondata=False,
243 def emitrevisions(self, nodes, nodesorder=None, revisiondata=False,
244 assumehaveparentrevisions=False, deltaprevious=False,
244 assumehaveparentrevisions=False, deltaprevious=False,
245 deltamode=None):
245 deltamode=None):
246 # we don't use any of these parameters here
246 # we don't use any of these parameters here
247 del nodesorder, revisiondata, assumehaveparentrevisions, deltaprevious
247 del nodesorder, revisiondata, assumehaveparentrevisions, deltaprevious
248 del deltamode
248 del deltamode
249 prevnode = None
249 prevnode = None
250 for node in nodes:
250 for node in nodes:
251 p1, p2 = self.parents(node)
251 p1, p2 = self.parents(node)
252 if prevnode is None:
252 if prevnode is None:
253 basenode = prevnode = p1
253 basenode = prevnode = p1
254 if basenode == node:
254 if basenode == node:
255 basenode = nullid
255 basenode = nullid
256 if basenode != nullid:
256 if basenode != nullid:
257 revision = None
257 revision = None
258 delta = self.revdiff(basenode, node)
258 delta = self.revdiff(basenode, node)
259 else:
259 else:
260 revision = self.revision(node, raw=True)
260 revision = self.revision(node, raw=True)
261 delta = None
261 delta = None
262 yield revlog.revlogrevisiondelta(
262 yield revlog.revlogrevisiondelta(
263 node=node,
263 node=node,
264 p1node=p1,
264 p1node=p1,
265 p2node=p2,
265 p2node=p2,
266 linknode=self.linknode(node),
266 linknode=self.linknode(node),
267 basenode=basenode,
267 basenode=basenode,
268 flags=self.flags(node),
268 flags=self.flags(node),
269 baserevisionsize=None,
269 baserevisionsize=None,
270 revision=revision,
270 revision=revision,
271 delta=delta,
271 delta=delta,
272 )
272 )
273
273
274 def revdiff(self, node1, node2):
274 def revdiff(self, node1, node2):
275 return mdiff.textdiff(self.revision(node1, raw=True),
275 return mdiff.textdiff(self.revision(node1, raw=True),
276 self.revision(node2, raw=True))
276 self.revision(node2, raw=True))
277
277
278 def lookup(self, node):
278 def lookup(self, node):
279 if len(node) == 40:
279 if len(node) == 40:
280 node = bin(node)
280 node = bin(node)
281 if len(node) != 20:
281 if len(node) != 20:
282 raise error.LookupError(node, self.filename,
282 raise error.LookupError(node, self.filename,
283 _('invalid lookup input'))
283 _('invalid lookup input'))
284
284
285 return node
285 return node
286
286
287 def rev(self, node):
287 def rev(self, node):
288 # This is a hack to make TortoiseHG work.
288 # This is a hack to make TortoiseHG work.
289 return node
289 return node
290
290
291 def node(self, rev):
291 def node(self, rev):
292 # This is a hack.
292 # This is a hack.
293 if isinstance(rev, int):
293 if isinstance(rev, int):
294 raise error.ProgrammingError(
294 raise error.ProgrammingError(
295 'remotefilelog does not convert integer rev to node')
295 'remotefilelog does not convert integer rev to node')
296 return rev
296 return rev
297
297
298 def revision(self, node, raw=False):
298 def revision(self, node, raw=False):
299 """returns the revlog contents at this node.
299 """returns the revlog contents at this node.
300 this includes the meta data traditionally included in file revlogs.
300 this includes the meta data traditionally included in file revlogs.
301 this is generally only used for bundling and communicating with vanilla
301 this is generally only used for bundling and communicating with vanilla
302 hg clients.
302 hg clients.
303 """
303 """
304 if node == nullid:
304 if node == nullid:
305 return ""
305 return ""
306 if len(node) != 20:
306 if len(node) != 20:
307 raise error.LookupError(node, self.filename,
307 raise error.LookupError(node, self.filename,
308 _('invalid revision input'))
308 _('invalid revision input'))
309
309
310 store = self.repo.contentstore
310 store = self.repo.contentstore
311 rawtext = store.get(self.filename, node)
311 rawtext = store.get(self.filename, node)
312 if raw:
312 if raw:
313 return rawtext
313 return rawtext
314 flags = store.getmeta(self.filename, node).get(constants.METAKEYFLAG, 0)
314 flags = store.getmeta(self.filename, node).get(constants.METAKEYFLAG, 0)
315 if flags == 0:
315 if flags == 0:
316 return rawtext
316 return rawtext
317 text, verifyhash = self._processflags(rawtext, flags, 'read')
317 text, verifyhash = self._processflags(rawtext, flags, 'read')
318 return text
318 return text
319
319
320 def _processflags(self, text, flags, operation, raw=False):
320 def _processflags(self, text, flags, operation, raw=False):
321 # mostly copied from hg/mercurial/revlog.py
321 # mostly copied from hg/mercurial/revlog.py
322 validatehash = True
322 validatehash = True
323 orderedflags = revlog.REVIDX_FLAGS_ORDER
323 orderedflags = revlog.REVIDX_FLAGS_ORDER
324 if operation == 'write':
324 if operation == 'write':
325 orderedflags = reversed(orderedflags)
325 orderedflags = reversed(orderedflags)
326 for flag in orderedflags:
326 for flag in orderedflags:
327 if flag & flags:
327 if flag & flags:
328 vhash = True
328 vhash = True
329 if flag not in revlog._flagprocessors:
329 if flag not in revlog._flagprocessors:
330 message = _("missing processor for flag '%#x'") % (flag)
330 message = _("missing processor for flag '%#x'") % (flag)
331 raise revlog.RevlogError(message)
331 raise revlog.RevlogError(message)
332 readfunc, writefunc, rawfunc = revlog._flagprocessors[flag]
332 readfunc, writefunc, rawfunc = revlog._flagprocessors[flag]
333 if raw:
333 if raw:
334 vhash = rawfunc(self, text)
334 vhash = rawfunc(self, text)
335 elif operation == 'read':
335 elif operation == 'read':
336 text, vhash = readfunc(self, text)
336 text, vhash = readfunc(self, text)
337 elif operation == 'write':
337 elif operation == 'write':
338 text, vhash = writefunc(self, text)
338 text, vhash = writefunc(self, text)
339 validatehash = validatehash and vhash
339 validatehash = validatehash and vhash
340 return text, validatehash
340 return text, validatehash
341
341
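The only subtlety in _processflags() is ordering: flags are walked in revlog.REVIDX_FLAGS_ORDER, and that order is reversed for writes, so a transform applied last on the way in is undone first on the way out. A toy illustration of that symmetry with two made-up processors (this is not the real revlog registry, just the ordering idea):

# toy 'flag processors' as (readfunc, writefunc) pairs, in read order
procs = [
    (lambda t: t[2:], lambda t: 'A:' + t),     # add/strip a prefix
    (lambda t: t[::-1], lambda t: t[::-1]),    # reverse the text
]

def towire(text):
    # 'write' path: apply processors in reversed order
    for readf, writef in reversed(procs):
        text = writef(text)
    return text

def fromwire(text):
    # 'read' path: apply processors in declared order
    for readf, writef in procs:
        text = readf(text)
    return text

assert towire('hi') == 'A:ih'
assert fromwire(towire('hi')) == 'hi'
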
342 def _read(self, id):
342 def _read(self, id):
343 """reads the raw file blob from disk, cache, or server"""
343 """reads the raw file blob from disk, cache, or server"""
344 fileservice = self.repo.fileservice
344 fileservice = self.repo.fileservice
345 localcache = fileservice.localcache
345 localcache = fileservice.localcache
346 cachekey = fileserverclient.getcachekey(self.repo.name, self.filename,
346 cachekey = fileserverclient.getcachekey(self.repo.name, self.filename,
347 id)
347 id)
348 try:
348 try:
349 return localcache.read(cachekey)
349 return localcache.read(cachekey)
350 except KeyError:
350 except KeyError:
351 pass
351 pass
352
352
353 localkey = fileserverclient.getlocalkey(self.filename, id)
353 localkey = fileserverclient.getlocalkey(self.filename, id)
354 localpath = os.path.join(self.localpath, localkey)
354 localpath = os.path.join(self.localpath, localkey)
355 try:
355 try:
356 return shallowutil.readfile(localpath)
356 return shallowutil.readfile(localpath)
357 except IOError:
357 except IOError:
358 pass
358 pass
359
359
360 fileservice.prefetch([(self.filename, id)])
360 fileservice.prefetch([(self.filename, id)])
361 try:
361 try:
362 return localcache.read(cachekey)
362 return localcache.read(cachekey)
363 except KeyError:
363 except KeyError:
364 pass
364 pass
365
365
366 raise error.LookupError(id, self.filename, _('no node'))
366 raise error.LookupError(id, self.filename, _('no node'))
367
367
368 def ancestormap(self, node):
368 def ancestormap(self, node):
369 return self.repo.metadatastore.getancestors(self.filename, node)
369 return self.repo.metadatastore.getancestors(self.filename, node)
370
370
371 def ancestor(self, a, b):
371 def ancestor(self, a, b):
372 if a == nullid or b == nullid:
372 if a == nullid or b == nullid:
373 return nullid
373 return nullid
374
374
375 revmap, parentfunc = self._buildrevgraph(a, b)
375 revmap, parentfunc = self._buildrevgraph(a, b)
376 nodemap = dict(((v, k) for (k, v) in revmap.iteritems()))
376 nodemap = dict(((v, k) for (k, v) in revmap.iteritems()))
377
377
378 ancs = ancestor.ancestors(parentfunc, revmap[a], revmap[b])
378 ancs = ancestor.ancestors(parentfunc, revmap[a], revmap[b])
379 if ancs:
379 if ancs:
380 # choose a consistent winner when there's a tie
380 # choose a consistent winner when there's a tie
381 return min(map(nodemap.__getitem__, ancs))
381 return min(map(nodemap.__getitem__, ancs))
382 return nullid
382 return nullid
383
383
384 def commonancestorsheads(self, a, b):
384 def commonancestorsheads(self, a, b):
385 """calculate all the heads of the common ancestors of nodes a and b"""
385 """calculate all the heads of the common ancestors of nodes a and b"""
386
386
387 if a == nullid or b == nullid:
387 if a == nullid or b == nullid:
388 return nullid
388 return nullid
389
389
390 revmap, parentfunc = self._buildrevgraph(a, b)
390 revmap, parentfunc = self._buildrevgraph(a, b)
391 nodemap = dict(((v, k) for (k, v) in revmap.iteritems()))
391 nodemap = dict(((v, k) for (k, v) in revmap.iteritems()))
392
392
393 ancs = ancestor.commonancestorsheads(parentfunc, revmap[a], revmap[b])
393 ancs = ancestor.commonancestorsheads(parentfunc, revmap[a], revmap[b])
394 return map(nodemap.__getitem__, ancs)
394 return map(nodemap.__getitem__, ancs)
395
395
396 def _buildrevgraph(self, a, b):
396 def _buildrevgraph(self, a, b):
397 """Builds a numeric revision graph for the given two nodes.
397 """Builds a numeric revision graph for the given two nodes.
398 Returns a node->rev map and a rev->[revs] parent function.
398 Returns a node->rev map and a rev->[revs] parent function.
399 """
399 """
400 amap = self.ancestormap(a)
400 amap = self.ancestormap(a)
401 bmap = self.ancestormap(b)
401 bmap = self.ancestormap(b)
402
402
403 # Union the two maps
403 # Union the two maps
404 parentsmap = collections.defaultdict(list)
404 parentsmap = collections.defaultdict(list)
405 allparents = set()
405 allparents = set()
406 for mapping in (amap, bmap):
406 for mapping in (amap, bmap):
407 for node, pdata in mapping.iteritems():
407 for node, pdata in mapping.iteritems():
408 parents = parentsmap[node]
408 parents = parentsmap[node]
409 p1, p2, linknode, copyfrom = pdata
409 p1, p2, linknode, copyfrom = pdata
410 # Don't follow renames (copyfrom).
410 # Don't follow renames (copyfrom).
411 # remotefilectx.ancestor does that.
411 # remotefilectx.ancestor does that.
412 if p1 != nullid and not copyfrom:
412 if p1 != nullid and not copyfrom:
413 parents.append(p1)
413 parents.append(p1)
414 allparents.add(p1)
414 allparents.add(p1)
415 if p2 != nullid:
415 if p2 != nullid:
416 parents.append(p2)
416 parents.append(p2)
417 allparents.add(p2)
417 allparents.add(p2)
418
418
419 # Breadth first traversal to build linkrev graph
419 # Breadth first traversal to build linkrev graph
420 parentrevs = collections.defaultdict(list)
420 parentrevs = collections.defaultdict(list)
421 revmap = {}
421 revmap = {}
422 queue = collections.deque(((None, n) for n in parentsmap.iterkeys()
422 queue = collections.deque(((None, n) for n in parentsmap
423 if n not in allparents))
423 if n not in allparents))
424 while queue:
424 while queue:
425 prevrev, current = queue.pop()
425 prevrev, current = queue.pop()
426 if current in revmap:
426 if current in revmap:
427 if prevrev:
427 if prevrev:
428 parentrevs[prevrev].append(revmap[current])
428 parentrevs[prevrev].append(revmap[current])
429 continue
429 continue
430
430
431 # Assign linkrevs in reverse order, so start at
431 # Assign linkrevs in reverse order, so start at
432 # len(parentsmap) and work backwards.
432 # len(parentsmap) and work backwards.
433 currentrev = len(parentsmap) - len(revmap) - 1
433 currentrev = len(parentsmap) - len(revmap) - 1
434 revmap[current] = currentrev
434 revmap[current] = currentrev
435
435
436 if prevrev:
436 if prevrev:
437 parentrevs[prevrev].append(currentrev)
437 parentrevs[prevrev].append(currentrev)
438
438
439 for parent in parentsmap.get(current):
439 for parent in parentsmap.get(current):
440 queue.appendleft((currentrev, parent))
440 queue.appendleft((currentrev, parent))
441
441
442 return revmap, parentrevs.__getitem__
442 return revmap, parentrevs.__getitem__
443
443
444 def strip(self, minlink, transaction):
444 def strip(self, minlink, transaction):
445 pass
445 pass
446
446
447 # misc unused things
447 # misc unused things
448 def files(self):
448 def files(self):
449 return []
449 return []
450
450
451 def checksize(self):
451 def checksize(self):
452 return 0, 0
452 return 0, 0
@@ -1,781 +1,781 b''
1 from __future__ import absolute_import
1 from __future__ import absolute_import
2
2
3 import os
3 import os
4 import time
4 import time
5
5
6 from mercurial.i18n import _
6 from mercurial.i18n import _
7 from mercurial.node import (
7 from mercurial.node import (
8 nullid,
8 nullid,
9 short,
9 short,
10 )
10 )
11 from mercurial import (
11 from mercurial import (
12 encoding,
12 encoding,
13 error,
13 error,
14 mdiff,
14 mdiff,
15 policy,
15 policy,
16 pycompat,
16 pycompat,
17 scmutil,
17 scmutil,
18 util,
18 util,
19 vfs,
19 vfs,
20 )
20 )
21 from mercurial.utils import procutil
21 from mercurial.utils import procutil
22 from . import (
22 from . import (
23 constants,
23 constants,
24 contentstore,
24 contentstore,
25 datapack,
25 datapack,
26 extutil,
26 extutil,
27 historypack,
27 historypack,
28 metadatastore,
28 metadatastore,
29 shallowutil,
29 shallowutil,
30 )
30 )
31
31
32 osutil = policy.importmod(r'osutil')
32 osutil = policy.importmod(r'osutil')
33
33
34 class RepackAlreadyRunning(error.Abort):
34 class RepackAlreadyRunning(error.Abort):
35 pass
35 pass
36
36
37 if util.safehasattr(util, '_hgexecutable'):
37 if util.safehasattr(util, '_hgexecutable'):
38 # Before 5be286db
38 # Before 5be286db
39 _hgexecutable = util.hgexecutable
39 _hgexecutable = util.hgexecutable
40 else:
40 else:
41 from mercurial.utils import procutil
41 from mercurial.utils import procutil
42 _hgexecutable = procutil.hgexecutable
42 _hgexecutable = procutil.hgexecutable
43
43
44 def backgroundrepack(repo, incremental=True, packsonly=False):
44 def backgroundrepack(repo, incremental=True, packsonly=False):
45 cmd = [_hgexecutable(), '-R', repo.origroot, 'repack']
45 cmd = [_hgexecutable(), '-R', repo.origroot, 'repack']
46 msg = _("(running background repack)\n")
46 msg = _("(running background repack)\n")
47 if incremental:
47 if incremental:
48 cmd.append('--incremental')
48 cmd.append('--incremental')
49 msg = _("(running background incremental repack)\n")
49 msg = _("(running background incremental repack)\n")
50 if packsonly:
50 if packsonly:
51 cmd.append('--packsonly')
51 cmd.append('--packsonly')
52 repo.ui.warn(msg)
52 repo.ui.warn(msg)
53 procutil.runbgcommand(cmd, encoding.environ)
53 procutil.runbgcommand(cmd, encoding.environ)
54
54
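Concretely, with incremental=True and packsonly left off, the command launched in the background is just the current hg binary re-run against the same repository root, for example (paths here are made up):

# hypothetical values: _hgexecutable() resolves the running hg binary
cmd = ['/usr/bin/hg', '-R', '/srv/repos/myrepo', 'repack', '--incremental']
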
55 def fullrepack(repo, options=None):
55 def fullrepack(repo, options=None):
56 """If ``packsonly`` is True, stores creating only loose objects are skipped.
56 """If ``packsonly`` is True, stores creating only loose objects are skipped.
57 """
57 """
58 if util.safehasattr(repo, 'shareddatastores'):
58 if util.safehasattr(repo, 'shareddatastores'):
59 datasource = contentstore.unioncontentstore(
59 datasource = contentstore.unioncontentstore(
60 *repo.shareddatastores)
60 *repo.shareddatastores)
61 historysource = metadatastore.unionmetadatastore(
61 historysource = metadatastore.unionmetadatastore(
62 *repo.sharedhistorystores,
62 *repo.sharedhistorystores,
63 allowincomplete=True)
63 allowincomplete=True)
64
64
65 packpath = shallowutil.getcachepackpath(
65 packpath = shallowutil.getcachepackpath(
66 repo,
66 repo,
67 constants.FILEPACK_CATEGORY)
67 constants.FILEPACK_CATEGORY)
68 _runrepack(repo, datasource, historysource, packpath,
68 _runrepack(repo, datasource, historysource, packpath,
69 constants.FILEPACK_CATEGORY, options=options)
69 constants.FILEPACK_CATEGORY, options=options)
70
70
71 if util.safehasattr(repo.manifestlog, 'datastore'):
71 if util.safehasattr(repo.manifestlog, 'datastore'):
72 localdata, shareddata = _getmanifeststores(repo)
72 localdata, shareddata = _getmanifeststores(repo)
73 lpackpath, ldstores, lhstores = localdata
73 lpackpath, ldstores, lhstores = localdata
74 spackpath, sdstores, shstores = shareddata
74 spackpath, sdstores, shstores = shareddata
75
75
76 # Repack the shared manifest store
76 # Repack the shared manifest store
77 datasource = contentstore.unioncontentstore(*sdstores)
77 datasource = contentstore.unioncontentstore(*sdstores)
78 historysource = metadatastore.unionmetadatastore(
78 historysource = metadatastore.unionmetadatastore(
79 *shstores,
79 *shstores,
80 allowincomplete=True)
80 allowincomplete=True)
81 _runrepack(repo, datasource, historysource, spackpath,
81 _runrepack(repo, datasource, historysource, spackpath,
82 constants.TREEPACK_CATEGORY, options=options)
82 constants.TREEPACK_CATEGORY, options=options)
83
83
84 # Repack the local manifest store
84 # Repack the local manifest store
85 datasource = contentstore.unioncontentstore(
85 datasource = contentstore.unioncontentstore(
86 *ldstores,
86 *ldstores,
87 allowincomplete=True)
87 allowincomplete=True)
88 historysource = metadatastore.unionmetadatastore(
88 historysource = metadatastore.unionmetadatastore(
89 *lhstores,
89 *lhstores,
90 allowincomplete=True)
90 allowincomplete=True)
91 _runrepack(repo, datasource, historysource, lpackpath,
91 _runrepack(repo, datasource, historysource, lpackpath,
92 constants.TREEPACK_CATEGORY, options=options)
92 constants.TREEPACK_CATEGORY, options=options)
93
93
94 def incrementalrepack(repo, options=None):
94 def incrementalrepack(repo, options=None):
95 """This repacks the repo by looking at the distribution of pack files in the
95 """This repacks the repo by looking at the distribution of pack files in the
96 repo and performing the most minimal repack to keep the repo in good shape.
96 repo and performing the most minimal repack to keep the repo in good shape.
97 """
97 """
98 if util.safehasattr(repo, 'shareddatastores'):
98 if util.safehasattr(repo, 'shareddatastores'):
99 packpath = shallowutil.getcachepackpath(
99 packpath = shallowutil.getcachepackpath(
100 repo,
100 repo,
101 constants.FILEPACK_CATEGORY)
101 constants.FILEPACK_CATEGORY)
102 _incrementalrepack(repo,
102 _incrementalrepack(repo,
103 repo.shareddatastores,
103 repo.shareddatastores,
104 repo.sharedhistorystores,
104 repo.sharedhistorystores,
105 packpath,
105 packpath,
106 constants.FILEPACK_CATEGORY,
106 constants.FILEPACK_CATEGORY,
107 options=options)
107 options=options)
108
108
109 if util.safehasattr(repo.manifestlog, 'datastore'):
109 if util.safehasattr(repo.manifestlog, 'datastore'):
110 localdata, shareddata = _getmanifeststores(repo)
110 localdata, shareddata = _getmanifeststores(repo)
111 lpackpath, ldstores, lhstores = localdata
111 lpackpath, ldstores, lhstores = localdata
112 spackpath, sdstores, shstores = shareddata
112 spackpath, sdstores, shstores = shareddata
113
113
114 # Repack the shared manifest store
114 # Repack the shared manifest store
115 _incrementalrepack(repo,
115 _incrementalrepack(repo,
116 sdstores,
116 sdstores,
117 shstores,
117 shstores,
118 spackpath,
118 spackpath,
119 constants.TREEPACK_CATEGORY,
119 constants.TREEPACK_CATEGORY,
120 options=options)
120 options=options)
121
121
122 # Repack the local manifest store
122 # Repack the local manifest store
123 _incrementalrepack(repo,
123 _incrementalrepack(repo,
124 ldstores,
124 ldstores,
125 lhstores,
125 lhstores,
126 lpackpath,
126 lpackpath,
127 constants.TREEPACK_CATEGORY,
127 constants.TREEPACK_CATEGORY,
128 allowincompletedata=True,
128 allowincompletedata=True,
129 options=options)
129 options=options)
130
130
131 def _getmanifeststores(repo):
131 def _getmanifeststores(repo):
132 shareddatastores = repo.manifestlog.shareddatastores
132 shareddatastores = repo.manifestlog.shareddatastores
133 localdatastores = repo.manifestlog.localdatastores
133 localdatastores = repo.manifestlog.localdatastores
134 sharedhistorystores = repo.manifestlog.sharedhistorystores
134 sharedhistorystores = repo.manifestlog.sharedhistorystores
135 localhistorystores = repo.manifestlog.localhistorystores
135 localhistorystores = repo.manifestlog.localhistorystores
136
136
137 sharedpackpath = shallowutil.getcachepackpath(repo,
137 sharedpackpath = shallowutil.getcachepackpath(repo,
138 constants.TREEPACK_CATEGORY)
138 constants.TREEPACK_CATEGORY)
139 localpackpath = shallowutil.getlocalpackpath(repo.svfs.vfs.base,
139 localpackpath = shallowutil.getlocalpackpath(repo.svfs.vfs.base,
140 constants.TREEPACK_CATEGORY)
140 constants.TREEPACK_CATEGORY)
141
141
142 return ((localpackpath, localdatastores, localhistorystores),
142 return ((localpackpath, localdatastores, localhistorystores),
143 (sharedpackpath, shareddatastores, sharedhistorystores))
143 (sharedpackpath, shareddatastores, sharedhistorystores))
144
144
145 def _topacks(packpath, files, constructor):
145 def _topacks(packpath, files, constructor):
146 paths = list(os.path.join(packpath, p) for p in files)
146 paths = list(os.path.join(packpath, p) for p in files)
147 packs = list(constructor(p) for p in paths)
147 packs = list(constructor(p) for p in paths)
148 return packs
148 return packs
149
149
150 def _deletebigpacks(repo, folder, files):
150 def _deletebigpacks(repo, folder, files):
151 """Deletes packfiles that are bigger than ``packs.maxpacksize``.
151 """Deletes packfiles that are bigger than ``packs.maxpacksize``.
152
152
153 Returns ``files`` with the removed files omitted."""
153 Returns ``files`` with the removed files omitted."""
154 maxsize = repo.ui.configbytes("packs", "maxpacksize")
154 maxsize = repo.ui.configbytes("packs", "maxpacksize")
155 if maxsize <= 0:
155 if maxsize <= 0:
156 return files
156 return files
157
157
158 # This only considers datapacks today, but we could broaden it to include
158 # This only considers datapacks today, but we could broaden it to include
159 # historypacks.
159 # historypacks.
160 VALIDEXTS = [".datapack", ".dataidx"]
160 VALIDEXTS = [".datapack", ".dataidx"]
161
161
162 # Either an oversize index or datapack will trigger cleanup of the whole
162 # Either an oversize index or datapack will trigger cleanup of the whole
163 # pack:
163 # pack:
164 oversized = set([os.path.splitext(path)[0] for path, ftype, stat in files
164 oversized = set([os.path.splitext(path)[0] for path, ftype, stat in files
165 if (stat.st_size > maxsize and (os.path.splitext(path)[1]
165 if (stat.st_size > maxsize and (os.path.splitext(path)[1]
166 in VALIDEXTS))])
166 in VALIDEXTS))])
167
167
168 for rootfname in oversized:
168 for rootfname in oversized:
169 rootpath = os.path.join(folder, rootfname)
169 rootpath = os.path.join(folder, rootfname)
170 for ext in VALIDEXTS:
170 for ext in VALIDEXTS:
171 path = rootpath + ext
171 path = rootpath + ext
172 repo.ui.debug('removing oversize packfile %s (%s)\n' %
172 repo.ui.debug('removing oversize packfile %s (%s)\n' %
173 (path, util.bytecount(os.stat(path).st_size)))
173 (path, util.bytecount(os.stat(path).st_size)))
174 os.unlink(path)
174 os.unlink(path)
175 return [row for row in files if os.path.basename(row[0]) not in oversized]
175 return [row for row in files if os.path.basename(row[0]) not in oversized]
176
176
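The oversize check keys on the filename without its extension, so a too-large .datapack (or .dataidx) marks the whole pack, and both sibling files are then unlinked by the loop above. A quick check of that set construction with fabricated directory entries:

import collections
import os

fakestat = collections.namedtuple('fakestat', 'st_size')
maxsize = 1000
VALIDEXTS = [".datapack", ".dataidx"]
files = [
    ('abc.datapack', 0, fakestat(5000)),    # oversize pack
    ('abc.dataidx', 0, fakestat(10)),       # its (small) index
    ('def.datapack', 0, fakestat(10)),      # healthy pack
]
oversized = set(os.path.splitext(path)[0] for path, ftype, stat in files
                if (stat.st_size > maxsize
                    and os.path.splitext(path)[1] in VALIDEXTS))
assert oversized == {'abc'}   # abc.datapack and abc.dataidx both get removed
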
177 def _incrementalrepack(repo, datastore, historystore, packpath, category,
177 def _incrementalrepack(repo, datastore, historystore, packpath, category,
178 allowincompletedata=False, options=None):
178 allowincompletedata=False, options=None):
179 shallowutil.mkstickygroupdir(repo.ui, packpath)
179 shallowutil.mkstickygroupdir(repo.ui, packpath)
180
180
181 files = osutil.listdir(packpath, stat=True)
181 files = osutil.listdir(packpath, stat=True)
182 files = _deletebigpacks(repo, packpath, files)
182 files = _deletebigpacks(repo, packpath, files)
183 datapacks = _topacks(packpath,
183 datapacks = _topacks(packpath,
184 _computeincrementaldatapack(repo.ui, files),
184 _computeincrementaldatapack(repo.ui, files),
185 datapack.datapack)
185 datapack.datapack)
186 datapacks.extend(s for s in datastore
186 datapacks.extend(s for s in datastore
187 if not isinstance(s, datapack.datapackstore))
187 if not isinstance(s, datapack.datapackstore))
188
188
189 historypacks = _topacks(packpath,
189 historypacks = _topacks(packpath,
190 _computeincrementalhistorypack(repo.ui, files),
190 _computeincrementalhistorypack(repo.ui, files),
191 historypack.historypack)
191 historypack.historypack)
192 historypacks.extend(s for s in historystore
192 historypacks.extend(s for s in historystore
193 if not isinstance(s, historypack.historypackstore))
193 if not isinstance(s, historypack.historypackstore))
194
194
195 # ``allhistory{files,packs}`` contains all known history packs, even ones we
195 # ``allhistory{files,packs}`` contains all known history packs, even ones we
196 # don't plan to repack. They are used during the datapack repack to ensure
196 # don't plan to repack. They are used during the datapack repack to ensure
197 # good ordering of nodes.
197 # good ordering of nodes.
198 allhistoryfiles = _allpackfileswithsuffix(files, historypack.PACKSUFFIX,
198 allhistoryfiles = _allpackfileswithsuffix(files, historypack.PACKSUFFIX,
199 historypack.INDEXSUFFIX)
199 historypack.INDEXSUFFIX)
200 allhistorypacks = _topacks(packpath,
200 allhistorypacks = _topacks(packpath,
201 (f for f, mode, stat in allhistoryfiles),
201 (f for f, mode, stat in allhistoryfiles),
202 historypack.historypack)
202 historypack.historypack)
203 allhistorypacks.extend(s for s in historystore
203 allhistorypacks.extend(s for s in historystore
204 if not isinstance(s, historypack.historypackstore))
204 if not isinstance(s, historypack.historypackstore))
205 _runrepack(repo,
205 _runrepack(repo,
206 contentstore.unioncontentstore(
206 contentstore.unioncontentstore(
207 *datapacks,
207 *datapacks,
208 allowincomplete=allowincompletedata),
208 allowincomplete=allowincompletedata),
209 metadatastore.unionmetadatastore(
209 metadatastore.unionmetadatastore(
210 *historypacks,
210 *historypacks,
211 allowincomplete=True),
211 allowincomplete=True),
212 packpath, category,
212 packpath, category,
213 fullhistory=metadatastore.unionmetadatastore(
213 fullhistory=metadatastore.unionmetadatastore(
214 *allhistorypacks,
214 *allhistorypacks,
215 allowincomplete=True),
215 allowincomplete=True),
216 options=options)
216 options=options)
217
217
218 def _computeincrementaldatapack(ui, files):
218 def _computeincrementaldatapack(ui, files):
219 opts = {
219 opts = {
220 'gencountlimit' : ui.configint(
220 'gencountlimit' : ui.configint(
221 'remotefilelog', 'data.gencountlimit'),
221 'remotefilelog', 'data.gencountlimit'),
222 'generations' : ui.configlist(
222 'generations' : ui.configlist(
223 'remotefilelog', 'data.generations'),
223 'remotefilelog', 'data.generations'),
224 'maxrepackpacks' : ui.configint(
224 'maxrepackpacks' : ui.configint(
225 'remotefilelog', 'data.maxrepackpacks'),
225 'remotefilelog', 'data.maxrepackpacks'),
226 'repackmaxpacksize' : ui.configbytes(
226 'repackmaxpacksize' : ui.configbytes(
227 'remotefilelog', 'data.repackmaxpacksize'),
227 'remotefilelog', 'data.repackmaxpacksize'),
228 'repacksizelimit' : ui.configbytes(
228 'repacksizelimit' : ui.configbytes(
229 'remotefilelog', 'data.repacksizelimit'),
229 'remotefilelog', 'data.repacksizelimit'),
230 }
230 }
231
231
232 packfiles = _allpackfileswithsuffix(
232 packfiles = _allpackfileswithsuffix(
233 files, datapack.PACKSUFFIX, datapack.INDEXSUFFIX)
233 files, datapack.PACKSUFFIX, datapack.INDEXSUFFIX)
234 return _computeincrementalpack(packfiles, opts)
234 return _computeincrementalpack(packfiles, opts)
235
235
236 def _computeincrementalhistorypack(ui, files):
236 def _computeincrementalhistorypack(ui, files):
237 opts = {
237 opts = {
238 'gencountlimit' : ui.configint(
238 'gencountlimit' : ui.configint(
239 'remotefilelog', 'history.gencountlimit'),
239 'remotefilelog', 'history.gencountlimit'),
240 'generations' : ui.configlist(
240 'generations' : ui.configlist(
241 'remotefilelog', 'history.generations', ['100MB']),
241 'remotefilelog', 'history.generations', ['100MB']),
242 'maxrepackpacks' : ui.configint(
242 'maxrepackpacks' : ui.configint(
243 'remotefilelog', 'history.maxrepackpacks'),
243 'remotefilelog', 'history.maxrepackpacks'),
244 'repackmaxpacksize' : ui.configbytes(
244 'repackmaxpacksize' : ui.configbytes(
245 'remotefilelog', 'history.repackmaxpacksize', '400MB'),
245 'remotefilelog', 'history.repackmaxpacksize', '400MB'),
246 'repacksizelimit' : ui.configbytes(
246 'repacksizelimit' : ui.configbytes(
247 'remotefilelog', 'history.repacksizelimit'),
247 'remotefilelog', 'history.repacksizelimit'),
248 }
248 }
249
249
250 packfiles = _allpackfileswithsuffix(
250 packfiles = _allpackfileswithsuffix(
251 files, historypack.PACKSUFFIX, historypack.INDEXSUFFIX)
251 files, historypack.PACKSUFFIX, historypack.INDEXSUFFIX)
252 return _computeincrementalpack(packfiles, opts)
252 return _computeincrementalpack(packfiles, opts)
253
253
254 def _allpackfileswithsuffix(files, packsuffix, indexsuffix):
254 def _allpackfileswithsuffix(files, packsuffix, indexsuffix):
255 result = []
255 result = []
256 fileset = set(fn for fn, mode, stat in files)
256 fileset = set(fn for fn, mode, stat in files)
257 for filename, mode, stat in files:
257 for filename, mode, stat in files:
258 if not filename.endswith(packsuffix):
258 if not filename.endswith(packsuffix):
259 continue
259 continue
260
260
261 prefix = filename[:-len(packsuffix)]
261 prefix = filename[:-len(packsuffix)]
262
262
263 # Don't process a pack if it doesn't have an index.
263 # Don't process a pack if it doesn't have an index.
264 if (prefix + indexsuffix) not in fileset:
264 if (prefix + indexsuffix) not in fileset:
265 continue
265 continue
266 result.append((prefix, mode, stat))
266 result.append((prefix, mode, stat))
267
267
268 return result
268 return result
269
269
def _computeincrementalpack(files, opts):
    """Given a set of pack files along with the configuration options, this
    function computes the list of files that should be packed as part of an
    incremental repack.

    It tries to strike a balance between keeping incremental repacks cheap
    (i.e. packing small things when possible) and rolling the packs up into
    the big ones over time.
    """

    limits = list(sorted((util.sizetoint(s) for s in opts['generations']),
                         reverse=True))
    limits.append(0)

    # Group the packs by generation (i.e. by size)
    generations = []
    for i in pycompat.xrange(len(limits)):
        generations.append([])

    sizes = {}
    for prefix, mode, stat in files:
        size = stat.st_size
        if size > opts['repackmaxpacksize']:
            continue

        sizes[prefix] = size
        for i, limit in enumerate(limits):
            if size > limit:
                generations[i].append(prefix)
                break

    # Steps for picking what packs to repack:
    # 1. Pick the largest generation with > gencountlimit pack files.
    # 2. Take the smallest three packs.
    # 3. While total-size-of-packs < repacksizelimit: add another pack

    # Find the largest generation with more than gencountlimit packs
    genpacks = []
    for i, limit in enumerate(limits):
        if len(generations[i]) > opts['gencountlimit']:
            # Sort to be smallest last, for easy popping later
            genpacks.extend(sorted(generations[i], reverse=True,
                                   key=lambda x: sizes[x]))
            break

    # Take as many packs from the generation as we can
    chosenpacks = genpacks[-3:]
    genpacks = genpacks[:-3]
    repacksize = sum(sizes[n] for n in chosenpacks)
    while (repacksize < opts['repacksizelimit'] and genpacks and
           len(chosenpacks) < opts['maxrepackpacks']):
        chosenpacks.append(genpacks.pop())
        repacksize += sizes[chosenpacks[-1]]

    return chosenpacks

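# Worked example (illustrative, with made-up sizes): for
# generations = ['1GB', '100MB', '1MB'] the sorted limits are
# [1GB, 100MB, 1MB, 0]. A 300MB pack then falls into generation 1 (> 100MB),
# a 5MB pack into generation 2 (> 1MB) and a 100KB pack into generation 3.
# If generation 2 is the first one holding more than gencountlimit packs,
# its three smallest packs are chosen first and further packs are popped
# smallest-first until repacksizelimit or maxrepackpacks is hit.
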
def _runrepack(repo, data, history, packpath, category, fullhistory=None,
               options=None):
    shallowutil.mkstickygroupdir(repo.ui, packpath)

    def isold(repo, filename, node):
        """Check if the file node is older than a limit.
        Unless a limit is specified in the config, the default limit is used.
        """
        filectx = repo.filectx(filename, fileid=node)
        filetime = repo[filectx.linkrev()].date()

        ttl = repo.ui.configint('remotefilelog', 'nodettl')

        limit = time.time() - ttl
        return filetime[0] < limit

    garbagecollect = repo.ui.configbool('remotefilelog', 'gcrepack')
    if not fullhistory:
        fullhistory = history
    packer = repacker(repo, data, history, fullhistory, category,
                      gc=garbagecollect, isold=isold, options=options)

    with datapack.mutabledatapack(repo.ui, packpath, version=2) as dpack:
        with historypack.mutablehistorypack(repo.ui, packpath) as hpack:
            try:
                packer.run(dpack, hpack)
            except error.LockHeld:
                raise RepackAlreadyRunning(_("skipping repack - another repack "
                                             "is already running"))

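# Illustrative note on isold() above (hypothetical value): with
# remotefilelog.nodettl set to 30 days (2592000 seconds), a node whose
# linkrev commit date is older than time.time() - 2592000 counts as old and
# is eligible for garbage collection unless it is in the keepset.
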
def keepset(repo, keyfn, lastkeepkeys=None):
    """Computes a keepset which is not garbage collected.
    'keyfn' is a function that maps filename, node to a unique key.
    'lastkeepkeys' is an optional argument and if provided the keepset
    function updates lastkeepkeys with more keys and returns the result.
    """
    if not lastkeepkeys:
        keepkeys = set()
    else:
        keepkeys = lastkeepkeys

    # We want to keep:
    # 1. Working copy parent
    # 2. Draft commits
    # 3. Parents of draft commits
    # 4. Pullprefetch and bgprefetchrevs revsets if specified
    revs = ['.', 'draft()', 'parents(draft())']
    prefetchrevs = repo.ui.config('remotefilelog', 'pullprefetch', None)
    if prefetchrevs:
        revs.append('(%s)' % prefetchrevs)
    prefetchrevs = repo.ui.config('remotefilelog', 'bgprefetchrevs', None)
    if prefetchrevs:
        revs.append('(%s)' % prefetchrevs)
    revs = '+'.join(revs)

    revs = ['sort((%s), "topo")' % revs]
    keep = scmutil.revrange(repo, revs)

    processed = set()
    lastmanifest = None

    # process the commits in toposorted order starting from the oldest
    for r in reversed(keep._list):
        if repo[r].p1().rev() in processed:
            # if the direct parent has already been processed
            # then we only need to process the delta
            m = repo[r].manifestctx().readdelta()
        else:
            # otherwise take the manifest and diff it
            # with the previous manifest if one exists
            if lastmanifest:
                m = repo[r].manifest().diff(lastmanifest)
            else:
                m = repo[r].manifest()
            lastmanifest = repo[r].manifest()
        processed.add(r)

        # populate keepkeys with keys from the current manifest
        if type(m) is dict:
            # m is a result of diff of two manifests and is a dictionary that
            # maps filename to ((newnode, newflag), (oldnode, oldflag)) tuple
            for filename, diff in m.iteritems():
                if diff[0][0] is not None:
                    keepkeys.add(keyfn(filename, diff[0][0]))
        else:
            # m is a manifest object
            for filename, filenode in m.iteritems():
                keepkeys.add(keyfn(filename, filenode))

    return keepkeys

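# Usage sketch (illustrative): the repacker below calls this with a tuple
# key function, e.g.
#
#   keepkeys = keepset(repo, lambda f, n: (f, n))
#
# so keepkeys holds (filename, filenode) pairs for the working copy parent,
# draft commits and their parents, plus any configured pullprefetch /
# bgprefetchrevs revsets.
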
class repacker(object):
    """Class for orchestrating the repack of data and history information into a
    new format.
    """
    def __init__(self, repo, data, history, fullhistory, category, gc=False,
                 isold=None, options=None):
        self.repo = repo
        self.data = data
        self.history = history
        self.fullhistory = fullhistory
        self.unit = constants.getunits(category)
        self.garbagecollect = gc
        self.options = options
        if self.garbagecollect:
            if not isold:
                raise ValueError("Function 'isold' is not properly specified")
            # use (filename, node) tuple as a keepset key
            self.keepkeys = keepset(repo, lambda f, n : (f, n))
            self.isold = isold

    def run(self, targetdata, targethistory):
        ledger = repackledger()

        with extutil.flock(repacklockvfs(self.repo).join("repacklock"),
                           _('repacking %s') % self.repo.origroot, timeout=0):
            self.repo.hook('prerepack')

            # Populate ledger from source
            self.data.markledger(ledger, options=self.options)
            self.history.markledger(ledger, options=self.options)

            # Run repack
            self.repackdata(ledger, targetdata)
            self.repackhistory(ledger, targethistory)

            # Call cleanup on each source
            for source in ledger.sources:
                source.cleanup(ledger)

    def _chainorphans(self, ui, filename, nodes, orphans, deltabases):
        """Reorders ``orphans`` into a single chain inside ``nodes`` and
        ``deltabases``.

        We often have orphan entries (nodes without a base that aren't
        referenced by other nodes -- i.e., not part of a chain) due to gaps in
        history. Rather than store them as individual fulltexts, we prefer to
        insert them as one chain sorted by size.
        """
        if not orphans:
            return nodes

        def getsize(node, default=0):
            meta = self.data.getmeta(filename, node)
            if constants.METAKEYSIZE in meta:
                return meta[constants.METAKEYSIZE]
            else:
                return default

        # Sort orphans by size; biggest first is preferred, since it's more
        # likely to be the newest version assuming files grow over time.
        # (Sort by node first to ensure the sort is stable.)
        orphans = sorted(orphans)
        orphans = list(sorted(orphans, key=getsize, reverse=True))
        if ui.debugflag:
            ui.debug("%s: orphan chain: %s\n" % (filename,
                ", ".join([short(s) for s in orphans])))

        # Create one contiguous chain and reassign deltabases.
        for i, node in enumerate(orphans):
            if i == 0:
                deltabases[node] = (nullid, 0)
            else:
                parent = orphans[i - 1]
                deltabases[node] = (parent, deltabases[parent][1] + 1)
        nodes = filter(lambda node: node not in orphans, nodes)
        nodes += orphans
        return nodes

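# Illustrative example of _chainorphans (hypothetical nodes a, b, c with
# sizes 30, 20, 10): after the biggest-first sort the reassignment yields
#   deltabases[a] = (nullid, 0)
#   deltabases[b] = (a, 1)
#   deltabases[c] = (b, 2)
# and a, b, c are appended to the end of the returned node list, so every
# orphan except the largest is stored as a delta against the next-larger one
# instead of as a fulltext.
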
    def repackdata(self, ledger, target):
        ui = self.repo.ui
        maxchainlen = ui.configint('packs', 'maxchainlen', 1000)

        byfile = {}
        for entry in ledger.entries.itervalues():
            if entry.datasource:
                byfile.setdefault(entry.filename, {})[entry.node] = entry

        count = 0
        for filename, entries in sorted(byfile.iteritems()):
            ui.progress(_("repacking data"), count, unit=self.unit,
                        total=len(byfile))

            ancestors = {}
            nodes = list(node for node in entries)
            nohistory = []
            for i, node in enumerate(nodes):
                if node in ancestors:
                    continue
                ui.progress(_("building history"), i, unit='nodes',
                            total=len(nodes))
                try:
                    ancestors.update(self.fullhistory.getancestors(filename,
                        node, known=ancestors))
                except KeyError:
                    # Since we're packing data entries, we may not have the
                    # corresponding history entries for them. It's not a big
                    # deal, but the entries won't be delta'd perfectly.
                    nohistory.append(node)
            ui.progress(_("building history"), None)

            # Order the nodes children first, so we can produce reverse deltas
            orderednodes = list(reversed(self._toposort(ancestors)))
            if len(nohistory) > 0:
                ui.debug('repackdata: %d nodes without history\n' %
                         len(nohistory))
                orderednodes.extend(sorted(nohistory))

            # Filter orderednodes to just the nodes we want to serialize (it
            # currently also has the edge nodes' ancestors).
            orderednodes = filter(lambda node: node in nodes, orderednodes)

            # Garbage collect old nodes:
            if self.garbagecollect:
                neworderednodes = []
                for node in orderednodes:
                    # If the node is old and is not in the keepset, we skip it,
                    # and mark as garbage collected
                    if ((filename, node) not in self.keepkeys and
                        self.isold(self.repo, filename, node)):
                        entries[node].gced = True
                        continue
                    neworderednodes.append(node)
                orderednodes = neworderednodes

            # Compute delta bases for nodes:
            deltabases = {}
            nobase = set()
            referenced = set()
            nodes = set(nodes)
            for i, node in enumerate(orderednodes):
                ui.progress(_("processing nodes"), i, unit='nodes',
                            total=len(orderednodes))
                # Find delta base
                # TODO: allow delta'ing against most recent descendant instead
                # of immediate child
                deltatuple = deltabases.get(node, None)
                if deltatuple is None:
                    deltabase, chainlen = nullid, 0
                    deltabases[node] = (nullid, 0)
                    nobase.add(node)
                else:
                    deltabase, chainlen = deltatuple
                    referenced.add(deltabase)

                # Use available ancestor information to inform our delta choices
                ancestorinfo = ancestors.get(node)
                if ancestorinfo:
                    p1, p2, linknode, copyfrom = ancestorinfo

                    # The presence of copyfrom means we're at a point where the
                    # file was copied from elsewhere. So don't attempt to do any
                    # deltas with the other file.
                    if copyfrom:
                        p1 = nullid

                    if chainlen < maxchainlen:
                        # Record this child as the delta base for its parents.
                        # This may be non-optimal, since the parents may have
                        # many children, and this will only choose the last one.
                        # TODO: record all children and try all deltas to find
                        # best
                        if p1 != nullid:
                            deltabases[p1] = (node, chainlen + 1)
                        if p2 != nullid:
                            deltabases[p2] = (node, chainlen + 1)

            # experimental config: repack.chainorphansbysize
            if ui.configbool('repack', 'chainorphansbysize'):
                orphans = nobase - referenced
                orderednodes = self._chainorphans(ui, filename, orderednodes,
                                                  orphans, deltabases)

            # Compute deltas and write to the pack
            for i, node in enumerate(orderednodes):
                deltabase, chainlen = deltabases[node]
                # Compute delta
                # TODO: Optimize the deltachain fetching. Since we're
                # iterating over the different versions of the file, we may
                # be fetching the same deltachain over and over again.
                meta = None
                if deltabase != nullid:
                    deltaentry = self.data.getdelta(filename, node)
                    delta, deltabasename, origdeltabase, meta = deltaentry
                    size = meta.get(constants.METAKEYSIZE)
                    if (deltabasename != filename or origdeltabase != deltabase
                        or size is None):
                        deltabasetext = self.data.get(filename, deltabase)
                        original = self.data.get(filename, node)
                        size = len(original)
                        delta = mdiff.textdiff(deltabasetext, original)
                else:
                    delta = self.data.get(filename, node)
                    size = len(delta)
                    meta = self.data.getmeta(filename, node)

                # TODO: don't use the delta if it's larger than the fulltext
                if constants.METAKEYSIZE not in meta:
                    meta[constants.METAKEYSIZE] = size
                target.add(filename, node, deltabase, delta, meta)

                entries[node].datarepacked = True

            ui.progress(_("processing nodes"), None)
            count += 1

        ui.progress(_("repacking data"), None)
        target.close(ledger=ledger)

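    # Illustrative sketch of the reverse-delta bookkeeping above (hypothetical
    # nodes): when a child c with parents p1, p2 is processed, the loop records
    #   deltabases[p1] = (c, chainlen + 1)
    #   deltabases[p2] = (c, chainlen + 1)
    # so that each parent, reached later in the children-first order, is
    # written as a delta against its child. Nodes left in nobase - referenced
    # are the orphans handed to _chainorphans when repack.chainorphansbysize
    # is enabled.
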
    def repackhistory(self, ledger, target):
        ui = self.repo.ui

        byfile = {}
        for entry in ledger.entries.itervalues():
            if entry.historysource:
                byfile.setdefault(entry.filename, {})[entry.node] = entry

        count = 0
        for filename, entries in sorted(byfile.iteritems()):
            ancestors = {}
            nodes = list(node for node in entries)

            for node in nodes:
                if node in ancestors:
                    continue
                ancestors.update(self.history.getancestors(filename, node,
                                                           known=ancestors))

            # Order the nodes children first
            orderednodes = reversed(self._toposort(ancestors))

            # Write to the pack
            dontprocess = set()
            for node in orderednodes:
                p1, p2, linknode, copyfrom = ancestors[node]

                # If the node is marked dontprocess, but it's also in the
                # explicit entries set, that means the node exists both in this
                # file and in another file that was copied to this file.
                # Usually this happens if the file was copied to another file,
                # then the copy was deleted, then reintroduced without copy
                # metadata. The original add and the new add have the same hash
                # since the content is identical and the parents are null.
                if node in dontprocess and node not in entries:
                    # If copyfrom == filename, it means the copy history
                    # went to some other file, then came back to this one, so
                    # we should continue processing it.
                    if p1 != nullid and copyfrom != filename:
                        dontprocess.add(p1)
                    if p2 != nullid:
                        dontprocess.add(p2)
                    continue

                if copyfrom:
                    dontprocess.add(p1)

                target.add(filename, node, p1, p2, linknode, copyfrom)

                if node in entries:
                    entries[node].historyrepacked = True

            count += 1
            ui.progress(_("repacking history"), count, unit=self.unit,
                        total=len(byfile))

        ui.progress(_("repacking history"), None)
        target.close(ledger=ledger)

    def _toposort(self, ancestors):
        def parentfunc(node):
            p1, p2, linknode, copyfrom = ancestors[node]
            parents = []
            if p1 != nullid:
                parents.append(p1)
            if p2 != nullid:
                parents.append(p2)
            return parents

        sortednodes = shallowutil.sortnodes(ancestors.keys(), parentfunc)
        return sortednodes

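# Illustrative example of _toposort (hypothetical nodes): for a linear
# history root -> mid -> tip, sortnodes() returns [root, mid, tip] (parents
# before children), and the reversed() calls in repackdata/repackhistory turn
# that into the children-first order [tip, mid, root] used for writing.
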
class repackledger(object):
    """Storage for all the bookkeeping that happens during a repack. It contains
    the list of revisions being repacked, what happened to each revision, and
    which source store contained which revision originally (for later cleanup).
    """
    def __init__(self):
        self.entries = {}
        self.sources = {}
        self.created = set()

    def markdataentry(self, source, filename, node):
        """Mark the given filename+node revision as having a data rev in the
        given source.
        """
        entry = self._getorcreateentry(filename, node)
        entry.datasource = True
        entries = self.sources.get(source)
        if not entries:
            entries = set()
            self.sources[source] = entries
        entries.add(entry)

    def markhistoryentry(self, source, filename, node):
        """Mark the given filename+node revision as having a history rev in the
        given source.
        """
        entry = self._getorcreateentry(filename, node)
        entry.historysource = True
        entries = self.sources.get(source)
        if not entries:
            entries = set()
            self.sources[source] = entries
        entries.add(entry)

    def _getorcreateentry(self, filename, node):
        key = (filename, node)
        value = self.entries.get(key)
        if not value:
            value = repackentry(filename, node)
            self.entries[key] = value

        return value

    def addcreated(self, value):
        self.created.add(value)

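# Usage sketch (illustrative, not taken from this file): a data store's
# markledger() implementation would typically loop over its (filename, node)
# entries and call
#
#   ledger.markdataentry(self, filename, node)
#
# (markhistoryentry for history stores), so that cleanup() can later tell
# which source packs had all of their entries repacked.
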
class repackentry(object):
    """Simple class representing a single revision entry in the repackledger.
    """
    __slots__ = (r'filename', r'node', r'datasource', r'historysource',
                 r'datarepacked', r'historyrepacked', r'gced')
    def __init__(self, filename, node):
        self.filename = filename
        self.node = node
        # If the revision has a data entry in the source
        self.datasource = False
        # If the revision has a history entry in the source
        self.historysource = False
        # If the revision's data entry was repacked into the repack target
        self.datarepacked = False
        # If the revision's history entry was repacked into the repack target
        self.historyrepacked = False
        # If garbage collected
        self.gced = False

def repacklockvfs(repo):
    if util.safehasattr(repo, 'name'):
        # Lock in the shared cache so repacks across multiple copies of the same
        # repo are coordinated.
        sharedcachepath = shallowutil.getcachepackpath(
            repo,
            constants.FILEPACK_CATEGORY)
        return vfs.vfs(sharedcachepath)
    else:
        return repo.svfs