py3: add b suffix to make sure file is opened in bytes mode...
Pulkit Goyal
r40650:aa588bf4 default
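The change is mechanical but matters on Python 3: the pack and index temp files receive struct-packed headers and hashed payloads, which are bytes, and a text-mode file object rejects them. A minimal sketch of the failure mode this commit addresses (the temporary file below is hypothetical, not part of the patch):

    import os
    import struct
    import tempfile

    fd, path = tempfile.mkstemp(suffix='-tmp')   # stand-in for opener.mkstemp

    fp = os.fdopen(fd, 'w+')                     # text mode, as before the change
    try:
        fp.write(struct.pack('!B', 2))           # pack headers are bytes -> TypeError on py3
    except TypeError:
        pass
    finally:
        fp.close()

    with open(path, 'wb+') as fp:                # bytes mode, as after the change
        fp.write(struct.pack('!B', 2))           # accepted
    os.unlink(path)

On Python 2 both modes accept the packed header, which is why the missing 'b' went unnoticed until the Python 3 port.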
@@ -1,539 +1,539 @@
1 from __future__ import absolute_import
1 from __future__ import absolute_import
2
2
3 import collections
3 import collections
4 import errno
4 import errno
5 import hashlib
5 import hashlib
6 import mmap
6 import mmap
7 import os
7 import os
8 import struct
8 import struct
9 import time
9 import time
10
10
11 from mercurial.i18n import _
11 from mercurial.i18n import _
12 from mercurial import (
12 from mercurial import (
13 policy,
13 policy,
14 pycompat,
14 pycompat,
15 util,
15 util,
16 vfs as vfsmod,
16 vfs as vfsmod,
17 )
17 )
18 from . import shallowutil
18 from . import shallowutil
19
19
20 osutil = policy.importmod(r'osutil')
20 osutil = policy.importmod(r'osutil')
21
21
22 # The pack version supported by this implementation. This will need to be
22 # The pack version supported by this implementation. This will need to be
23 # rev'd whenever the byte format changes. Ex: changing the fanout prefix,
23 # rev'd whenever the byte format changes. Ex: changing the fanout prefix,
24 # changing any of the int sizes, changing the delta algorithm, etc.
24 # changing any of the int sizes, changing the delta algorithm, etc.
25 PACKVERSIONSIZE = 1
25 PACKVERSIONSIZE = 1
26 INDEXVERSIONSIZE = 2
26 INDEXVERSIONSIZE = 2
27
27
28 FANOUTSTART = INDEXVERSIONSIZE
28 FANOUTSTART = INDEXVERSIONSIZE
29
29
30 # Constant that indicates a fanout table entry hasn't been filled in. (This does
30 # Constant that indicates a fanout table entry hasn't been filled in. (This does
31 # not get serialized)
31 # not get serialized)
32 EMPTYFANOUT = -1
32 EMPTYFANOUT = -1
33
33
34 # The fanout prefix is the number of bytes that can be addressed by the fanout
34 # The fanout prefix is the number of bytes that can be addressed by the fanout
35 # table. Example: a fanout prefix of 1 means we use the first byte of a hash to
35 # table. Example: a fanout prefix of 1 means we use the first byte of a hash to
36 # look in the fanout table (which will be 2^8 entries long).
36 # look in the fanout table (which will be 2^8 entries long).
37 SMALLFANOUTPREFIX = 1
37 SMALLFANOUTPREFIX = 1
38 LARGEFANOUTPREFIX = 2
38 LARGEFANOUTPREFIX = 2
39
39
40 # The number of entries in the index at which point we switch to a large fanout.
40 # The number of entries in the index at which point we switch to a large fanout.
41 # It is chosen to balance the linear scan through a sparse fanout, with the
41 # It is chosen to balance the linear scan through a sparse fanout, with the
42 # size of the bisect in actual index.
42 # size of the bisect in actual index.
43 # 2^16 / 8 was chosen because it trades off (1 step fanout scan + 5 step
43 # 2^16 / 8 was chosen because it trades off (1 step fanout scan + 5 step
44 # bisect) with (8 step fanout scan + 1 step bisect)
44 # bisect) with (8 step fanout scan + 1 step bisect)
45 # 5 step bisect = log(2^16 / 8 / 255) # fanout
45 # 5 step bisect = log(2^16 / 8 / 255) # fanout
46 # 8 step fanout scan = 2^16 / (2^16 / 8) # fanout space divided by entries
46 # 8 step fanout scan = 2^16 / (2^16 / 8) # fanout space divided by entries
47 SMALLFANOUTCUTOFF = 2**16 / 8
47 SMALLFANOUTCUTOFF = 2**16 / 8
48
48
49 # The amount of time to wait between checking for new packs. This prevents an
49 # The amount of time to wait between checking for new packs. This prevents an
50 # exception when data is moved to a new pack after the process has already
50 # exception when data is moved to a new pack after the process has already
51 # loaded the pack list.
51 # loaded the pack list.
52 REFRESHRATE = 0.1
52 REFRESHRATE = 0.1
53
53
54 if pycompat.isposix:
54 if pycompat.isposix:
55 # With glibc 2.7+ the 'e' flag uses O_CLOEXEC when opening.
55 # With glibc 2.7+ the 'e' flag uses O_CLOEXEC when opening.
56 # The 'e' flag will be ignored on older versions of glibc.
56 # The 'e' flag will be ignored on older versions of glibc.
57 PACKOPENMODE = 'rbe'
57 PACKOPENMODE = 'rbe'
58 else:
58 else:
59 PACKOPENMODE = 'rb'
59 PACKOPENMODE = 'rb'
60
60
61 class _cachebackedpacks(object):
61 class _cachebackedpacks(object):
62 def __init__(self, packs, cachesize):
62 def __init__(self, packs, cachesize):
63 self._packs = set(packs)
63 self._packs = set(packs)
64 self._lrucache = util.lrucachedict(cachesize)
64 self._lrucache = util.lrucachedict(cachesize)
65 self._lastpack = None
65 self._lastpack = None
66
66
67 # Avoid cold start of the cache by populating the most recent packs
67 # Avoid cold start of the cache by populating the most recent packs
68 # in the cache.
68 # in the cache.
69 for i in reversed(range(min(cachesize, len(packs)))):
69 for i in reversed(range(min(cachesize, len(packs)))):
70 self._movetofront(packs[i])
70 self._movetofront(packs[i])
71
71
72 def _movetofront(self, pack):
72 def _movetofront(self, pack):
73 # This effectively makes pack the first entry in the cache.
73 # This effectively makes pack the first entry in the cache.
74 self._lrucache[pack] = True
74 self._lrucache[pack] = True
75
75
76 def _registerlastpackusage(self):
76 def _registerlastpackusage(self):
77 if self._lastpack is not None:
77 if self._lastpack is not None:
78 self._movetofront(self._lastpack)
78 self._movetofront(self._lastpack)
79 self._lastpack = None
79 self._lastpack = None
80
80
81 def add(self, pack):
81 def add(self, pack):
82 self._registerlastpackusage()
82 self._registerlastpackusage()
83
83
84 # This method will mostly be called when packs are not in cache.
84 # This method will mostly be called when packs are not in cache.
85 # Therefore, add the pack to the cache.
85 # Therefore, add the pack to the cache.
86 self._movetofront(pack)
86 self._movetofront(pack)
87 self._packs.add(pack)
87 self._packs.add(pack)
88
88
89 def __iter__(self):
89 def __iter__(self):
90 self._registerlastpackusage()
90 self._registerlastpackusage()
91
91
92 # Cache iteration is based on LRU.
92 # Cache iteration is based on LRU.
93 for pack in self._lrucache:
93 for pack in self._lrucache:
94 self._lastpack = pack
94 self._lastpack = pack
95 yield pack
95 yield pack
96
96
97 cachedpacks = set(pack for pack in self._lrucache)
97 cachedpacks = set(pack for pack in self._lrucache)
98 # Yield the packs that are not in the cache.
98 # Yield the packs that are not in the cache.
99 for pack in self._packs - cachedpacks:
99 for pack in self._packs - cachedpacks:
100 self._lastpack = pack
100 self._lastpack = pack
101 yield pack
101 yield pack
102
102
103 # Data not found in any pack.
103 # Data not found in any pack.
104 self._lastpack = None
104 self._lastpack = None
105
105
106 class basepackstore(object):
106 class basepackstore(object):
107 # Default cache size limit for the pack files.
107 # Default cache size limit for the pack files.
108 DEFAULTCACHESIZE = 100
108 DEFAULTCACHESIZE = 100
109
109
110 def __init__(self, ui, path):
110 def __init__(self, ui, path):
111 self.ui = ui
111 self.ui = ui
112 self.path = path
112 self.path = path
113
113
114 # lastrefresh is 0 so we'll immediately check for new packs on the first
114 # lastrefresh is 0 so we'll immediately check for new packs on the first
115 # failure.
115 # failure.
116 self.lastrefresh = 0
116 self.lastrefresh = 0
117
117
118 packs = []
118 packs = []
119 for filepath, __, __ in self._getavailablepackfilessorted():
119 for filepath, __, __ in self._getavailablepackfilessorted():
120 try:
120 try:
121 pack = self.getpack(filepath)
121 pack = self.getpack(filepath)
122 except Exception as ex:
122 except Exception as ex:
123 # An exception may be thrown if the pack file is corrupted
123 # An exception may be thrown if the pack file is corrupted
124 # somehow. Log a warning but keep going in this case, just
124 # somehow. Log a warning but keep going in this case, just
125 # skipping this pack file.
125 # skipping this pack file.
126 #
126 #
127 # If this is an ENOENT error then don't even bother logging.
127 # If this is an ENOENT error then don't even bother logging.
128 # Someone could have removed the file since we retrieved the
128 # Someone could have removed the file since we retrieved the
129 # list of paths.
129 # list of paths.
130 if getattr(ex, 'errno', None) != errno.ENOENT:
130 if getattr(ex, 'errno', None) != errno.ENOENT:
131 ui.warn(_('unable to load pack %s: %s\n') % (filepath, ex))
131 ui.warn(_('unable to load pack %s: %s\n') % (filepath, ex))
132 continue
132 continue
133 packs.append(pack)
133 packs.append(pack)
134
134
135 self.packs = _cachebackedpacks(packs, self.DEFAULTCACHESIZE)
135 self.packs = _cachebackedpacks(packs, self.DEFAULTCACHESIZE)
136
136
137 def _getavailablepackfiles(self):
137 def _getavailablepackfiles(self):
138 """For each pack file (a index/data file combo), yields:
138 """For each pack file (a index/data file combo), yields:
139 (full path without extension, mtime, size)
139 (full path without extension, mtime, size)
140
140
141 mtime will be the mtime of the index/data file (whichever is newer)
141 mtime will be the mtime of the index/data file (whichever is newer)
142 size is the combined size of index/data file
142 size is the combined size of index/data file
143 """
143 """
144 indexsuffixlen = len(self.INDEXSUFFIX)
144 indexsuffixlen = len(self.INDEXSUFFIX)
145 packsuffixlen = len(self.PACKSUFFIX)
145 packsuffixlen = len(self.PACKSUFFIX)
146
146
147 ids = set()
147 ids = set()
148 sizes = collections.defaultdict(lambda: 0)
148 sizes = collections.defaultdict(lambda: 0)
149 mtimes = collections.defaultdict(lambda: [])
149 mtimes = collections.defaultdict(lambda: [])
150 try:
150 try:
151 for filename, type, stat in osutil.listdir(self.path, stat=True):
151 for filename, type, stat in osutil.listdir(self.path, stat=True):
152 id = None
152 id = None
153 if filename[-indexsuffixlen:] == self.INDEXSUFFIX:
153 if filename[-indexsuffixlen:] == self.INDEXSUFFIX:
154 id = filename[:-indexsuffixlen]
154 id = filename[:-indexsuffixlen]
155 elif filename[-packsuffixlen:] == self.PACKSUFFIX:
155 elif filename[-packsuffixlen:] == self.PACKSUFFIX:
156 id = filename[:-packsuffixlen]
156 id = filename[:-packsuffixlen]
157
157
158 # Since we expect to have two files corresponding to each ID
158 # Since we expect to have two files corresponding to each ID
159 # (the index file and the pack file), we can yield once we see
159 # (the index file and the pack file), we can yield once we see
160 # it twice.
160 # it twice.
161 if id:
161 if id:
162 sizes[id] += stat.st_size # Sum both files' sizes together
162 sizes[id] += stat.st_size # Sum both files' sizes together
163 mtimes[id].append(stat.st_mtime)
163 mtimes[id].append(stat.st_mtime)
164 if id in ids:
164 if id in ids:
165 yield (os.path.join(self.path, id), max(mtimes[id]),
165 yield (os.path.join(self.path, id), max(mtimes[id]),
166 sizes[id])
166 sizes[id])
167 else:
167 else:
168 ids.add(id)
168 ids.add(id)
169 except OSError as ex:
169 except OSError as ex:
170 if ex.errno != errno.ENOENT:
170 if ex.errno != errno.ENOENT:
171 raise
171 raise
172
172
173 def _getavailablepackfilessorted(self):
173 def _getavailablepackfilessorted(self):
174 """Like `_getavailablepackfiles`, but also sorts the files by mtime,
174 """Like `_getavailablepackfiles`, but also sorts the files by mtime,
175 yielding newest files first.
175 yielding newest files first.
176
176
177 This is desirable, since newer packfiles are more likely to contain the
177 This is desirable, since newer packfiles are more likely to contain the
178 data we want.
178 data we want.
179 """
179 """
180 files = []
180 files = []
181 for path, mtime, size in self._getavailablepackfiles():
181 for path, mtime, size in self._getavailablepackfiles():
182 files.append((mtime, size, path))
182 files.append((mtime, size, path))
183 files = sorted(files, reverse=True)
183 files = sorted(files, reverse=True)
184 for mtime, size, path in files:
184 for mtime, size, path in files:
185 yield path, mtime, size
185 yield path, mtime, size
186
186
187 def gettotalsizeandcount(self):
187 def gettotalsizeandcount(self):
188 """Returns the total disk size (in bytes) of all the pack files in
188 """Returns the total disk size (in bytes) of all the pack files in
189 this store, and the count of pack files.
189 this store, and the count of pack files.
190
190
191 (This might be smaller than the total size of the ``self.path``
191 (This might be smaller than the total size of the ``self.path``
192 directory, since this only considers fully-written pack files, and not
192 directory, since this only considers fully-written pack files, and not
193 temporary files or other detritus in the directory.)
193 temporary files or other detritus in the directory.)
194 """
194 """
195 totalsize = 0
195 totalsize = 0
196 count = 0
196 count = 0
197 for __, __, size in self._getavailablepackfiles():
197 for __, __, size in self._getavailablepackfiles():
198 totalsize += size
198 totalsize += size
199 count += 1
199 count += 1
200 return totalsize, count
200 return totalsize, count
201
201
202 def getmetrics(self):
202 def getmetrics(self):
203 """Returns metrics on the state of this store."""
203 """Returns metrics on the state of this store."""
204 size, count = self.gettotalsizeandcount()
204 size, count = self.gettotalsizeandcount()
205 return {
205 return {
206 'numpacks': count,
206 'numpacks': count,
207 'totalpacksize': size,
207 'totalpacksize': size,
208 }
208 }
209
209
210 def getpack(self, path):
210 def getpack(self, path):
211 raise NotImplementedError()
211 raise NotImplementedError()
212
212
213 def getmissing(self, keys):
213 def getmissing(self, keys):
214 missing = keys
214 missing = keys
215 for pack in self.packs:
215 for pack in self.packs:
216 missing = pack.getmissing(missing)
216 missing = pack.getmissing(missing)
217
217
218 # Ensures better performance of the cache by keeping the most
218 # Ensures better performance of the cache by keeping the most
219 # recently accessed pack at the beginning in subsequent iterations.
219 # recently accessed pack at the beginning in subsequent iterations.
220 if not missing:
220 if not missing:
221 return missing
221 return missing
222
222
223 if missing:
223 if missing:
224 for pack in self.refresh():
224 for pack in self.refresh():
225 missing = pack.getmissing(missing)
225 missing = pack.getmissing(missing)
226
226
227 return missing
227 return missing
228
228
229 def markledger(self, ledger, options=None):
229 def markledger(self, ledger, options=None):
230 for pack in self.packs:
230 for pack in self.packs:
231 pack.markledger(ledger)
231 pack.markledger(ledger)
232
232
233 def markforrefresh(self):
233 def markforrefresh(self):
234 """Tells the store that there may be new pack files, so the next time it
234 """Tells the store that there may be new pack files, so the next time it
235 has a lookup miss it should check for new files."""
235 has a lookup miss it should check for new files."""
236 self.lastrefresh = 0
236 self.lastrefresh = 0
237
237
238 def refresh(self):
238 def refresh(self):
239 """Checks for any new packs on disk, adds them to the main pack list,
239 """Checks for any new packs on disk, adds them to the main pack list,
240 and returns a list of just the new packs."""
240 and returns a list of just the new packs."""
241 now = time.time()
241 now = time.time()
242
242
243 # If we experience a lot of misses (like in the case of getmissing() on
243 # If we experience a lot of misses (like in the case of getmissing() on
244 # new objects), let's only actually check disk for new stuff every once
244 # new objects), let's only actually check disk for new stuff every once
245 # in a while. Generally this code path should only ever matter when a
245 # in a while. Generally this code path should only ever matter when a
246 # repack is going on in the background, and that should be pretty rare
246 # repack is going on in the background, and that should be pretty rare
247 # to have that happen twice in quick succession.
247 # to have that happen twice in quick succession.
248 newpacks = []
248 newpacks = []
249 if now > self.lastrefresh + REFRESHRATE:
249 if now > self.lastrefresh + REFRESHRATE:
250 self.lastrefresh = now
250 self.lastrefresh = now
251 previous = set(p.path for p in self.packs)
251 previous = set(p.path for p in self.packs)
252 for filepath, __, __ in self._getavailablepackfilessorted():
252 for filepath, __, __ in self._getavailablepackfilessorted():
253 if filepath not in previous:
253 if filepath not in previous:
254 newpack = self.getpack(filepath)
254 newpack = self.getpack(filepath)
255 newpacks.append(newpack)
255 newpacks.append(newpack)
256 self.packs.add(newpack)
256 self.packs.add(newpack)
257
257
258 return newpacks
258 return newpacks
259
259
260 class versionmixin(object):
260 class versionmixin(object):
261 # Mix-in for classes with multiple supported versions
261 # Mix-in for classes with multiple supported versions
262 VERSION = None
262 VERSION = None
263 SUPPORTED_VERSIONS = [2]
263 SUPPORTED_VERSIONS = [2]
264
264
265 def _checkversion(self, version):
265 def _checkversion(self, version):
266 if version in self.SUPPORTED_VERSIONS:
266 if version in self.SUPPORTED_VERSIONS:
267 if self.VERSION is None:
267 if self.VERSION is None:
268 # only affect this instance
268 # only affect this instance
269 self.VERSION = version
269 self.VERSION = version
270 elif self.VERSION != version:
270 elif self.VERSION != version:
271 raise RuntimeError('inconsistent version: %s' % version)
271 raise RuntimeError('inconsistent version: %s' % version)
272 else:
272 else:
273 raise RuntimeError('unsupported version: %s' % version)
273 raise RuntimeError('unsupported version: %s' % version)
274
274
275 class basepack(versionmixin):
275 class basepack(versionmixin):
276 # The maximum amount we should read via mmap before remapping so the old
276 # The maximum amount we should read via mmap before remapping so the old
277 # pages can be released (100MB)
277 # pages can be released (100MB)
278 MAXPAGEDIN = 100 * 1024**2
278 MAXPAGEDIN = 100 * 1024**2
279
279
280 SUPPORTED_VERSIONS = [2]
280 SUPPORTED_VERSIONS = [2]
281
281
282 def __init__(self, path):
282 def __init__(self, path):
283 self.path = path
283 self.path = path
284 self.packpath = path + self.PACKSUFFIX
284 self.packpath = path + self.PACKSUFFIX
285 self.indexpath = path + self.INDEXSUFFIX
285 self.indexpath = path + self.INDEXSUFFIX
286
286
287 self.indexsize = os.stat(self.indexpath).st_size
287 self.indexsize = os.stat(self.indexpath).st_size
288 self.datasize = os.stat(self.packpath).st_size
288 self.datasize = os.stat(self.packpath).st_size
289
289
290 self._index = None
290 self._index = None
291 self._data = None
291 self._data = None
292 self.freememory() # initialize the mmap
292 self.freememory() # initialize the mmap
293
293
294 version = struct.unpack('!B', self._data[:PACKVERSIONSIZE])[0]
294 version = struct.unpack('!B', self._data[:PACKVERSIONSIZE])[0]
295 self._checkversion(version)
295 self._checkversion(version)
296
296
297 version, config = struct.unpack('!BB', self._index[:INDEXVERSIONSIZE])
297 version, config = struct.unpack('!BB', self._index[:INDEXVERSIONSIZE])
298 self._checkversion(version)
298 self._checkversion(version)
299
299
300 if 0b10000000 & config:
300 if 0b10000000 & config:
301 self.params = indexparams(LARGEFANOUTPREFIX, version)
301 self.params = indexparams(LARGEFANOUTPREFIX, version)
302 else:
302 else:
303 self.params = indexparams(SMALLFANOUTPREFIX, version)
303 self.params = indexparams(SMALLFANOUTPREFIX, version)
304
304
305 @util.propertycache
305 @util.propertycache
306 def _fanouttable(self):
306 def _fanouttable(self):
307 params = self.params
307 params = self.params
308 rawfanout = self._index[FANOUTSTART:FANOUTSTART + params.fanoutsize]
308 rawfanout = self._index[FANOUTSTART:FANOUTSTART + params.fanoutsize]
309 fanouttable = []
309 fanouttable = []
310 for i in pycompat.xrange(0, params.fanoutcount):
310 for i in pycompat.xrange(0, params.fanoutcount):
311 loc = i * 4
311 loc = i * 4
312 fanoutentry = struct.unpack('!I', rawfanout[loc:loc + 4])[0]
312 fanoutentry = struct.unpack('!I', rawfanout[loc:loc + 4])[0]
313 fanouttable.append(fanoutentry)
313 fanouttable.append(fanoutentry)
314 return fanouttable
314 return fanouttable
315
315
316 @util.propertycache
316 @util.propertycache
317 def _indexend(self):
317 def _indexend(self):
318 nodecount = struct.unpack_from('!Q', self._index,
318 nodecount = struct.unpack_from('!Q', self._index,
319 self.params.indexstart - 8)[0]
319 self.params.indexstart - 8)[0]
320 return self.params.indexstart + nodecount * self.INDEXENTRYLENGTH
320 return self.params.indexstart + nodecount * self.INDEXENTRYLENGTH
321
321
322 def freememory(self):
322 def freememory(self):
323 """Unmap and remap the memory to free it up after known expensive
323 """Unmap and remap the memory to free it up after known expensive
324 operations. Return True if self._data and self._index were reloaded.
324 operations. Return True if self._data and self._index were reloaded.
325 """
325 """
326 if self._index:
326 if self._index:
327 if self._pagedin < self.MAXPAGEDIN:
327 if self._pagedin < self.MAXPAGEDIN:
328 return False
328 return False
329
329
330 self._index.close()
330 self._index.close()
331 self._data.close()
331 self._data.close()
332
332
333 # TODO: use an opener/vfs to access these paths
333 # TODO: use an opener/vfs to access these paths
334 with open(self.indexpath, PACKOPENMODE) as indexfp:
334 with open(self.indexpath, PACKOPENMODE) as indexfp:
335 # memory-map the file, size 0 means whole file
335 # memory-map the file, size 0 means whole file
336 self._index = mmap.mmap(indexfp.fileno(), 0,
336 self._index = mmap.mmap(indexfp.fileno(), 0,
337 access=mmap.ACCESS_READ)
337 access=mmap.ACCESS_READ)
338 with open(self.packpath, PACKOPENMODE) as datafp:
338 with open(self.packpath, PACKOPENMODE) as datafp:
339 self._data = mmap.mmap(datafp.fileno(), 0, access=mmap.ACCESS_READ)
339 self._data = mmap.mmap(datafp.fileno(), 0, access=mmap.ACCESS_READ)
340
340
341 self._pagedin = 0
341 self._pagedin = 0
342 return True
342 return True
343
343
344 def getmissing(self, keys):
344 def getmissing(self, keys):
345 raise NotImplementedError()
345 raise NotImplementedError()
346
346
347 def markledger(self, ledger, options=None):
347 def markledger(self, ledger, options=None):
348 raise NotImplementedError()
348 raise NotImplementedError()
349
349
350 def cleanup(self, ledger):
350 def cleanup(self, ledger):
351 raise NotImplementedError()
351 raise NotImplementedError()
352
352
353 def __iter__(self):
353 def __iter__(self):
354 raise NotImplementedError()
354 raise NotImplementedError()
355
355
356 def iterentries(self):
356 def iterentries(self):
357 raise NotImplementedError()
357 raise NotImplementedError()
358
358
359 class mutablebasepack(versionmixin):
359 class mutablebasepack(versionmixin):
360
360
361 def __init__(self, ui, packdir, version=2):
361 def __init__(self, ui, packdir, version=2):
362 self._checkversion(version)
362 self._checkversion(version)
363 # TODO(augie): make this configurable
363 # TODO(augie): make this configurable
364 self._compressor = 'GZ'
364 self._compressor = 'GZ'
365 opener = vfsmod.vfs(packdir)
365 opener = vfsmod.vfs(packdir)
366 opener.createmode = 0o444
366 opener.createmode = 0o444
367 self.opener = opener
367 self.opener = opener
368
368
369 self.entries = {}
369 self.entries = {}
370
370
371 shallowutil.mkstickygroupdir(ui, packdir)
371 shallowutil.mkstickygroupdir(ui, packdir)
372 self.packfp, self.packpath = opener.mkstemp(
372 self.packfp, self.packpath = opener.mkstemp(
373 suffix=self.PACKSUFFIX + '-tmp')
373 suffix=self.PACKSUFFIX + '-tmp')
374 self.idxfp, self.idxpath = opener.mkstemp(
374 self.idxfp, self.idxpath = opener.mkstemp(
375 suffix=self.INDEXSUFFIX + '-tmp')
375 suffix=self.INDEXSUFFIX + '-tmp')
376 - self.packfp = os.fdopen(self.packfp, r'w+')
376 + self.packfp = os.fdopen(self.packfp, r'wb+')
377 - self.idxfp = os.fdopen(self.idxfp, r'w+')
377 + self.idxfp = os.fdopen(self.idxfp, r'wb+')
378 self.sha = hashlib.sha1()
378 self.sha = hashlib.sha1()
379 self._closed = False
379 self._closed = False
380
380
381 # The opener provides no way of doing permission fixup on files created
381 # The opener provides no way of doing permission fixup on files created
382 # via mkstemp, so we must fix it ourselves. We can probably fix this
382 # via mkstemp, so we must fix it ourselves. We can probably fix this
383 # upstream in vfs.mkstemp so we don't need to use the private method.
383 # upstream in vfs.mkstemp so we don't need to use the private method.
384 opener._fixfilemode(opener.join(self.packpath))
384 opener._fixfilemode(opener.join(self.packpath))
385 opener._fixfilemode(opener.join(self.idxpath))
385 opener._fixfilemode(opener.join(self.idxpath))
386
386
387 # Write header
387 # Write header
388 # TODO: make it extensible (ex: allow specifying compression algorithm,
388 # TODO: make it extensible (ex: allow specifying compression algorithm,
389 # a flexible key/value header, delta algorithm, fanout size, etc)
389 # a flexible key/value header, delta algorithm, fanout size, etc)
390 versionbuf = struct.pack('!B', self.VERSION) # unsigned 1 byte int
390 versionbuf = struct.pack('!B', self.VERSION) # unsigned 1 byte int
391 self.writeraw(versionbuf)
391 self.writeraw(versionbuf)
392
392
393 def __enter__(self):
393 def __enter__(self):
394 return self
394 return self
395
395
396 def __exit__(self, exc_type, exc_value, traceback):
396 def __exit__(self, exc_type, exc_value, traceback):
397 if exc_type is None:
397 if exc_type is None:
398 self.close()
398 self.close()
399 else:
399 else:
400 self.abort()
400 self.abort()
401
401
402 def abort(self):
402 def abort(self):
403 # Unclean exit
403 # Unclean exit
404 self._cleantemppacks()
404 self._cleantemppacks()
405
405
406 def writeraw(self, data):
406 def writeraw(self, data):
407 self.packfp.write(data)
407 self.packfp.write(data)
408 self.sha.update(data)
408 self.sha.update(data)
409
409
410 def close(self, ledger=None):
410 def close(self, ledger=None):
411 if self._closed:
411 if self._closed:
412 return
412 return
413
413
414 try:
414 try:
415 sha = self.sha.hexdigest()
415 sha = self.sha.hexdigest()
416 self.packfp.close()
416 self.packfp.close()
417 self.writeindex()
417 self.writeindex()
418
418
419 if len(self.entries) == 0:
419 if len(self.entries) == 0:
420 # Empty pack
420 # Empty pack
421 self._cleantemppacks()
421 self._cleantemppacks()
422 self._closed = True
422 self._closed = True
423 return None
423 return None
424
424
425 self.opener.rename(self.packpath, sha + self.PACKSUFFIX)
425 self.opener.rename(self.packpath, sha + self.PACKSUFFIX)
426 try:
426 try:
427 self.opener.rename(self.idxpath, sha + self.INDEXSUFFIX)
427 self.opener.rename(self.idxpath, sha + self.INDEXSUFFIX)
428 except Exception as ex:
428 except Exception as ex:
429 try:
429 try:
430 self.opener.unlink(sha + self.PACKSUFFIX)
430 self.opener.unlink(sha + self.PACKSUFFIX)
431 except Exception:
431 except Exception:
432 pass
432 pass
433 # Throw exception 'ex' explicitly since a normal 'raise' would
433 # Throw exception 'ex' explicitly since a normal 'raise' would
434 # potentially throw an exception from the unlink cleanup.
434 # potentially throw an exception from the unlink cleanup.
435 raise ex
435 raise ex
436 except Exception:
436 except Exception:
437 # Clean up temp packs in all exception cases
437 # Clean up temp packs in all exception cases
438 self._cleantemppacks()
438 self._cleantemppacks()
439 raise
439 raise
440
440
441 self._closed = True
441 self._closed = True
442 result = self.opener.join(sha)
442 result = self.opener.join(sha)
443 if ledger:
443 if ledger:
444 ledger.addcreated(result)
444 ledger.addcreated(result)
445 return result
445 return result
446
446
447 def _cleantemppacks(self):
447 def _cleantemppacks(self):
448 try:
448 try:
449 self.opener.unlink(self.packpath)
449 self.opener.unlink(self.packpath)
450 except Exception:
450 except Exception:
451 pass
451 pass
452 try:
452 try:
453 self.opener.unlink(self.idxpath)
453 self.opener.unlink(self.idxpath)
454 except Exception:
454 except Exception:
455 pass
455 pass
456
456
457 def writeindex(self):
457 def writeindex(self):
458 rawindex = ''
458 rawindex = ''
459
459
460 largefanout = len(self.entries) > SMALLFANOUTCUTOFF
460 largefanout = len(self.entries) > SMALLFANOUTCUTOFF
461 if largefanout:
461 if largefanout:
462 params = indexparams(LARGEFANOUTPREFIX, self.VERSION)
462 params = indexparams(LARGEFANOUTPREFIX, self.VERSION)
463 else:
463 else:
464 params = indexparams(SMALLFANOUTPREFIX, self.VERSION)
464 params = indexparams(SMALLFANOUTPREFIX, self.VERSION)
465
465
466 fanouttable = [EMPTYFANOUT] * params.fanoutcount
466 fanouttable = [EMPTYFANOUT] * params.fanoutcount
467
467
468 # Precompute the location of each entry
468 # Precompute the location of each entry
469 locations = {}
469 locations = {}
470 count = 0
470 count = 0
471 for node in sorted(self.entries):
471 for node in sorted(self.entries):
472 location = count * self.INDEXENTRYLENGTH
472 location = count * self.INDEXENTRYLENGTH
473 locations[node] = location
473 locations[node] = location
474 count += 1
474 count += 1
475
475
476 # Must use [0] on the unpack result since it's always a tuple.
476 # Must use [0] on the unpack result since it's always a tuple.
477 fanoutkey = struct.unpack(params.fanoutstruct,
477 fanoutkey = struct.unpack(params.fanoutstruct,
478 node[:params.fanoutprefix])[0]
478 node[:params.fanoutprefix])[0]
479 if fanouttable[fanoutkey] == EMPTYFANOUT:
479 if fanouttable[fanoutkey] == EMPTYFANOUT:
480 fanouttable[fanoutkey] = location
480 fanouttable[fanoutkey] = location
481
481
482 rawfanouttable = ''
482 rawfanouttable = ''
483 last = 0
483 last = 0
484 for offset in fanouttable:
484 for offset in fanouttable:
485 offset = offset if offset != EMPTYFANOUT else last
485 offset = offset if offset != EMPTYFANOUT else last
486 last = offset
486 last = offset
487 rawfanouttable += struct.pack('!I', offset)
487 rawfanouttable += struct.pack('!I', offset)
488
488
489 rawentrieslength = struct.pack('!Q', len(self.entries))
489 rawentrieslength = struct.pack('!Q', len(self.entries))
490
490
491 # The index offset is its location in the file, i.e. right after the 2 byte
491 # The index offset is its location in the file, i.e. right after the 2 byte
492 # header and the fanouttable.
492 # header and the fanouttable.
493 rawindex = self.createindex(locations, 2 + len(rawfanouttable))
493 rawindex = self.createindex(locations, 2 + len(rawfanouttable))
494
494
495 self._writeheader(params)
495 self._writeheader(params)
496 self.idxfp.write(rawfanouttable)
496 self.idxfp.write(rawfanouttable)
497 self.idxfp.write(rawentrieslength)
497 self.idxfp.write(rawentrieslength)
498 self.idxfp.write(rawindex)
498 self.idxfp.write(rawindex)
499 self.idxfp.close()
499 self.idxfp.close()
500
500
501 def createindex(self, nodelocations):
501 def createindex(self, nodelocations):
502 raise NotImplementedError()
502 raise NotImplementedError()
503
503
504 def _writeheader(self, indexparams):
504 def _writeheader(self, indexparams):
505 # Index header
505 # Index header
506 # <version: 1 byte>
506 # <version: 1 byte>
507 # <large fanout: 1 bit> # 1 means 2^16, 0 means 2^8
507 # <large fanout: 1 bit> # 1 means 2^16, 0 means 2^8
508 # <unused: 7 bit> # future use (compression, delta format, etc)
508 # <unused: 7 bit> # future use (compression, delta format, etc)
509 config = 0
509 config = 0
510 if indexparams.fanoutprefix == LARGEFANOUTPREFIX:
510 if indexparams.fanoutprefix == LARGEFANOUTPREFIX:
511 config = 0b10000000
511 config = 0b10000000
512 self.idxfp.write(struct.pack('!BB', self.VERSION, config))
512 self.idxfp.write(struct.pack('!BB', self.VERSION, config))
513
513
514 class indexparams(object):
514 class indexparams(object):
515 __slots__ = (r'fanoutprefix', r'fanoutstruct', r'fanoutcount',
515 __slots__ = (r'fanoutprefix', r'fanoutstruct', r'fanoutcount',
516 r'fanoutsize', r'indexstart')
516 r'fanoutsize', r'indexstart')
517
517
518 def __init__(self, prefixsize, version):
518 def __init__(self, prefixsize, version):
519 self.fanoutprefix = prefixsize
519 self.fanoutprefix = prefixsize
520
520
521 # The struct pack format for fanout table location (i.e. the format that
521 # The struct pack format for fanout table location (i.e. the format that
522 # converts the node prefix into an integer location in the fanout
522 # converts the node prefix into an integer location in the fanout
523 # table).
523 # table).
524 if prefixsize == SMALLFANOUTPREFIX:
524 if prefixsize == SMALLFANOUTPREFIX:
525 self.fanoutstruct = '!B'
525 self.fanoutstruct = '!B'
526 elif prefixsize == LARGEFANOUTPREFIX:
526 elif prefixsize == LARGEFANOUTPREFIX:
527 self.fanoutstruct = '!H'
527 self.fanoutstruct = '!H'
528 else:
528 else:
529 raise ValueError("invalid fanout prefix size: %s" % prefixsize)
529 raise ValueError("invalid fanout prefix size: %s" % prefixsize)
530
530
531 # The number of fanout table entries
531 # The number of fanout table entries
532 self.fanoutcount = 2**(prefixsize * 8)
532 self.fanoutcount = 2**(prefixsize * 8)
533
533
534 # The total bytes used by the fanout table
534 # The total bytes used by the fanout table
535 self.fanoutsize = self.fanoutcount * 4
535 self.fanoutsize = self.fanoutcount * 4
536
536
537 self.indexstart = FANOUTSTART + self.fanoutsize
537 self.indexstart = FANOUTSTART + self.fanoutsize
538 # Skip the index length
538 # Skip the index length
539 self.indexstart += 8
539 self.indexstart += 8
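For reference, the index layout that basepack and indexparams describe above (a 2-byte version/config header, a fanout table of 4-byte offsets, an 8-byte entry count, then fixed-width index entries) works out as follows. This is an illustrative sketch, not code from the patch; the node value is made up:

    import struct

    INDEXVERSIONSIZE = 2
    FANOUTSTART = INDEXVERSIONSIZE

    for prefixsize, fanoutstruct in ((1, '!B'), (2, '!H')):
        fanoutcount = 2 ** (prefixsize * 8)        # 256 or 65536 slots
        fanoutsize = fanoutcount * 4               # 4 bytes per slot
        indexstart = FANOUTSTART + fanoutsize + 8  # skip the 8-byte entry count
        print('%d-byte prefix: count=%d size=%d indexstart=%d'
              % (prefixsize, fanoutcount, fanoutsize, indexstart))

    # Mapping a node to its fanout slot, as basepack does with params.fanoutstruct:
    node = b'\x1a\x85' + b'\x00' * 18              # hypothetical 20-byte node
    smallslot = struct.unpack('!B', node[:1])[0]   # 26
    largeslot = struct.unpack('!H', node[:2])[0]   # 6789

With the small prefix the index entries start at byte 1034; with the large prefix, at byte 262154, matching what indexparams computes.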
@@ -1,423 +1,423 @@
1 from __future__ import absolute_import
1 from __future__ import absolute_import
2
2
3 import errno
3 import errno
4 import hashlib
4 import hashlib
5 import os
5 import os
6 import shutil
6 import shutil
7 import stat
7 import stat
8 import time
8 import time
9
9
10 from mercurial.i18n import _
10 from mercurial.i18n import _
11 from mercurial.node import bin, hex
11 from mercurial.node import bin, hex
12 from mercurial import (
12 from mercurial import (
13 error,
13 error,
14 pycompat,
14 pycompat,
15 util,
15 util,
16 )
16 )
17 from . import (
17 from . import (
18 constants,
18 constants,
19 shallowutil,
19 shallowutil,
20 )
20 )
21
21
22 class basestore(object):
22 class basestore(object):
23 def __init__(self, repo, path, reponame, shared=False):
23 def __init__(self, repo, path, reponame, shared=False):
24 """Creates a remotefilelog store object for the given repo name.
24 """Creates a remotefilelog store object for the given repo name.
25
25
26 `path` - The file path where this store keeps its data
26 `path` - The file path where this store keeps its data
27 `reponame` - The name of the repo. This is used to partition data from
27 `reponame` - The name of the repo. This is used to partition data from
28 many repos.
28 many repos.
29 `shared` - True if this store is a shared cache of data from the central
29 `shared` - True if this store is a shared cache of data from the central
30 server, for many repos on this machine. False means this store is for
30 server, for many repos on this machine. False means this store is for
31 the local data for one repo.
31 the local data for one repo.
32 """
32 """
33 self.repo = repo
33 self.repo = repo
34 self.ui = repo.ui
34 self.ui = repo.ui
35 self._path = path
35 self._path = path
36 self._reponame = reponame
36 self._reponame = reponame
37 self._shared = shared
37 self._shared = shared
38 self._uid = os.getuid() if not pycompat.iswindows else None
38 self._uid = os.getuid() if not pycompat.iswindows else None
39
39
40 self._validatecachelog = self.ui.config("remotefilelog",
40 self._validatecachelog = self.ui.config("remotefilelog",
41 "validatecachelog")
41 "validatecachelog")
42 self._validatecache = self.ui.config("remotefilelog", "validatecache",
42 self._validatecache = self.ui.config("remotefilelog", "validatecache",
43 'on')
43 'on')
44 if self._validatecache not in ('on', 'strict', 'off'):
44 if self._validatecache not in ('on', 'strict', 'off'):
45 self._validatecache = 'on'
45 self._validatecache = 'on'
46 if self._validatecache == 'off':
46 if self._validatecache == 'off':
47 self._validatecache = False
47 self._validatecache = False
48
48
49 if shared:
49 if shared:
50 shallowutil.mkstickygroupdir(self.ui, path)
50 shallowutil.mkstickygroupdir(self.ui, path)
51
51
52 def getmissing(self, keys):
52 def getmissing(self, keys):
53 missing = []
53 missing = []
54 for name, node in keys:
54 for name, node in keys:
55 filepath = self._getfilepath(name, node)
55 filepath = self._getfilepath(name, node)
56 exists = os.path.exists(filepath)
56 exists = os.path.exists(filepath)
57 if (exists and self._validatecache == 'strict' and
57 if (exists and self._validatecache == 'strict' and
58 not self._validatekey(filepath, 'contains')):
58 not self._validatekey(filepath, 'contains')):
59 exists = False
59 exists = False
60 if not exists:
60 if not exists:
61 missing.append((name, node))
61 missing.append((name, node))
62
62
63 return missing
63 return missing
64
64
65 # BELOW THIS ARE IMPLEMENTATIONS OF REPACK SOURCE
65 # BELOW THIS ARE IMPLEMENTATIONS OF REPACK SOURCE
66
66
67 def markledger(self, ledger, options=None):
67 def markledger(self, ledger, options=None):
68 if options and options.get(constants.OPTION_PACKSONLY):
68 if options and options.get(constants.OPTION_PACKSONLY):
69 return
69 return
70 if self._shared:
70 if self._shared:
71 for filename, nodes in self._getfiles():
71 for filename, nodes in self._getfiles():
72 for node in nodes:
72 for node in nodes:
73 ledger.markdataentry(self, filename, node)
73 ledger.markdataentry(self, filename, node)
74 ledger.markhistoryentry(self, filename, node)
74 ledger.markhistoryentry(self, filename, node)
75
75
76 def cleanup(self, ledger):
76 def cleanup(self, ledger):
77 ui = self.ui
77 ui = self.ui
78 entries = ledger.sources.get(self, [])
78 entries = ledger.sources.get(self, [])
79 count = 0
79 count = 0
80 for entry in entries:
80 for entry in entries:
81 if entry.gced or (entry.datarepacked and entry.historyrepacked):
81 if entry.gced or (entry.datarepacked and entry.historyrepacked):
82 ui.progress(_("cleaning up"), count, unit="files",
82 ui.progress(_("cleaning up"), count, unit="files",
83 total=len(entries))
83 total=len(entries))
84 path = self._getfilepath(entry.filename, entry.node)
84 path = self._getfilepath(entry.filename, entry.node)
85 util.tryunlink(path)
85 util.tryunlink(path)
86 count += 1
86 count += 1
87 ui.progress(_("cleaning up"), None)
87 ui.progress(_("cleaning up"), None)
88
88
89 # Clean up the repo cache directory.
89 # Clean up the repo cache directory.
90 self._cleanupdirectory(self._getrepocachepath())
90 self._cleanupdirectory(self._getrepocachepath())
91
91
92 # BELOW THIS ARE NON-STANDARD APIS
92 # BELOW THIS ARE NON-STANDARD APIS
93
93
94 def _cleanupdirectory(self, rootdir):
94 def _cleanupdirectory(self, rootdir):
95 """Removes the empty directories and unnecessary files within the root
95 """Removes the empty directories and unnecessary files within the root
96 directory recursively. Note that this method does not remove the root
96 directory recursively. Note that this method does not remove the root
97 directory itself. """
97 directory itself. """
98
98
99 oldfiles = set()
99 oldfiles = set()
100 otherfiles = set()
100 otherfiles = set()
101 # osutil.listdir returns stat information which saves some rmdir/listdir
101 # osutil.listdir returns stat information which saves some rmdir/listdir
102 # syscalls.
102 # syscalls.
103 for name, mode in util.osutil.listdir(rootdir):
103 for name, mode in util.osutil.listdir(rootdir):
104 if stat.S_ISDIR(mode):
104 if stat.S_ISDIR(mode):
105 dirpath = os.path.join(rootdir, name)
105 dirpath = os.path.join(rootdir, name)
106 self._cleanupdirectory(dirpath)
106 self._cleanupdirectory(dirpath)
107
107
108 # Now that the directory specified by dirpath is potentially
108 # Now that the directory specified by dirpath is potentially
109 # empty, try and remove it.
109 # empty, try and remove it.
110 try:
110 try:
111 os.rmdir(dirpath)
111 os.rmdir(dirpath)
112 except OSError:
112 except OSError:
113 pass
113 pass
114
114
115 elif stat.S_ISREG(mode):
115 elif stat.S_ISREG(mode):
116 if name.endswith('_old'):
116 if name.endswith('_old'):
117 oldfiles.add(name[:-4])
117 oldfiles.add(name[:-4])
118 else:
118 else:
119 otherfiles.add(name)
119 otherfiles.add(name)
120
120
121 # Remove the files which end with suffix '_old' and have no
121 # Remove the files which end with suffix '_old' and have no
122 # corresponding file without the suffix '_old'. See addremotefilelognode
122 # corresponding file without the suffix '_old'. See addremotefilelognode
123 # method for the generation/purpose of files with '_old' suffix.
123 # method for the generation/purpose of files with '_old' suffix.
124 for filename in oldfiles - otherfiles:
124 for filename in oldfiles - otherfiles:
125 filepath = os.path.join(rootdir, filename + '_old')
125 filepath = os.path.join(rootdir, filename + '_old')
126 util.tryunlink(filepath)
126 util.tryunlink(filepath)
127
127
128 def _getfiles(self):
128 def _getfiles(self):
129 """Return a list of (filename, [node,...]) for all the revisions that
129 """Return a list of (filename, [node,...]) for all the revisions that
130 exist in the store.
130 exist in the store.
131
131
132 This is useful for obtaining a list of all the contents of the store
132 This is useful for obtaining a list of all the contents of the store
133 when performing a repack to another store, since the store API requires
133 when performing a repack to another store, since the store API requires
134 name+node keys and not namehash+node keys.
134 name+node keys and not namehash+node keys.
135 """
135 """
136 existing = {}
136 existing = {}
137 for filenamehash, node in self._listkeys():
137 for filenamehash, node in self._listkeys():
138 existing.setdefault(filenamehash, []).append(node)
138 existing.setdefault(filenamehash, []).append(node)
139
139
140 filenamemap = self._resolvefilenames(existing.keys())
140 filenamemap = self._resolvefilenames(existing.keys())
141
141
142 for filename, sha in filenamemap.iteritems():
142 for filename, sha in filenamemap.iteritems():
143 yield (filename, existing[sha])
143 yield (filename, existing[sha])
144
144
145 def _resolvefilenames(self, hashes):
145 def _resolvefilenames(self, hashes):
146 """Given a list of filename hashes that are present in the
146 """Given a list of filename hashes that are present in the
147 remotefilelog store, return a mapping from filename->hash.
147 remotefilelog store, return a mapping from filename->hash.
148
148
149 This is useful when converting remotefilelog blobs into other storage
149 This is useful when converting remotefilelog blobs into other storage
150 formats.
150 formats.
151 """
151 """
152 if not hashes:
152 if not hashes:
153 return {}
153 return {}
154
154
155 filenames = {}
155 filenames = {}
156 missingfilename = set(hashes)
156 missingfilename = set(hashes)
157
157
158 # Start with a full manifest, since it'll cover the majority of files
158 # Start with a full manifest, since it'll cover the majority of files
159 for filename in self.repo['tip'].manifest():
159 for filename in self.repo['tip'].manifest():
160 sha = hashlib.sha1(filename).digest()
160 sha = hashlib.sha1(filename).digest()
161 if sha in missingfilename:
161 if sha in missingfilename:
162 filenames[filename] = sha
162 filenames[filename] = sha
163 missingfilename.discard(sha)
163 missingfilename.discard(sha)
164
164
165 # Scan the changelog until we've found every file name
165 # Scan the changelog until we've found every file name
166 cl = self.repo.unfiltered().changelog
166 cl = self.repo.unfiltered().changelog
167 for rev in pycompat.xrange(len(cl) - 1, -1, -1):
167 for rev in pycompat.xrange(len(cl) - 1, -1, -1):
168 if not missingfilename:
168 if not missingfilename:
169 break
169 break
170 files = cl.readfiles(cl.node(rev))
170 files = cl.readfiles(cl.node(rev))
171 for filename in files:
171 for filename in files:
172 sha = hashlib.sha1(filename).digest()
172 sha = hashlib.sha1(filename).digest()
173 if sha in missingfilename:
173 if sha in missingfilename:
174 filenames[filename] = sha
174 filenames[filename] = sha
175 missingfilename.discard(sha)
175 missingfilename.discard(sha)
176
176
177 return filenames
177 return filenames
178
178
179 def _getrepocachepath(self):
179 def _getrepocachepath(self):
180 return os.path.join(
180 return os.path.join(
181 self._path, self._reponame) if self._shared else self._path
181 self._path, self._reponame) if self._shared else self._path
182
182
183 def _listkeys(self):
183 def _listkeys(self):
184 """List all the remotefilelog keys that exist in the store.
184 """List all the remotefilelog keys that exist in the store.
185
185
186 Returns an iterator of (filename hash, filecontent hash) tuples.
186 Returns an iterator of (filename hash, filecontent hash) tuples.
187 """
187 """
188
188
189 for root, dirs, files in os.walk(self._getrepocachepath()):
189 for root, dirs, files in os.walk(self._getrepocachepath()):
190 for filename in files:
190 for filename in files:
191 if len(filename) != 40:
191 if len(filename) != 40:
192 continue
192 continue
193 node = filename
193 node = filename
194 if self._shared:
194 if self._shared:
195 # .../1a/85ffda..be21
195 # .../1a/85ffda..be21
196 filenamehash = root[-41:-39] + root[-38:]
196 filenamehash = root[-41:-39] + root[-38:]
197 else:
197 else:
198 filenamehash = root[-40:]
198 filenamehash = root[-40:]
199 yield (bin(filenamehash), bin(node))
199 yield (bin(filenamehash), bin(node))
200
200
201 def _getfilepath(self, name, node):
201 def _getfilepath(self, name, node):
202 node = hex(node)
202 node = hex(node)
203 if self._shared:
203 if self._shared:
204 key = shallowutil.getcachekey(self._reponame, name, node)
204 key = shallowutil.getcachekey(self._reponame, name, node)
205 else:
205 else:
206 key = shallowutil.getlocalkey(name, node)
206 key = shallowutil.getlocalkey(name, node)
207
207
208 return os.path.join(self._path, key)
208 return os.path.join(self._path, key)
209
209
210 def _getdata(self, name, node):
210 def _getdata(self, name, node):
211 filepath = self._getfilepath(name, node)
211 filepath = self._getfilepath(name, node)
212 try:
212 try:
213 data = shallowutil.readfile(filepath)
213 data = shallowutil.readfile(filepath)
214 if self._validatecache and not self._validatedata(data, filepath):
214 if self._validatecache and not self._validatedata(data, filepath):
215 if self._validatecachelog:
215 if self._validatecachelog:
216 with open(self._validatecachelog, 'a+') as f:
216 with open(self._validatecachelog, 'a+') as f:
217 f.write("corrupt %s during read\n" % filepath)
217 f.write("corrupt %s during read\n" % filepath)
218 os.rename(filepath, filepath + ".corrupt")
218 os.rename(filepath, filepath + ".corrupt")
219 raise KeyError("corrupt local cache file %s" % filepath)
219 raise KeyError("corrupt local cache file %s" % filepath)
220 except IOError:
220 except IOError:
221 raise KeyError("no file found at %s for %s:%s" % (filepath, name,
221 raise KeyError("no file found at %s for %s:%s" % (filepath, name,
222 hex(node)))
222 hex(node)))
223
223
224 return data
224 return data
225
225
226 def addremotefilelognode(self, name, node, data):
226 def addremotefilelognode(self, name, node, data):
227 filepath = self._getfilepath(name, node)
227 filepath = self._getfilepath(name, node)
228
228
229 oldumask = os.umask(0o002)
229 oldumask = os.umask(0o002)
230 try:
230 try:
231 # if this node already exists, save the old version for
231 # if this node already exists, save the old version for
232 # recovery/debugging purposes.
232 # recovery/debugging purposes.
233 if os.path.exists(filepath):
233 if os.path.exists(filepath):
234 newfilename = filepath + '_old'
234 newfilename = filepath + '_old'
235 # newfilename can be read-only and shutil.copy will fail.
235 # newfilename can be read-only and shutil.copy will fail.
236 # Delete newfilename to avoid it
236 # Delete newfilename to avoid it
237 if os.path.exists(newfilename):
237 if os.path.exists(newfilename):
238 shallowutil.unlinkfile(newfilename)
238 shallowutil.unlinkfile(newfilename)
239 shutil.copy(filepath, newfilename)
239 shutil.copy(filepath, newfilename)
240
240
241 shallowutil.mkstickygroupdir(self.ui, os.path.dirname(filepath))
241 shallowutil.mkstickygroupdir(self.ui, os.path.dirname(filepath))
242 shallowutil.writefile(filepath, data, readonly=True)
242 shallowutil.writefile(filepath, data, readonly=True)
243
243
244 if self._validatecache:
244 if self._validatecache:
245 if not self._validatekey(filepath, 'write'):
245 if not self._validatekey(filepath, 'write'):
246 raise error.Abort(_("local cache write was corrupted %s") %
246 raise error.Abort(_("local cache write was corrupted %s") %
247 filepath)
247 filepath)
248 finally:
248 finally:
249 os.umask(oldumask)
249 os.umask(oldumask)
250
250
251 def markrepo(self, path):
251 def markrepo(self, path):
252 """Call this to add the given repo path to the store's list of
252 """Call this to add the given repo path to the store's list of
253 repositories that are using it. This is useful later when doing garbage
253 repositories that are using it. This is useful later when doing garbage
254 collection, since it allows us to inspect the repos to see what nodes
254 collection, since it allows us to inspect the repos to see what nodes
255 they want to be kept alive in the store.
255 they want to be kept alive in the store.
256 """
256 """
257 repospath = os.path.join(self._path, "repos")
257 repospath = os.path.join(self._path, "repos")
258 - with open(repospath, 'a') as reposfile:
258 + with open(repospath, 'ab') as reposfile:
259 reposfile.write(os.path.dirname(path) + "\n")
259 reposfile.write(os.path.dirname(path) + "\n")
260
260
261 repospathstat = os.stat(repospath)
261 repospathstat = os.stat(repospath)
262 if repospathstat.st_uid == self._uid:
262 if repospathstat.st_uid == self._uid:
263 os.chmod(repospath, 0o0664)
263 os.chmod(repospath, 0o0664)
264
264
265 def _validatekey(self, path, action):
265 def _validatekey(self, path, action):
266 with open(path, 'rb') as f:
266 with open(path, 'rb') as f:
267 data = f.read()
267 data = f.read()
268
268
269 if self._validatedata(data, path):
269 if self._validatedata(data, path):
270 return True
270 return True
271
271
272 if self._validatecachelog:
272 if self._validatecachelog:
273 - with open(self._validatecachelog, 'a+') as f:
273 + with open(self._validatecachelog, 'ab+') as f:
274 f.write("corrupt %s during %s\n" % (path, action))
274 f.write("corrupt %s during %s\n" % (path, action))
275
275
276 os.rename(path, path + ".corrupt")
276 os.rename(path, path + ".corrupt")
277 return False
277 return False
278
278
279 def _validatedata(self, data, path):
279 def _validatedata(self, data, path):
280 try:
280 try:
281 if len(data) > 0:
281 if len(data) > 0:
282 # see remotefilelogserver.createfileblob for the format
282 # see remotefilelogserver.createfileblob for the format
283 offset, size, flags = shallowutil.parsesizeflags(data)
283 offset, size, flags = shallowutil.parsesizeflags(data)
284 if len(data) <= size:
284 if len(data) <= size:
285 # it is truncated
285 # it is truncated
286 return False
286 return False
287
287
288 # extract the node from the metadata
288 # extract the node from the metadata
289 offset += size
289 offset += size
290 datanode = data[offset:offset + 20]
290 datanode = data[offset:offset + 20]
291
291
292 # and compare against the path
292 # and compare against the path
293 if os.path.basename(path) == hex(datanode):
293 if os.path.basename(path) == hex(datanode):
294 # Content matches the intended path
294 # Content matches the intended path
295 return True
295 return True
296 return False
296 return False
297 except (ValueError, RuntimeError):
297 except (ValueError, RuntimeError):
298 pass
298 pass
299
299
300 return False
300 return False
301
301
302 def gc(self, keepkeys):
302 def gc(self, keepkeys):
303 ui = self.ui
303 ui = self.ui
304 cachepath = self._path
304 cachepath = self._path
305 _removing = _("removing unnecessary files")
305 _removing = _("removing unnecessary files")
306 _truncating = _("enforcing cache limit")
306 _truncating = _("enforcing cache limit")
307
307
308 # prune cache
308 # prune cache
309 import Queue
309 import Queue
310 queue = Queue.PriorityQueue()
310 queue = Queue.PriorityQueue()
311 originalsize = 0
311 originalsize = 0
312 size = 0
312 size = 0
313 count = 0
313 count = 0
314 removed = 0
314 removed = 0
315
315
316 # keep files newer than a day even if they aren't needed
316 # keep files newer than a day even if they aren't needed
317 limit = time.time() - (60 * 60 * 24)
317 limit = time.time() - (60 * 60 * 24)
318
318
319 ui.progress(_removing, count, unit="files")
319 ui.progress(_removing, count, unit="files")
320 for root, dirs, files in os.walk(cachepath):
320 for root, dirs, files in os.walk(cachepath):
321 for file in files:
321 for file in files:
322 if file == 'repos':
322 if file == 'repos':
323 continue
323 continue
324
324
325 # Don't delete pack files
325 # Don't delete pack files
326 if '/packs/' in root:
326 if '/packs/' in root:
327 continue
327 continue
328
328
329 ui.progress(_removing, count, unit="files")
329 ui.progress(_removing, count, unit="files")
330 path = os.path.join(root, file)
330 path = os.path.join(root, file)
331 key = os.path.relpath(path, cachepath)
331 key = os.path.relpath(path, cachepath)
332 count += 1
332 count += 1
333 try:
333 try:
334 pathstat = os.stat(path)
334 pathstat = os.stat(path)
335 except OSError as e:
335 except OSError as e:
336 # errno.ENOENT = no such file or directory
336 # errno.ENOENT = no such file or directory
337 if e.errno != errno.ENOENT:
337 if e.errno != errno.ENOENT:
338 raise
338 raise
339 msg = _("warning: file %s was removed by another process\n")
339 msg = _("warning: file %s was removed by another process\n")
340 ui.warn(msg % path)
340 ui.warn(msg % path)
341 continue
341 continue
342
342
343 originalsize += pathstat.st_size
343 originalsize += pathstat.st_size
344
344
345 if key in keepkeys or pathstat.st_atime > limit:
345 if key in keepkeys or pathstat.st_atime > limit:
346 queue.put((pathstat.st_atime, path, pathstat))
346 queue.put((pathstat.st_atime, path, pathstat))
347 size += pathstat.st_size
347 size += pathstat.st_size
348 else:
348 else:
349 try:
349 try:
350 shallowutil.unlinkfile(path)
350 shallowutil.unlinkfile(path)
351 except OSError as e:
351 except OSError as e:
352 # errno.ENOENT = no such file or directory
352 # errno.ENOENT = no such file or directory
353 if e.errno != errno.ENOENT:
353 if e.errno != errno.ENOENT:
354 raise
354 raise
355 msg = _("warning: file %s was removed by another "
355 msg = _("warning: file %s was removed by another "
356 "process\n")
356 "process\n")
357 ui.warn(msg % path)
357 ui.warn(msg % path)
358 continue
358 continue
359 removed += 1
359 removed += 1
360 ui.progress(_removing, None)
360 ui.progress(_removing, None)
361
361
362 # remove oldest files until under limit
362 # remove oldest files until under limit
363 limit = ui.configbytes("remotefilelog", "cachelimit")
363 limit = ui.configbytes("remotefilelog", "cachelimit")
364 if size > limit:
364 if size > limit:
365 excess = size - limit
365 excess = size - limit
366 removedexcess = 0
366 removedexcess = 0
367 while queue and size > limit and size > 0:
367 while queue and size > limit and size > 0:
368 ui.progress(_truncating, removedexcess, unit="bytes",
368 ui.progress(_truncating, removedexcess, unit="bytes",
369 total=excess)
369 total=excess)
370 atime, oldpath, oldpathstat = queue.get()
370 atime, oldpath, oldpathstat = queue.get()
371 try:
371 try:
372 shallowutil.unlinkfile(oldpath)
372 shallowutil.unlinkfile(oldpath)
373 except OSError as e:
373 except OSError as e:
374 # errno.ENOENT = no such file or directory
374 # errno.ENOENT = no such file or directory
375 if e.errno != errno.ENOENT:
375 if e.errno != errno.ENOENT:
376 raise
376 raise
377 msg = _("warning: file %s was removed by another process\n")
377 msg = _("warning: file %s was removed by another process\n")
378 ui.warn(msg % oldpath)
378 ui.warn(msg % oldpath)
379 size -= oldpathstat.st_size
379 size -= oldpathstat.st_size
380 removed += 1
380 removed += 1
381 removedexcess += oldpathstat.st_size
381 removedexcess += oldpathstat.st_size
382 ui.progress(_truncating, None)
382 ui.progress(_truncating, None)
383
383
384 ui.status(_("finished: removed %s of %s files (%0.2f GB to %0.2f GB)\n")
384 ui.status(_("finished: removed %s of %s files (%0.2f GB to %0.2f GB)\n")
385 % (removed, count,
385 % (removed, count,
386 float(originalsize) / 1024.0 / 1024.0 / 1024.0,
386 float(originalsize) / 1024.0 / 1024.0 / 1024.0,
387 float(size) / 1024.0 / 1024.0 / 1024.0))
387 float(size) / 1024.0 / 1024.0 / 1024.0))
388
388
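The eviction pass above keeps a priority queue of (atime, path, stat) entries and, once the cache exceeds remotefilelog.cachelimit, unlinks the least recently accessed files until the total size drops back under the limit. The following is a minimal, self-contained sketch of that idea using heapq over made-up (atime, path, size) records instead of real files; it is illustrative only and not part of the extension.

import heapq

def evict_until_under_limit(entries, size, limit):
    # entries: (atime, path, entrysize) tuples whose sizes sum to `size`.
    # Drop the least recently accessed entries until we are under `limit`.
    heapq.heapify(entries)
    removed = []
    while entries and size > limit:
        atime, path, entrysize = heapq.heappop(entries)
        size -= entrysize
        removed.append(path)
    return size, removed

# 30 bytes cached, 12 byte limit: the two oldest entries go first.
size, removed = evict_until_under_limit(
    [(100, 'a', 10), (50, 'b', 10), (200, 'c', 10)], 30, 12)
assert removed == ['b', 'a'] and size == 10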
389 class baseunionstore(object):
389 class baseunionstore(object):
390 def __init__(self, *args, **kwargs):
390 def __init__(self, *args, **kwargs):
391 # If one of the functions that iterates over all of the stores is about to
391 # If one of the functions that iterates over all of the stores is about to
392 # throw a KeyError, try this many times with a full refresh between
392 # throw a KeyError, try this many times with a full refresh between
393 # attempts. A repack operation may have moved data from one store to
393 # attempts. A repack operation may have moved data from one store to
394 # another while we were running.
394 # another while we were running.
395 self.numattempts = kwargs.get(r'numretries', 0) + 1
395 self.numattempts = kwargs.get(r'numretries', 0) + 1
396 # If not None, this function is called on every retry and when the attempts
396 # If not None, this function is called on every retry and when the attempts
397 # are exhausted.
397 # are exhausted.
398 self.retrylog = kwargs.get(r'retrylog', None)
398 self.retrylog = kwargs.get(r'retrylog', None)
399
399
400 def markforrefresh(self):
400 def markforrefresh(self):
401 for store in self.stores:
401 for store in self.stores:
402 if util.safehasattr(store, 'markforrefresh'):
402 if util.safehasattr(store, 'markforrefresh'):
403 store.markforrefresh()
403 store.markforrefresh()
404
404
405 @staticmethod
405 @staticmethod
406 def retriable(fn):
406 def retriable(fn):
407 def noop(*args):
407 def noop(*args):
408 pass
408 pass
409 def wrapped(self, *args, **kwargs):
409 def wrapped(self, *args, **kwargs):
410 retrylog = self.retrylog or noop
410 retrylog = self.retrylog or noop
411 funcname = fn.__name__
411 funcname = fn.__name__
412 for i in pycompat.xrange(self.numattempts):
412 for i in pycompat.xrange(self.numattempts):
413 if i > 0:
413 if i > 0:
414 retrylog('re-attempting (n=%d) %s\n' % (i, funcname))
414 retrylog('re-attempting (n=%d) %s\n' % (i, funcname))
415 self.markforrefresh()
415 self.markforrefresh()
416 try:
416 try:
417 return fn(self, *args, **kwargs)
417 return fn(self, *args, **kwargs)
418 except KeyError:
418 except KeyError:
419 pass
419 pass
420 # retries exhausted
420 # retries exhausted
421 retrylog('retries exhausted in %s, raising KeyError\n' % funcname)
421 retrylog('retries exhausted in %s, raising KeyError\n' % funcname)
422 raise
422 raise
423 return wrapped
423 return wrapped
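The retriable wrapper above simply re-runs the decorated method after a markforrefresh() whenever it raises KeyError, up to numattempts times. Below is a self-contained sketch of the same retry-on-KeyError pattern; the toystore class and its refresh() method are invented for the example and are not part of remotefilelog.

import functools

def retriable(numattempts):
    def decorator(fn):
        @functools.wraps(fn)
        def wrapped(self, *args, **kwargs):
            for attempt in range(numattempts):
                if attempt > 0:
                    self.refresh()   # analogous to markforrefresh()
                try:
                    return fn(self, *args, **kwargs)
                except KeyError:
                    pass
            raise KeyError('%s: retries exhausted' % fn.__name__)
        return wrapped
    return decorator

class toystore(object):
    def __init__(self):
        self.data = {}
    def refresh(self):
        # Pretend a concurrent repack just made the key visible.
        self.data['x'] = 42
    @retriable(numattempts=2)
    def get(self, key):
        return self.data[key]

assert toystore().get('x') == 42   # first attempt misses, retry succeeds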
@@ -1,378 +1,378
1 # debugcommands.py - debug logic for remotefilelog
1 # debugcommands.py - debug logic for remotefilelog
2 #
2 #
3 # Copyright 2013 Facebook, Inc.
3 # Copyright 2013 Facebook, Inc.
4 #
4 #
5 # This software may be used and distributed according to the terms of the
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
6 # GNU General Public License version 2 or any later version.
7 from __future__ import absolute_import
7 from __future__ import absolute_import
8
8
9 import hashlib
9 import hashlib
10 import os
10 import os
11 import zlib
11 import zlib
12
12
13 from mercurial.node import bin, hex, nullid, short
13 from mercurial.node import bin, hex, nullid, short
14 from mercurial.i18n import _
14 from mercurial.i18n import _
15 from mercurial import (
15 from mercurial import (
16 error,
16 error,
17 filelog,
17 filelog,
18 node as nodemod,
18 node as nodemod,
19 revlog,
19 revlog,
20 )
20 )
21 from . import (
21 from . import (
22 constants,
22 constants,
23 datapack,
23 datapack,
24 extutil,
24 extutil,
25 fileserverclient,
25 fileserverclient,
26 historypack,
26 historypack,
27 repack,
27 repack,
28 shallowutil,
28 shallowutil,
29 )
29 )
30
30
31 def debugremotefilelog(ui, path, **opts):
31 def debugremotefilelog(ui, path, **opts):
32 decompress = opts.get(r'decompress')
32 decompress = opts.get(r'decompress')
33
33
34 size, firstnode, mapping = parsefileblob(path, decompress)
34 size, firstnode, mapping = parsefileblob(path, decompress)
35
35
36 ui.status(_("size: %s bytes\n") % (size))
36 ui.status(_("size: %s bytes\n") % (size))
37 ui.status(_("path: %s \n") % (path))
37 ui.status(_("path: %s \n") % (path))
38 ui.status(_("key: %s \n") % (short(firstnode)))
38 ui.status(_("key: %s \n") % (short(firstnode)))
39 ui.status(_("\n"))
39 ui.status(_("\n"))
40 ui.status(_("%12s => %12s %13s %13s %12s\n") %
40 ui.status(_("%12s => %12s %13s %13s %12s\n") %
41 ("node", "p1", "p2", "linknode", "copyfrom"))
41 ("node", "p1", "p2", "linknode", "copyfrom"))
42
42
43 queue = [firstnode]
43 queue = [firstnode]
44 while queue:
44 while queue:
45 node = queue.pop(0)
45 node = queue.pop(0)
46 p1, p2, linknode, copyfrom = mapping[node]
46 p1, p2, linknode, copyfrom = mapping[node]
47 ui.status(_("%s => %s %s %s %s\n") %
47 ui.status(_("%s => %s %s %s %s\n") %
48 (short(node), short(p1), short(p2), short(linknode), copyfrom))
48 (short(node), short(p1), short(p2), short(linknode), copyfrom))
49 if p1 != nullid:
49 if p1 != nullid:
50 queue.append(p1)
50 queue.append(p1)
51 if p2 != nullid:
51 if p2 != nullid:
52 queue.append(p2)
52 queue.append(p2)
53
53
54 def buildtemprevlog(repo, file):
54 def buildtemprevlog(repo, file):
55 # get filename key
55 # get filename key
56 filekey = nodemod.hex(hashlib.sha1(file).digest())
56 filekey = nodemod.hex(hashlib.sha1(file).digest())
57 filedir = os.path.join(repo.path, 'store/data', filekey)
57 filedir = os.path.join(repo.path, 'store/data', filekey)
58
58
59 # sort all entries based on linkrev
59 # sort all entries based on linkrev
60 fctxs = []
60 fctxs = []
61 for filenode in os.listdir(filedir):
61 for filenode in os.listdir(filedir):
62 if '_old' not in filenode:
62 if '_old' not in filenode:
63 fctxs.append(repo.filectx(file, fileid=bin(filenode)))
63 fctxs.append(repo.filectx(file, fileid=bin(filenode)))
64
64
65 fctxs = sorted(fctxs, key=lambda x: x.linkrev())
65 fctxs = sorted(fctxs, key=lambda x: x.linkrev())
66
66
67 # add to revlog
67 # add to revlog
68 temppath = repo.sjoin('data/temprevlog.i')
68 temppath = repo.sjoin('data/temprevlog.i')
69 if os.path.exists(temppath):
69 if os.path.exists(temppath):
70 os.remove(temppath)
70 os.remove(temppath)
71 r = filelog.filelog(repo.svfs, 'temprevlog')
71 r = filelog.filelog(repo.svfs, 'temprevlog')
72
72
73 class faket(object):
73 class faket(object):
74 def add(self, a, b, c):
74 def add(self, a, b, c):
75 pass
75 pass
76 t = faket()
76 t = faket()
77 for fctx in fctxs:
77 for fctx in fctxs:
78 if fctx.node() not in repo:
78 if fctx.node() not in repo:
79 continue
79 continue
80
80
81 p = fctx.filelog().parents(fctx.filenode())
81 p = fctx.filelog().parents(fctx.filenode())
82 meta = {}
82 meta = {}
83 if fctx.renamed():
83 if fctx.renamed():
84 meta['copy'] = fctx.renamed()[0]
84 meta['copy'] = fctx.renamed()[0]
85 meta['copyrev'] = hex(fctx.renamed()[1])
85 meta['copyrev'] = hex(fctx.renamed()[1])
86
86
87 r.add(fctx.data(), meta, t, fctx.linkrev(), p[0], p[1])
87 r.add(fctx.data(), meta, t, fctx.linkrev(), p[0], p[1])
88
88
89 return r
89 return r
90
90
91 def debugindex(orig, ui, repo, file_=None, **opts):
91 def debugindex(orig, ui, repo, file_=None, **opts):
92 """dump the contents of an index file"""
92 """dump the contents of an index file"""
93 if (opts.get(r'changelog') or
93 if (opts.get(r'changelog') or
94 opts.get(r'manifest') or
94 opts.get(r'manifest') or
95 opts.get(r'dir') or
95 opts.get(r'dir') or
96 not shallowutil.isenabled(repo) or
96 not shallowutil.isenabled(repo) or
97 not repo.shallowmatch(file_)):
97 not repo.shallowmatch(file_)):
98 return orig(ui, repo, file_, **opts)
98 return orig(ui, repo, file_, **opts)
99
99
100 r = buildtemprevlog(repo, file_)
100 r = buildtemprevlog(repo, file_)
101
101
102 # debugindex like normal
102 # debugindex like normal
103 format = opts.get('format', 0)
103 format = opts.get('format', 0)
104 if format not in (0, 1):
104 if format not in (0, 1):
105 raise error.Abort(_("unknown format %d") % format)
105 raise error.Abort(_("unknown format %d") % format)
106
106
107 generaldelta = r.version & revlog.FLAG_GENERALDELTA
107 generaldelta = r.version & revlog.FLAG_GENERALDELTA
108 if generaldelta:
108 if generaldelta:
109 basehdr = ' delta'
109 basehdr = ' delta'
110 else:
110 else:
111 basehdr = ' base'
111 basehdr = ' base'
112
112
113 if format == 0:
113 if format == 0:
114 ui.write((" rev offset length " + basehdr + " linkrev"
114 ui.write((" rev offset length " + basehdr + " linkrev"
115 " nodeid p1 p2\n"))
115 " nodeid p1 p2\n"))
116 elif format == 1:
116 elif format == 1:
117 ui.write((" rev flag offset length"
117 ui.write((" rev flag offset length"
118 " size " + basehdr + " link p1 p2"
118 " size " + basehdr + " link p1 p2"
119 " nodeid\n"))
119 " nodeid\n"))
120
120
121 for i in r:
121 for i in r:
122 node = r.node(i)
122 node = r.node(i)
123 if generaldelta:
123 if generaldelta:
124 base = r.deltaparent(i)
124 base = r.deltaparent(i)
125 else:
125 else:
126 base = r.chainbase(i)
126 base = r.chainbase(i)
127 if format == 0:
127 if format == 0:
128 try:
128 try:
129 pp = r.parents(node)
129 pp = r.parents(node)
130 except Exception:
130 except Exception:
131 pp = [nullid, nullid]
131 pp = [nullid, nullid]
132 ui.write("% 6d % 9d % 7d % 6d % 7d %s %s %s\n" % (
132 ui.write("% 6d % 9d % 7d % 6d % 7d %s %s %s\n" % (
133 i, r.start(i), r.length(i), base, r.linkrev(i),
133 i, r.start(i), r.length(i), base, r.linkrev(i),
134 short(node), short(pp[0]), short(pp[1])))
134 short(node), short(pp[0]), short(pp[1])))
135 elif format == 1:
135 elif format == 1:
136 pr = r.parentrevs(i)
136 pr = r.parentrevs(i)
137 ui.write("% 6d %04x % 8d % 8d % 8d % 6d % 6d % 6d % 6d %s\n" % (
137 ui.write("% 6d %04x % 8d % 8d % 8d % 6d % 6d % 6d % 6d %s\n" % (
138 i, r.flags(i), r.start(i), r.length(i), r.rawsize(i),
138 i, r.flags(i), r.start(i), r.length(i), r.rawsize(i),
139 base, r.linkrev(i), pr[0], pr[1], short(node)))
139 base, r.linkrev(i), pr[0], pr[1], short(node)))
140
140
141 def debugindexdot(orig, ui, repo, file_):
141 def debugindexdot(orig, ui, repo, file_):
142 """dump an index DAG as a graphviz dot file"""
142 """dump an index DAG as a graphviz dot file"""
143 if not shallowutil.isenabled(repo):
143 if not shallowutil.isenabled(repo):
144 return orig(ui, repo, file_)
144 return orig(ui, repo, file_)
145
145
146 r = buildtemprevlog(repo, os.path.basename(file_)[:-2])
146 r = buildtemprevlog(repo, os.path.basename(file_)[:-2])
147
147
148 ui.write(("digraph G {\n"))
148 ui.write(("digraph G {\n"))
149 for i in r:
149 for i in r:
150 node = r.node(i)
150 node = r.node(i)
151 pp = r.parents(node)
151 pp = r.parents(node)
152 ui.write("\t%d -> %d\n" % (r.rev(pp[0]), i))
152 ui.write("\t%d -> %d\n" % (r.rev(pp[0]), i))
153 if pp[1] != nullid:
153 if pp[1] != nullid:
154 ui.write("\t%d -> %d\n" % (r.rev(pp[1]), i))
154 ui.write("\t%d -> %d\n" % (r.rev(pp[1]), i))
155 ui.write("}\n")
155 ui.write("}\n")
156
156
157 def verifyremotefilelog(ui, path, **opts):
157 def verifyremotefilelog(ui, path, **opts):
158 decompress = opts.get(r'decompress')
158 decompress = opts.get(r'decompress')
159
159
160 for root, dirs, files in os.walk(path):
160 for root, dirs, files in os.walk(path):
161 for file in files:
161 for file in files:
162 if file == "repos":
162 if file == "repos":
163 continue
163 continue
164 filepath = os.path.join(root, file)
164 filepath = os.path.join(root, file)
165 size, firstnode, mapping = parsefileblob(filepath, decompress)
165 size, firstnode, mapping = parsefileblob(filepath, decompress)
166 for p1, p2, linknode, copyfrom in mapping.itervalues():
166 for p1, p2, linknode, copyfrom in mapping.itervalues():
167 if linknode == nullid:
167 if linknode == nullid:
168 actualpath = os.path.relpath(root, path)
168 actualpath = os.path.relpath(root, path)
169 key = fileserverclient.getcachekey("reponame", actualpath,
169 key = fileserverclient.getcachekey("reponame", actualpath,
170 file)
170 file)
171 ui.status("%s %s\n" % (key, os.path.relpath(filepath,
171 ui.status("%s %s\n" % (key, os.path.relpath(filepath,
172 path)))
172 path)))
173
173
174 def _decompressblob(raw):
174 def _decompressblob(raw):
175 return zlib.decompress(raw)
175 return zlib.decompress(raw)
176
176
177 def parsefileblob(path, decompress):
177 def parsefileblob(path, decompress):
178 raw = None
178 raw = None
179 f = open(path, "r")
179 f = open(path, "rb")
180 try:
180 try:
181 raw = f.read()
181 raw = f.read()
182 finally:
182 finally:
183 f.close()
183 f.close()
184
184
185 if decompress:
185 if decompress:
186 raw = _decompressblob(raw)
186 raw = _decompressblob(raw)
187
187
188 offset, size, flags = shallowutil.parsesizeflags(raw)
188 offset, size, flags = shallowutil.parsesizeflags(raw)
189 start = offset + size
189 start = offset + size
190
190
191 firstnode = None
191 firstnode = None
192
192
193 mapping = {}
193 mapping = {}
194 while start < len(raw):
194 while start < len(raw):
195 divider = raw.index('\0', start + 80)
195 divider = raw.index('\0', start + 80)
196
196
197 currentnode = raw[start:(start + 20)]
197 currentnode = raw[start:(start + 20)]
198 if not firstnode:
198 if not firstnode:
199 firstnode = currentnode
199 firstnode = currentnode
200
200
201 p1 = raw[(start + 20):(start + 40)]
201 p1 = raw[(start + 20):(start + 40)]
202 p2 = raw[(start + 40):(start + 60)]
202 p2 = raw[(start + 40):(start + 60)]
203 linknode = raw[(start + 60):(start + 80)]
203 linknode = raw[(start + 60):(start + 80)]
204 copyfrom = raw[(start + 80):divider]
204 copyfrom = raw[(start + 80):divider]
205
205
206 mapping[currentnode] = (p1, p2, linknode, copyfrom)
206 mapping[currentnode] = (p1, p2, linknode, copyfrom)
207 start = divider + 1
207 start = divider + 1
208
208
209 return size, firstnode, mapping
209 return size, firstnode, mapping
210
210
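parsefileblob above walks the history section of a remotefilelog blob: after the size header, each ancestor entry is four 20-byte binary hashes (node, p1, p2, linknode) followed by a copyfrom path terminated by a NUL byte. A small round trip of that entry layout, with toy values rather than a real blob:

node = b'\x11' * 20
p1 = b'\x22' * 20
p2 = b'\x00' * 20
linknode = b'\x33' * 20
copyfrom = b'old/path'

entry = node + p1 + p2 + linknode + copyfrom + b'\0'

# Decode it the same way parsefileblob slices the raw text.
start = 0
divider = entry.index(b'\0', start + 80)
assert entry[start:start + 20] == node
assert entry[(start + 60):(start + 80)] == linknode
assert entry[(start + 80):divider] == copyfrom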
211 def debugdatapack(ui, *paths, **opts):
211 def debugdatapack(ui, *paths, **opts):
212 for path in paths:
212 for path in paths:
213 if '.data' in path:
213 if '.data' in path:
214 path = path[:path.index('.data')]
214 path = path[:path.index('.data')]
215 ui.write("%s:\n" % path)
215 ui.write("%s:\n" % path)
216 dpack = datapack.datapack(path)
216 dpack = datapack.datapack(path)
217 node = opts.get(r'node')
217 node = opts.get(r'node')
218 if node:
218 if node:
219 deltachain = dpack.getdeltachain('', bin(node))
219 deltachain = dpack.getdeltachain('', bin(node))
220 dumpdeltachain(ui, deltachain, **opts)
220 dumpdeltachain(ui, deltachain, **opts)
221 return
221 return
222
222
223 if opts.get(r'long'):
223 if opts.get(r'long'):
224 hashformatter = hex
224 hashformatter = hex
225 hashlen = 42
225 hashlen = 42
226 else:
226 else:
227 hashformatter = short
227 hashformatter = short
228 hashlen = 14
228 hashlen = 14
229
229
230 lastfilename = None
230 lastfilename = None
231 totaldeltasize = 0
231 totaldeltasize = 0
232 totalblobsize = 0
232 totalblobsize = 0
233 def printtotals():
233 def printtotals():
234 if lastfilename is not None:
234 if lastfilename is not None:
235 ui.write("\n")
235 ui.write("\n")
236 if not totaldeltasize or not totalblobsize:
236 if not totaldeltasize or not totalblobsize:
237 return
237 return
238 difference = totalblobsize - totaldeltasize
238 difference = totalblobsize - totaldeltasize
239 deltastr = "%0.1f%% %s" % (
239 deltastr = "%0.1f%% %s" % (
240 (100.0 * abs(difference) / totalblobsize),
240 (100.0 * abs(difference) / totalblobsize),
241 ("smaller" if difference > 0 else "bigger"))
241 ("smaller" if difference > 0 else "bigger"))
242
242
243 ui.write(("Total:%s%s %s (%s)\n") % (
243 ui.write(("Total:%s%s %s (%s)\n") % (
244 "".ljust(2 * hashlen - len("Total:")),
244 "".ljust(2 * hashlen - len("Total:")),
245 str(totaldeltasize).ljust(12),
245 str(totaldeltasize).ljust(12),
246 str(totalblobsize).ljust(9),
246 str(totalblobsize).ljust(9),
247 deltastr
247 deltastr
248 ))
248 ))
249
249
250 bases = {}
250 bases = {}
251 nodes = set()
251 nodes = set()
252 failures = 0
252 failures = 0
253 for filename, node, deltabase, deltalen in dpack.iterentries():
253 for filename, node, deltabase, deltalen in dpack.iterentries():
254 bases[node] = deltabase
254 bases[node] = deltabase
255 if node in nodes:
255 if node in nodes:
256 ui.write(("Bad entry: %s appears twice\n" % short(node)))
256 ui.write(("Bad entry: %s appears twice\n" % short(node)))
257 failures += 1
257 failures += 1
258 nodes.add(node)
258 nodes.add(node)
259 if filename != lastfilename:
259 if filename != lastfilename:
260 printtotals()
260 printtotals()
261 name = '(empty name)' if filename == '' else filename
261 name = '(empty name)' if filename == '' else filename
262 ui.write("%s:\n" % name)
262 ui.write("%s:\n" % name)
263 ui.write("%s%s%s%s\n" % (
263 ui.write("%s%s%s%s\n" % (
264 "Node".ljust(hashlen),
264 "Node".ljust(hashlen),
265 "Delta Base".ljust(hashlen),
265 "Delta Base".ljust(hashlen),
266 "Delta Length".ljust(14),
266 "Delta Length".ljust(14),
267 "Blob Size".ljust(9)))
267 "Blob Size".ljust(9)))
268 lastfilename = filename
268 lastfilename = filename
269 totalblobsize = 0
269 totalblobsize = 0
270 totaldeltasize = 0
270 totaldeltasize = 0
271
271
272 # Metadata could be missing, in which case it will be an empty dict.
272 # Metadata could be missing, in which case it will be an empty dict.
273 meta = dpack.getmeta(filename, node)
273 meta = dpack.getmeta(filename, node)
274 if constants.METAKEYSIZE in meta:
274 if constants.METAKEYSIZE in meta:
275 blobsize = meta[constants.METAKEYSIZE]
275 blobsize = meta[constants.METAKEYSIZE]
276 totaldeltasize += deltalen
276 totaldeltasize += deltalen
277 totalblobsize += blobsize
277 totalblobsize += blobsize
278 else:
278 else:
279 blobsize = "(missing)"
279 blobsize = "(missing)"
280 ui.write("%s %s %s%s\n" % (
280 ui.write("%s %s %s%s\n" % (
281 hashformatter(node),
281 hashformatter(node),
282 hashformatter(deltabase),
282 hashformatter(deltabase),
283 str(deltalen).ljust(14),
283 str(deltalen).ljust(14),
284 blobsize))
284 blobsize))
285
285
286 if filename is not None:
286 if filename is not None:
287 printtotals()
287 printtotals()
288
288
289 failures += _sanitycheck(ui, set(nodes), bases)
289 failures += _sanitycheck(ui, set(nodes), bases)
290 if failures > 1:
290 if failures > 1:
291 ui.warn(("%d failures\n" % failures))
291 ui.warn(("%d failures\n" % failures))
292 return 1
292 return 1
293
293
294 def _sanitycheck(ui, nodes, bases):
294 def _sanitycheck(ui, nodes, bases):
295 """
295 """
296 Does some basic sanity checking on a packfile with ``nodes`` and ``bases``
296 Does some basic sanity checking on a packfile with ``nodes`` and ``bases``
297 (a mapping of node->base):
297 (a mapping of node->base):
298
298
299 - Each deltabase must itself be a node elsewhere in the pack
299 - Each deltabase must itself be a node elsewhere in the pack
300 - There must be no cycles
300 - There must be no cycles
301 """
301 """
302 failures = 0
302 failures = 0
303 for node in nodes:
303 for node in nodes:
304 seen = set()
304 seen = set()
305 current = node
305 current = node
306 deltabase = bases[current]
306 deltabase = bases[current]
307
307
308 while deltabase != nullid:
308 while deltabase != nullid:
309 if deltabase not in nodes:
309 if deltabase not in nodes:
310 ui.warn(("Bad entry: %s has an unknown deltabase (%s)\n" %
310 ui.warn(("Bad entry: %s has an unknown deltabase (%s)\n" %
311 (short(node), short(deltabase))))
311 (short(node), short(deltabase))))
312 failures += 1
312 failures += 1
313 break
313 break
314
314
315 if deltabase in seen:
315 if deltabase in seen:
316 ui.warn(("Bad entry: %s has a cycle (at %s)\n" %
316 ui.warn(("Bad entry: %s has a cycle (at %s)\n" %
317 (short(node), short(deltabase))))
317 (short(node), short(deltabase))))
318 failures += 1
318 failures += 1
319 break
319 break
320
320
321 current = deltabase
321 current = deltabase
322 seen.add(current)
322 seen.add(current)
323 deltabase = bases[current]
323 deltabase = bases[current]
324 # Since ``node`` begins a valid chain, reset/memoize its base to nullid
324 # Since ``node`` begins a valid chain, reset/memoize its base to nullid
325 # so we don't traverse it again.
325 # so we don't traverse it again.
326 bases[node] = nullid
326 bases[node] = nullid
327 return failures
327 return failures
328
328
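A quick illustration of what _sanitycheck flags, using a toy bases mapping that contains a two-node delta cycle. The checkchains helper below is a simplified standalone mirror of the walk above, written for this example only.

nullid = b'\0' * 20   # same sentinel value as mercurial.node.nullid

def checkchains(nodes, bases):
    # Every delta chain must end at nullid without leaving the pack and
    # without revisiting a node.
    failures = 0
    for node in nodes:
        seen = set()
        current = node
        while bases[current] != nullid:
            deltabase = bases[current]
            if deltabase not in nodes or deltabase in seen:
                failures += 1
                break
            seen.add(deltabase)
            current = deltabase
        else:
            bases[node] = nullid    # memoize known-good chains
    return failures

a, b, c = b'a' * 20, b'b' * 20, b'c' * 20
# a -> nullid is fine; b and c delta off each other, forming a cycle.
assert checkchains({a, b, c}, {a: nullid, b: c, c: b}) == 2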
329 def dumpdeltachain(ui, deltachain, **opts):
329 def dumpdeltachain(ui, deltachain, **opts):
330 hashformatter = hex
330 hashformatter = hex
331 hashlen = 40
331 hashlen = 40
332
332
333 lastfilename = None
333 lastfilename = None
334 for filename, node, filename, deltabasenode, delta in deltachain:
334 for filename, node, filename, deltabasenode, delta in deltachain:
335 if filename != lastfilename:
335 if filename != lastfilename:
336 ui.write("\n%s\n" % filename)
336 ui.write("\n%s\n" % filename)
337 lastfilename = filename
337 lastfilename = filename
338 ui.write("%s %s %s %s\n" % (
338 ui.write("%s %s %s %s\n" % (
339 "Node".ljust(hashlen),
339 "Node".ljust(hashlen),
340 "Delta Base".ljust(hashlen),
340 "Delta Base".ljust(hashlen),
341 "Delta SHA1".ljust(hashlen),
341 "Delta SHA1".ljust(hashlen),
342 "Delta Length".ljust(6),
342 "Delta Length".ljust(6),
343 ))
343 ))
344
344
345 ui.write("%s %s %s %s\n" % (
345 ui.write("%s %s %s %s\n" % (
346 hashformatter(node),
346 hashformatter(node),
347 hashformatter(deltabasenode),
347 hashformatter(deltabasenode),
348 nodemod.hex(hashlib.sha1(delta).digest()),
348 nodemod.hex(hashlib.sha1(delta).digest()),
349 len(delta)))
349 len(delta)))
350
350
351 def debughistorypack(ui, path):
351 def debughistorypack(ui, path):
352 if '.hist' in path:
352 if '.hist' in path:
353 path = path[:path.index('.hist')]
353 path = path[:path.index('.hist')]
354 hpack = historypack.historypack(path)
354 hpack = historypack.historypack(path)
355
355
356 lastfilename = None
356 lastfilename = None
357 for entry in hpack.iterentries():
357 for entry in hpack.iterentries():
358 filename, node, p1node, p2node, linknode, copyfrom = entry
358 filename, node, p1node, p2node, linknode, copyfrom = entry
359 if filename != lastfilename:
359 if filename != lastfilename:
360 ui.write("\n%s\n" % filename)
360 ui.write("\n%s\n" % filename)
361 ui.write("%s%s%s%s%s\n" % (
361 ui.write("%s%s%s%s%s\n" % (
362 "Node".ljust(14),
362 "Node".ljust(14),
363 "P1 Node".ljust(14),
363 "P1 Node".ljust(14),
364 "P2 Node".ljust(14),
364 "P2 Node".ljust(14),
365 "Link Node".ljust(14),
365 "Link Node".ljust(14),
366 "Copy From"))
366 "Copy From"))
367 lastfilename = filename
367 lastfilename = filename
368 ui.write("%s %s %s %s %s\n" % (short(node), short(p1node),
368 ui.write("%s %s %s %s %s\n" % (short(node), short(p1node),
369 short(p2node), short(linknode), copyfrom))
369 short(p2node), short(linknode), copyfrom))
370
370
371 def debugwaitonrepack(repo):
371 def debugwaitonrepack(repo):
372 with extutil.flock(repack.repacklockvfs(repo).join('repacklock'), ''):
372 with extutil.flock(repack.repacklockvfs(repo).join('repacklock'), ''):
373 return
373 return
374
374
375 def debugwaitonprefetch(repo):
375 def debugwaitonprefetch(repo):
376 with repo._lock(repo.svfs, "prefetchlock", True, None,
376 with repo._lock(repo.svfs, "prefetchlock", True, None,
377 None, _('prefetching in %s') % repo.origroot):
377 None, _('prefetching in %s') % repo.origroot):
378 pass
378 pass
@@ -1,589 +1,589
1 # fileserverclient.py - client for communicating with the cache process
1 # fileserverclient.py - client for communicating with the cache process
2 #
2 #
3 # Copyright 2013 Facebook, Inc.
3 # Copyright 2013 Facebook, Inc.
4 #
4 #
5 # This software may be used and distributed according to the terms of the
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
6 # GNU General Public License version 2 or any later version.
7
7
8 from __future__ import absolute_import
8 from __future__ import absolute_import
9
9
10 import hashlib
10 import hashlib
11 import io
11 import io
12 import os
12 import os
13 import threading
13 import threading
14 import time
14 import time
15 import zlib
15 import zlib
16
16
17 from mercurial.i18n import _
17 from mercurial.i18n import _
18 from mercurial.node import bin, hex, nullid
18 from mercurial.node import bin, hex, nullid
19 from mercurial import (
19 from mercurial import (
20 error,
20 error,
21 node,
21 node,
22 pycompat,
22 pycompat,
23 revlog,
23 revlog,
24 sshpeer,
24 sshpeer,
25 util,
25 util,
26 wireprotov1peer,
26 wireprotov1peer,
27 )
27 )
28 from mercurial.utils import procutil
28 from mercurial.utils import procutil
29
29
30 from . import (
30 from . import (
31 constants,
31 constants,
32 contentstore,
32 contentstore,
33 metadatastore,
33 metadatastore,
34 )
34 )
35
35
36 _sshv1peer = sshpeer.sshv1peer
36 _sshv1peer = sshpeer.sshv1peer
37
37
38 # Statistics for debugging
38 # Statistics for debugging
39 fetchcost = 0
39 fetchcost = 0
40 fetches = 0
40 fetches = 0
41 fetched = 0
41 fetched = 0
42 fetchmisses = 0
42 fetchmisses = 0
43
43
44 _lfsmod = None
44 _lfsmod = None
45 _downloading = _('downloading')
45 _downloading = _('downloading')
46
46
47 def getcachekey(reponame, file, id):
47 def getcachekey(reponame, file, id):
48 pathhash = node.hex(hashlib.sha1(file).digest())
48 pathhash = node.hex(hashlib.sha1(file).digest())
49 return os.path.join(reponame, pathhash[:2], pathhash[2:], id)
49 return os.path.join(reponame, pathhash[:2], pathhash[2:], id)
50
50
51 def getlocalkey(file, id):
51 def getlocalkey(file, id):
52 pathhash = node.hex(hashlib.sha1(file).digest())
52 pathhash = node.hex(hashlib.sha1(file).digest())
53 return os.path.join(pathhash, id)
53 return os.path.join(pathhash, id)
54
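getcachekey spreads cached blobs across directories by hashing the file path and splitting the first two hex characters into a subdirectory, so a key looks like reponame/<2 hex>/<38 hex>/<node hex>. A quick demonstration with made-up inputs (hexdigest() is equivalent to the node.hex(...digest()) call used above):

import hashlib
import os

reponame = 'myrepo'                      # hypothetical repo name
file = b'foo/bar.txt'                    # the path is hashed as bytes
id = '1' * 40                            # file node as hex

pathhash = hashlib.sha1(file).hexdigest()
key = os.path.join(reponame, pathhash[:2], pathhash[2:], id)
# e.g. myrepo/<2 hex chars>/<remaining 38 hex chars>/111...111
assert len(pathhash) == 40 and key.count(os.sep) == 3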
54
55 def peersetup(ui, peer):
55 def peersetup(ui, peer):
56
56
57 class remotefilepeer(peer.__class__):
57 class remotefilepeer(peer.__class__):
58 @wireprotov1peer.batchable
58 @wireprotov1peer.batchable
59 def x_rfl_getfile(self, file, node):
59 def x_rfl_getfile(self, file, node):
60 if not self.capable('x_rfl_getfile'):
60 if not self.capable('x_rfl_getfile'):
61 raise error.Abort(
61 raise error.Abort(
62 'configured remotefile server does not support getfile')
62 'configured remotefile server does not support getfile')
63 f = wireprotov1peer.future()
63 f = wireprotov1peer.future()
64 yield {'file': file, 'node': node}, f
64 yield {'file': file, 'node': node}, f
65 code, data = f.value.split('\0', 1)
65 code, data = f.value.split('\0', 1)
66 if int(code):
66 if int(code):
67 raise error.LookupError(file, node, data)
67 raise error.LookupError(file, node, data)
68 yield data
68 yield data
69
69
70 @wireprotov1peer.batchable
70 @wireprotov1peer.batchable
71 def x_rfl_getflogheads(self, path):
71 def x_rfl_getflogheads(self, path):
72 if not self.capable('x_rfl_getflogheads'):
72 if not self.capable('x_rfl_getflogheads'):
73 raise error.Abort('configured remotefile server does not '
73 raise error.Abort('configured remotefile server does not '
74 'support getflogheads')
74 'support getflogheads')
75 f = wireprotov1peer.future()
75 f = wireprotov1peer.future()
76 yield {'path': path}, f
76 yield {'path': path}, f
77 heads = f.value.split('\n') if f.value else []
77 heads = f.value.split('\n') if f.value else []
78 yield heads
78 yield heads
79
79
80 def _updatecallstreamopts(self, command, opts):
80 def _updatecallstreamopts(self, command, opts):
81 if command != 'getbundle':
81 if command != 'getbundle':
82 return
82 return
83 if (constants.NETWORK_CAP_LEGACY_SSH_GETFILES
83 if (constants.NETWORK_CAP_LEGACY_SSH_GETFILES
84 not in self.capabilities()):
84 not in self.capabilities()):
85 return
85 return
86 if not util.safehasattr(self, '_localrepo'):
86 if not util.safehasattr(self, '_localrepo'):
87 return
87 return
88 if (constants.SHALLOWREPO_REQUIREMENT
88 if (constants.SHALLOWREPO_REQUIREMENT
89 not in self._localrepo.requirements):
89 not in self._localrepo.requirements):
90 return
90 return
91
91
92 bundlecaps = opts.get('bundlecaps')
92 bundlecaps = opts.get('bundlecaps')
93 if bundlecaps:
93 if bundlecaps:
94 bundlecaps = [bundlecaps]
94 bundlecaps = [bundlecaps]
95 else:
95 else:
96 bundlecaps = []
96 bundlecaps = []
97
97
98 # shallow, includepattern, and excludepattern are a hacky way of
98 # shallow, includepattern, and excludepattern are a hacky way of
99 # carrying over data from the local repo to this getbundle
99 # carrying over data from the local repo to this getbundle
100 # command. We need to do it this way because bundle1 getbundle
100 # command. We need to do it this way because bundle1 getbundle
101 # doesn't provide any other place we can hook in to manipulate
101 # doesn't provide any other place we can hook in to manipulate
102 # getbundle args before it goes across the wire. Once we get rid
102 # getbundle args before it goes across the wire. Once we get rid
103 # of bundle1, we can use bundle2's _pullbundle2extraprepare to
103 # of bundle1, we can use bundle2's _pullbundle2extraprepare to
104 # do this more cleanly.
104 # do this more cleanly.
105 bundlecaps.append(constants.BUNDLE2_CAPABLITY)
105 bundlecaps.append(constants.BUNDLE2_CAPABLITY)
106 if self._localrepo.includepattern:
106 if self._localrepo.includepattern:
107 patterns = '\0'.join(self._localrepo.includepattern)
107 patterns = '\0'.join(self._localrepo.includepattern)
108 includecap = "includepattern=" + patterns
108 includecap = "includepattern=" + patterns
109 bundlecaps.append(includecap)
109 bundlecaps.append(includecap)
110 if self._localrepo.excludepattern:
110 if self._localrepo.excludepattern:
111 patterns = '\0'.join(self._localrepo.excludepattern)
111 patterns = '\0'.join(self._localrepo.excludepattern)
112 excludecap = "excludepattern=" + patterns
112 excludecap = "excludepattern=" + patterns
113 bundlecaps.append(excludecap)
113 bundlecaps.append(excludecap)
114 opts['bundlecaps'] = ','.join(bundlecaps)
114 opts['bundlecaps'] = ','.join(bundlecaps)
115
115
116 def _sendrequest(self, command, args, **opts):
116 def _sendrequest(self, command, args, **opts):
117 self._updatecallstreamopts(command, args)
117 self._updatecallstreamopts(command, args)
118 return super(remotefilepeer, self)._sendrequest(command, args,
118 return super(remotefilepeer, self)._sendrequest(command, args,
119 **opts)
119 **opts)
120
120
121 def _callstream(self, command, **opts):
121 def _callstream(self, command, **opts):
122 supertype = super(remotefilepeer, self)
122 supertype = super(remotefilepeer, self)
123 if not util.safehasattr(supertype, '_sendrequest'):
123 if not util.safehasattr(supertype, '_sendrequest'):
124 self._updatecallstreamopts(command, pycompat.byteskwargs(opts))
124 self._updatecallstreamopts(command, pycompat.byteskwargs(opts))
125 return super(remotefilepeer, self)._callstream(command, **opts)
125 return super(remotefilepeer, self)._callstream(command, **opts)
126
126
127 peer.__class__ = remotefilepeer
127 peer.__class__ = remotefilepeer
128
128
129 class cacheconnection(object):
129 class cacheconnection(object):
130 """The connection for communicating with the remote cache. Performs
130 """The connection for communicating with the remote cache. Performs
131 gets and sets by communicating with an external process that has the
131 gets and sets by communicating with an external process that has the
132 cache-specific implementation.
132 cache-specific implementation.
133 """
133 """
134 def __init__(self):
134 def __init__(self):
135 self.pipeo = self.pipei = self.pipee = None
135 self.pipeo = self.pipei = self.pipee = None
136 self.subprocess = None
136 self.subprocess = None
137 self.connected = False
137 self.connected = False
138
138
139 def connect(self, cachecommand):
139 def connect(self, cachecommand):
140 if self.pipeo:
140 if self.pipeo:
141 raise error.Abort(_("cache connection already open"))
141 raise error.Abort(_("cache connection already open"))
142 self.pipei, self.pipeo, self.pipee, self.subprocess = \
142 self.pipei, self.pipeo, self.pipee, self.subprocess = \
143 procutil.popen4(cachecommand)
143 procutil.popen4(cachecommand)
144 self.connected = True
144 self.connected = True
145
145
146 def close(self):
146 def close(self):
147 def tryclose(pipe):
147 def tryclose(pipe):
148 try:
148 try:
149 pipe.close()
149 pipe.close()
150 except Exception:
150 except Exception:
151 pass
151 pass
152 if self.connected:
152 if self.connected:
153 try:
153 try:
154 self.pipei.write("exit\n")
154 self.pipei.write("exit\n")
155 except Exception:
155 except Exception:
156 pass
156 pass
157 tryclose(self.pipei)
157 tryclose(self.pipei)
158 self.pipei = None
158 self.pipei = None
159 tryclose(self.pipeo)
159 tryclose(self.pipeo)
160 self.pipeo = None
160 self.pipeo = None
161 tryclose(self.pipee)
161 tryclose(self.pipee)
162 self.pipee = None
162 self.pipee = None
163 try:
163 try:
164 # Wait for process to terminate, making sure to avoid deadlock.
164 # Wait for process to terminate, making sure to avoid deadlock.
165 # See https://docs.python.org/2/library/subprocess.html for
165 # See https://docs.python.org/2/library/subprocess.html for
166 # warnings about wait() and deadlocking.
166 # warnings about wait() and deadlocking.
167 self.subprocess.communicate()
167 self.subprocess.communicate()
168 except Exception:
168 except Exception:
169 pass
169 pass
170 self.subprocess = None
170 self.subprocess = None
171 self.connected = False
171 self.connected = False
172
172
173 def request(self, request, flush=True):
173 def request(self, request, flush=True):
174 if self.connected:
174 if self.connected:
175 try:
175 try:
176 self.pipei.write(request)
176 self.pipei.write(request)
177 if flush:
177 if flush:
178 self.pipei.flush()
178 self.pipei.flush()
179 except IOError:
179 except IOError:
180 self.close()
180 self.close()
181
181
182 def receiveline(self):
182 def receiveline(self):
183 if not self.connected:
183 if not self.connected:
184 return None
184 return None
185 try:
185 try:
186 result = self.pipeo.readline()[:-1]
186 result = self.pipeo.readline()[:-1]
187 if not result:
187 if not result:
188 self.close()
188 self.close()
189 except IOError:
189 except IOError:
190 self.close()
190 self.close()
191
191
192 return result
192 return result
193
193
194 def _getfilesbatch(
194 def _getfilesbatch(
195 remote, receivemissing, progresstick, missed, idmap, batchsize):
195 remote, receivemissing, progresstick, missed, idmap, batchsize):
196 # Over http(s), iterbatch is a streamy method and we can start
196 # Over http(s), iterbatch is a streamy method and we can start
197 # looking at results early. This means we send one (potentially
197 # looking at results early. This means we send one (potentially
198 # large) request, but then we show nice progress as we process
198 # large) request, but then we show nice progress as we process
199 # file results, rather than showing chunks of $batchsize in
199 # file results, rather than showing chunks of $batchsize in
200 # progress.
200 # progress.
201 #
201 #
202 # Over ssh, iterbatch isn't streamy because batch() wasn't
202 # Over ssh, iterbatch isn't streamy because batch() wasn't
203 # explicitly designed as a streaming method. In the future we
203 # explicitly designed as a streaming method. In the future we
204 # should probably introduce a streambatch() method upstream and
204 # should probably introduce a streambatch() method upstream and
205 # use that for this.
205 # use that for this.
206 with remote.commandexecutor() as e:
206 with remote.commandexecutor() as e:
207 futures = []
207 futures = []
208 for m in missed:
208 for m in missed:
209 futures.append(e.callcommand('x_rfl_getfile', {
209 futures.append(e.callcommand('x_rfl_getfile', {
210 'file': idmap[m],
210 'file': idmap[m],
211 'node': m[-40:]
211 'node': m[-40:]
212 }))
212 }))
213
213
214 for i, m in enumerate(missed):
214 for i, m in enumerate(missed):
215 r = futures[i].result()
215 r = futures[i].result()
216 futures[i] = None # release memory
216 futures[i] = None # release memory
217 file_ = idmap[m]
217 file_ = idmap[m]
218 node = m[-40:]
218 node = m[-40:]
219 receivemissing(io.BytesIO('%d\n%s' % (len(r), r)), file_, node)
219 receivemissing(io.BytesIO('%d\n%s' % (len(r), r)), file_, node)
220 progresstick()
220 progresstick()
221
221
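_getfilesbatch issues every x_rfl_getfile call up front through the command executor and only then walks the futures in order, so one large request streams back while results are processed. The sketch below shows the same issue-all-then-drain shape with the standard library; it uses a local fetch() stand-in rather than a Mercurial peer and is purely illustrative.

from concurrent.futures import ThreadPoolExecutor

def fetch(key):
    return 'data-for-%s' % key        # stand-in for a remote getfile call

missed = ['k1', 'k2', 'k3']
with ThreadPoolExecutor(max_workers=2) as executor:
    futures = [executor.submit(fetch, key) for key in missed]
    for i, key in enumerate(missed):
        result = futures[i].result()  # consume in request order
        futures[i] = None             # release memory, as the code above does
        assert result == 'data-for-%s' % key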
222 def _getfiles_optimistic(
222 def _getfiles_optimistic(
223 remote, receivemissing, progresstick, missed, idmap, step):
223 remote, receivemissing, progresstick, missed, idmap, step):
224 remote._callstream("x_rfl_getfiles")
224 remote._callstream("x_rfl_getfiles")
225 i = 0
225 i = 0
226 pipeo = remote._pipeo
226 pipeo = remote._pipeo
227 pipei = remote._pipei
227 pipei = remote._pipei
228 while i < len(missed):
228 while i < len(missed):
229 # issue a batch of requests
229 # issue a batch of requests
230 start = i
230 start = i
231 end = min(len(missed), start + step)
231 end = min(len(missed), start + step)
232 i = end
232 i = end
233 for missingid in missed[start:end]:
233 for missingid in missed[start:end]:
234 # issue new request
234 # issue new request
235 versionid = missingid[-40:]
235 versionid = missingid[-40:]
236 file = idmap[missingid]
236 file = idmap[missingid]
237 sshrequest = "%s%s\n" % (versionid, file)
237 sshrequest = "%s%s\n" % (versionid, file)
238 pipeo.write(sshrequest)
238 pipeo.write(sshrequest)
239 pipeo.flush()
239 pipeo.flush()
240
240
241 # receive batch results
241 # receive batch results
242 for missingid in missed[start:end]:
242 for missingid in missed[start:end]:
243 versionid = missingid[-40:]
243 versionid = missingid[-40:]
244 file = idmap[missingid]
244 file = idmap[missingid]
245 receivemissing(pipei, file, versionid)
245 receivemissing(pipei, file, versionid)
246 progresstick()
246 progresstick()
247
247
248 # End the command
248 # End the command
249 pipeo.write('\n')
249 pipeo.write('\n')
250 pipeo.flush()
250 pipeo.flush()
251
251
252 def _getfiles_threaded(
252 def _getfiles_threaded(
253 remote, receivemissing, progresstick, missed, idmap, step):
253 remote, receivemissing, progresstick, missed, idmap, step):
254 remote._callstream("getfiles")
254 remote._callstream("getfiles")
255 pipeo = remote._pipeo
255 pipeo = remote._pipeo
256 pipei = remote._pipei
256 pipei = remote._pipei
257
257
258 def writer():
258 def writer():
259 for missingid in missed:
259 for missingid in missed:
260 versionid = missingid[-40:]
260 versionid = missingid[-40:]
261 file = idmap[missingid]
261 file = idmap[missingid]
262 sshrequest = "%s%s\n" % (versionid, file)
262 sshrequest = "%s%s\n" % (versionid, file)
263 pipeo.write(sshrequest)
263 pipeo.write(sshrequest)
264 pipeo.flush()
264 pipeo.flush()
265 writerthread = threading.Thread(target=writer)
265 writerthread = threading.Thread(target=writer)
266 writerthread.daemon = True
266 writerthread.daemon = True
267 writerthread.start()
267 writerthread.start()
268
268
269 for missingid in missed:
269 for missingid in missed:
270 versionid = missingid[-40:]
270 versionid = missingid[-40:]
271 file = idmap[missingid]
271 file = idmap[missingid]
272 receivemissing(pipei, file, versionid)
272 receivemissing(pipei, file, versionid)
273 progresstick()
273 progresstick()
274
274
275 writerthread.join()
275 writerthread.join()
276 # End the command
276 # End the command
277 pipeo.write('\n')
277 pipeo.write('\n')
278 pipeo.flush()
278 pipeo.flush()
279
279
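_getfiles_threaded overlaps the request and response streams: a daemon thread writes one "<40-char hex node><filename>\n" line per missing id while the main thread reads each answer in the same order. A toy version of that producer/consumer overlap, using a queue in place of the ssh pipe pair; the filenames and node ids are invented.

import threading
try:
    import queue            # Python 3
except ImportError:
    import Queue as queue   # Python 2

pipe = queue.Queue()        # stands in for the ssh pipe pair
missed = [('foo.txt', 'a' * 40), ('bar.txt', 'b' * 40)]

def writer():
    for filename, versionid in missed:
        # Same framing as the getfiles protocol: node hex, then filename.
        pipe.put('%s%s\n' % (versionid, filename))

writerthread = threading.Thread(target=writer)
writerthread.daemon = True
writerthread.start()

# The reader consumes answers in the same order the requests were written.
for filename, versionid in missed:
    line = pipe.get()
    assert line == '%s%s\n' % (versionid, filename)
writerthread.join()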
280 class fileserverclient(object):
280 class fileserverclient(object):
281 """A client for requesting files from the remote file server.
281 """A client for requesting files from the remote file server.
282 """
282 """
283 def __init__(self, repo):
283 def __init__(self, repo):
284 ui = repo.ui
284 ui = repo.ui
285 self.repo = repo
285 self.repo = repo
286 self.ui = ui
286 self.ui = ui
287 self.cacheprocess = ui.config("remotefilelog", "cacheprocess")
287 self.cacheprocess = ui.config("remotefilelog", "cacheprocess")
288 if self.cacheprocess:
288 if self.cacheprocess:
289 self.cacheprocess = util.expandpath(self.cacheprocess)
289 self.cacheprocess = util.expandpath(self.cacheprocess)
290
290
291 # This option causes remotefilelog to pass the full file path to the
291 # This option causes remotefilelog to pass the full file path to the
292 # cacheprocess instead of a hashed key.
292 # cacheprocess instead of a hashed key.
293 self.cacheprocesspasspath = ui.configbool(
293 self.cacheprocesspasspath = ui.configbool(
294 "remotefilelog", "cacheprocess.includepath")
294 "remotefilelog", "cacheprocess.includepath")
295
295
296 self.debugoutput = ui.configbool("remotefilelog", "debug")
296 self.debugoutput = ui.configbool("remotefilelog", "debug")
297
297
298 self.remotecache = cacheconnection()
298 self.remotecache = cacheconnection()
299
299
300 def setstore(self, datastore, historystore, writedata, writehistory):
300 def setstore(self, datastore, historystore, writedata, writehistory):
301 self.datastore = datastore
301 self.datastore = datastore
302 self.historystore = historystore
302 self.historystore = historystore
303 self.writedata = writedata
303 self.writedata = writedata
304 self.writehistory = writehistory
304 self.writehistory = writehistory
305
305
306 def _connect(self):
306 def _connect(self):
307 return self.repo.connectionpool.get(self.repo.fallbackpath)
307 return self.repo.connectionpool.get(self.repo.fallbackpath)
308
308
309 def request(self, fileids):
309 def request(self, fileids):
310 """Takes a list of filename/node pairs and fetches them from the
310 """Takes a list of filename/node pairs and fetches them from the
311 server. Files are stored in the local cache.
311 server. Files are stored in the local cache.
312 A list of nodes that the server couldn't find is returned.
312 A list of nodes that the server couldn't find is returned.
313 If the connection fails, an exception is raised.
313 If the connection fails, an exception is raised.
314 """
314 """
315 if not self.remotecache.connected:
315 if not self.remotecache.connected:
316 self.connect()
316 self.connect()
317 cache = self.remotecache
317 cache = self.remotecache
318 writedata = self.writedata
318 writedata = self.writedata
319
319
320 repo = self.repo
320 repo = self.repo
321 count = len(fileids)
321 count = len(fileids)
322 request = "get\n%d\n" % count
322 request = "get\n%d\n" % count
323 idmap = {}
323 idmap = {}
324 reponame = repo.name
324 reponame = repo.name
325 for file, id in fileids:
325 for file, id in fileids:
326 fullid = getcachekey(reponame, file, id)
326 fullid = getcachekey(reponame, file, id)
327 if self.cacheprocesspasspath:
327 if self.cacheprocesspasspath:
328 request += file + '\0'
328 request += file + '\0'
329 request += fullid + "\n"
329 request += fullid + "\n"
330 idmap[fullid] = file
330 idmap[fullid] = file
331
331
332 cache.request(request)
332 cache.request(request)
333
333
334 total = count
334 total = count
335 self.ui.progress(_downloading, 0, total=count)
335 self.ui.progress(_downloading, 0, total=count)
336
336
337 missed = []
337 missed = []
338 count = 0
338 count = 0
339 while True:
339 while True:
340 missingid = cache.receiveline()
340 missingid = cache.receiveline()
341 if not missingid:
341 if not missingid:
342 missedset = set(missed)
342 missedset = set(missed)
343 for missingid in idmap:
343 for missingid in idmap:
344 if not missingid in missedset:
344 if not missingid in missedset:
345 missed.append(missingid)
345 missed.append(missingid)
346 self.ui.warn(_("warning: cache connection closed early - " +
346 self.ui.warn(_("warning: cache connection closed early - " +
347 "falling back to server\n"))
347 "falling back to server\n"))
348 break
348 break
349 if missingid == "0":
349 if missingid == "0":
350 break
350 break
351 if missingid.startswith("_hits_"):
351 if missingid.startswith("_hits_"):
352 # receive progress reports
352 # receive progress reports
353 parts = missingid.split("_")
353 parts = missingid.split("_")
354 count += int(parts[2])
354 count += int(parts[2])
355 self.ui.progress(_downloading, count, total=total)
355 self.ui.progress(_downloading, count, total=total)
356 continue
356 continue
357
357
358 missed.append(missingid)
358 missed.append(missingid)
359
359
360 global fetchmisses
360 global fetchmisses
361 fetchmisses += len(missed)
361 fetchmisses += len(missed)
362
362
363 count = [total - len(missed)]
363 count = [total - len(missed)]
364 fromcache = count[0]
364 fromcache = count[0]
365 self.ui.progress(_downloading, count[0], total=total)
365 self.ui.progress(_downloading, count[0], total=total)
366 self.ui.log("remotefilelog", "remote cache hit rate is %r of %r\n",
366 self.ui.log("remotefilelog", "remote cache hit rate is %r of %r\n",
367 count[0], total, hit=count[0], total=total)
367 count[0], total, hit=count[0], total=total)
368
368
369 oldumask = os.umask(0o002)
369 oldumask = os.umask(0o002)
370 try:
370 try:
371 # receive cache misses from master
371 # receive cache misses from master
372 if missed:
372 if missed:
373 def progresstick():
373 def progresstick():
374 count[0] += 1
374 count[0] += 1
375 self.ui.progress(_downloading, count[0], total=total)
375 self.ui.progress(_downloading, count[0], total=total)
376 # When verbose is true, sshpeer prints 'running ssh...'
376 # When verbose is true, sshpeer prints 'running ssh...'
377 # to stdout, which can interfere with some command
377 # to stdout, which can interfere with some command
378 # outputs
378 # outputs
379 verbose = self.ui.verbose
379 verbose = self.ui.verbose
380 self.ui.verbose = False
380 self.ui.verbose = False
381 try:
381 try:
382 with self._connect() as conn:
382 with self._connect() as conn:
383 remote = conn.peer
383 remote = conn.peer
384 if remote.capable(
384 if remote.capable(
385 constants.NETWORK_CAP_LEGACY_SSH_GETFILES):
385 constants.NETWORK_CAP_LEGACY_SSH_GETFILES):
386 if not isinstance(remote, _sshv1peer):
386 if not isinstance(remote, _sshv1peer):
387 raise error.Abort('remotefilelog requires ssh '
387 raise error.Abort('remotefilelog requires ssh '
388 'servers')
388 'servers')
389 step = self.ui.configint('remotefilelog',
389 step = self.ui.configint('remotefilelog',
390 'getfilesstep')
390 'getfilesstep')
391 getfilestype = self.ui.config('remotefilelog',
391 getfilestype = self.ui.config('remotefilelog',
392 'getfilestype')
392 'getfilestype')
393 if getfilestype == 'threaded':
393 if getfilestype == 'threaded':
394 _getfiles = _getfiles_threaded
394 _getfiles = _getfiles_threaded
395 else:
395 else:
396 _getfiles = _getfiles_optimistic
396 _getfiles = _getfiles_optimistic
397 _getfiles(remote, self.receivemissing, progresstick,
397 _getfiles(remote, self.receivemissing, progresstick,
398 missed, idmap, step)
398 missed, idmap, step)
399 elif remote.capable("x_rfl_getfile"):
399 elif remote.capable("x_rfl_getfile"):
400 if remote.capable('batch'):
400 if remote.capable('batch'):
401 batchdefault = 100
401 batchdefault = 100
402 else:
402 else:
403 batchdefault = 10
403 batchdefault = 10
404 batchsize = self.ui.configint(
404 batchsize = self.ui.configint(
405 'remotefilelog', 'batchsize', batchdefault)
405 'remotefilelog', 'batchsize', batchdefault)
406 _getfilesbatch(
406 _getfilesbatch(
407 remote, self.receivemissing, progresstick,
407 remote, self.receivemissing, progresstick,
408 missed, idmap, batchsize)
408 missed, idmap, batchsize)
409 else:
409 else:
410 raise error.Abort("configured remotefilelog server"
410 raise error.Abort("configured remotefilelog server"
411 " does not support remotefilelog")
411 " does not support remotefilelog")
412
412
413 self.ui.log("remotefilefetchlog",
413 self.ui.log("remotefilefetchlog",
414 "Success\n",
414 "Success\n",
415 fetched_files = count[0] - fromcache,
415 fetched_files = count[0] - fromcache,
416 total_to_fetch = total - fromcache)
416 total_to_fetch = total - fromcache)
417 except Exception:
417 except Exception:
418 self.ui.log("remotefilefetchlog",
418 self.ui.log("remotefilefetchlog",
419 "Fail\n",
419 "Fail\n",
420 fetched_files = count[0] - fromcache,
420 fetched_files = count[0] - fromcache,
421 total_to_fetch = total - fromcache)
421 total_to_fetch = total - fromcache)
422 raise
422 raise
423 finally:
423 finally:
424 self.ui.verbose = verbose
424 self.ui.verbose = verbose
425 # send to memcache
425 # send to memcache
426 count[0] = len(missed)
426 count[0] = len(missed)
427 request = "set\n%d\n%s\n" % (count[0], "\n".join(missed))
427 request = "set\n%d\n%s\n" % (count[0], "\n".join(missed))
428 cache.request(request)
428 cache.request(request)
429
429
430 self.ui.progress(_downloading, None)
430 self.ui.progress(_downloading, None)
431
431
432 # mark ourselves as a user of this cache
432 # mark ourselves as a user of this cache
433 writedata.markrepo(self.repo.path)
433 writedata.markrepo(self.repo.path)
434 finally:
434 finally:
435 os.umask(oldumask)
435 os.umask(oldumask)
436
436
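The exchange with the cache process in request() above is line oriented: the client sends "get\n<count>\n<key>\n...", then reads back one line per missing key, "_hits_<n>_..." progress lines, and a terminating "0". A minimal parser for that response stream (the cache keys here are invented):

def parsecacheresponse(lines):
    # Returns (missing keys, number of reported hits), mirroring the
    # receive loop in fileserverclient.request above.
    missed = []
    hits = 0
    for line in lines:
        if line == "0":            # end of response
            break
        if line.startswith("_hits_"):
            hits += int(line.split("_")[2])
            continue
        missed.append(line)
    return missed, hits

response = ["myrepo/ab/cd/1111", "_hits_3_", "myrepo/ef/01/2222", "0"]
assert parsecacheresponse(response) == (["myrepo/ab/cd/1111",
                                         "myrepo/ef/01/2222"], 3)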
437 def receivemissing(self, pipe, filename, node):
437 def receivemissing(self, pipe, filename, node):
438 line = pipe.readline()[:-1]
438 line = pipe.readline()[:-1]
439 if not line:
439 if not line:
440 raise error.ResponseError(_("error downloading file contents:"),
440 raise error.ResponseError(_("error downloading file contents:"),
441 _("connection closed early"))
441 _("connection closed early"))
442 size = int(line)
442 size = int(line)
443 data = pipe.read(size)
443 data = pipe.read(size)
444 if len(data) != size:
444 if len(data) != size:
445 raise error.ResponseError(_("error downloading file contents:"),
445 raise error.ResponseError(_("error downloading file contents:"),
446 _("only received %s of %s bytes")
446 _("only received %s of %s bytes")
447 % (len(data), size))
447 % (len(data), size))
448
448
449 self.writedata.addremotefilelognode(filename, bin(node),
449 self.writedata.addremotefilelognode(filename, bin(node),
450 zlib.decompress(data))
450 zlib.decompress(data))
451
451
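receivemissing expects each file to arrive as a decimal size line followed by exactly that many bytes of zlib-compressed data. Here is a round trip of that framing over an in-memory pipe, with toy content and no real server involved:

import io
import zlib

content = b'file contents for the example'
payload = zlib.compress(content)
pipe = io.BytesIO(b'%d\n%s' % (len(payload), payload))

size = int(pipe.readline()[:-1])     # strip the trailing newline
data = pipe.read(size)
assert len(data) == size
assert zlib.decompress(data) == content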
452 def connect(self):
452 def connect(self):
453 if self.cacheprocess:
453 if self.cacheprocess:
454 cmd = "%s %s" % (self.cacheprocess, self.writedata._path)
454 cmd = "%s %s" % (self.cacheprocess, self.writedata._path)
455 self.remotecache.connect(cmd)
455 self.remotecache.connect(cmd)
456 else:
456 else:
457 # If no cache process is specified, we fake one that always
457 # If no cache process is specified, we fake one that always
458 # returns cache misses. This enables tests to run easily
458 # returns cache misses. This enables tests to run easily
459 # and may eventually allow us to be a drop-in replacement
459 # and may eventually allow us to be a drop-in replacement
460 # for the largefiles extension.
460 # for the largefiles extension.
461 class simplecache(object):
461 class simplecache(object):
462 def __init__(self):
462 def __init__(self):
463 self.missingids = []
463 self.missingids = []
464 self.connected = True
464 self.connected = True
465
465
466 def close(self):
466 def close(self):
467 pass
467 pass
468
468
469 def request(self, value, flush=True):
469 def request(self, value, flush=True):
470 lines = value.split("\n")
470 lines = value.split("\n")
471 if lines[0] != "get":
471 if lines[0] != "get":
472 return
472 return
473 self.missingids = lines[2:-1]
473 self.missingids = lines[2:-1]
474 self.missingids.append('0')
474 self.missingids.append('0')
475
475
476 def receiveline(self):
476 def receiveline(self):
477 if len(self.missingids) > 0:
477 if len(self.missingids) > 0:
478 return self.missingids.pop(0)
478 return self.missingids.pop(0)
479 return None
479 return None
480
480
481 self.remotecache = simplecache()
481 self.remotecache = simplecache()
482
482
483 def close(self):
483 def close(self):
484 if fetches:
484 if fetches:
485 msg = ("%s files fetched over %d fetches - " +
485 msg = ("%d files fetched over %d fetches - " +
486 "(%d misses, %0.2f%% hit ratio) over %0.2fs\n") % (
486 "(%d misses, %0.2f%% hit ratio) over %0.2fs\n") % (
487 fetched,
487 fetched,
488 fetches,
488 fetches,
489 fetchmisses,
489 fetchmisses,
490 float(fetched - fetchmisses) / float(fetched) * 100.0,
490 float(fetched - fetchmisses) / float(fetched) * 100.0,
491 fetchcost)
491 fetchcost)
492 if self.debugoutput:
492 if self.debugoutput:
493 self.ui.warn(msg)
493 self.ui.warn(msg)
494 self.ui.log("remotefilelog.prefetch", msg.replace("%", "%%"),
494 self.ui.log("remotefilelog.prefetch", msg.replace("%", "%%"),
495 remotefilelogfetched=fetched,
495 remotefilelogfetched=fetched,
496 remotefilelogfetches=fetches,
496 remotefilelogfetches=fetches,
497 remotefilelogfetchmisses=fetchmisses,
497 remotefilelogfetchmisses=fetchmisses,
498 remotefilelogfetchtime=fetchcost * 1000)
498 remotefilelogfetchtime=fetchcost * 1000)
499
499
500 if self.remotecache.connected:
500 if self.remotecache.connected:
501 self.remotecache.close()
501 self.remotecache.close()
502
502
503 def prefetch(self, fileids, force=False, fetchdata=True,
503 def prefetch(self, fileids, force=False, fetchdata=True,
504 fetchhistory=False):
504 fetchhistory=False):
505 """downloads the given file versions to the cache
505 """downloads the given file versions to the cache
506 """
506 """
507 repo = self.repo
507 repo = self.repo
508 idstocheck = []
508 idstocheck = []
509 for file, id in fileids:
509 for file, id in fileids:
            # hack
            # - we don't use .hgtags
            # - workingctx produces ids with length 42,
            #   which we skip since they aren't in any cache
            if (file == '.hgtags' or len(id) == 42
                or not repo.shallowmatch(file)):
                continue

            idstocheck.append((file, bin(id)))

        datastore = self.datastore
        historystore = self.historystore
        if force:
            datastore = contentstore.unioncontentstore(*repo.shareddatastores)
            historystore = metadatastore.unionmetadatastore(
                *repo.sharedhistorystores)

        missingids = set()
        if fetchdata:
            missingids.update(datastore.getmissing(idstocheck))
        if fetchhistory:
            missingids.update(historystore.getmissing(idstocheck))

        # partition missing nodes into nullid and not-nullid so we can
        # warn about this filtering potentially shadowing bugs.
        nullids = len([None for unused, id in missingids if id == nullid])
        if nullids:
            missingids = [(f, id) for f, id in missingids if id != nullid]
            repo.ui.develwarn(
                ('remotefilelog not fetching %d null revs'
                 ' - this is likely hiding bugs' % nullids),
                config='remotefilelog-ext')
        if missingids:
            global fetches, fetched, fetchcost
            fetches += 1

            # We want to be able to detect excess individual file downloads, so
            # let's log that information for debugging.
            if fetches >= 15 and fetches < 18:
                if fetches == 15:
                    fetchwarning = self.ui.config('remotefilelog',
                                                  'fetchwarning')
                    if fetchwarning:
                        self.ui.warn(fetchwarning + '\n')
                self.logstacktrace()
            missingids = [(file, hex(id)) for file, id in missingids]
            fetched += len(missingids)
            start = time.time()
            missingids = self.request(missingids)
            if missingids:
                raise error.Abort(_("unable to download %d files") %
                                  len(missingids))
            fetchcost += time.time() - start
            self._lfsprefetch(fileids)

    def _lfsprefetch(self, fileids):
        if not _lfsmod or not util.safehasattr(
                self.repo.svfs, 'lfslocalblobstore'):
            return
        if not _lfsmod.wrapper.candownload(self.repo):
            return
        pointers = []
        store = self.repo.svfs.lfslocalblobstore
        for file, id in fileids:
            node = bin(id)
            rlog = self.repo.file(file)
            if rlog.flags(node) & revlog.REVIDX_EXTSTORED:
                text = rlog.revision(node, raw=True)
                p = _lfsmod.pointer.deserialize(text)
                oid = p.oid()
                if not store.has(oid):
                    pointers.append(p)
        if len(pointers) > 0:
            self.repo.svfs.lfsremoteblobstore.readbatch(pointers, store)
            assert all(store.has(p.oid()) for p in pointers)

    def logstacktrace(self):
        import traceback
        self.ui.log('remotefilelog', 'excess remotefilelog fetching:\n%s\n',
                    ''.join(traceback.format_stack()))
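# Illustrative sketch (not from the original file): prefetch() above takes
# `fileids` as (path, hex nodeid) pairs; .hgtags, 42-character workingctx ids
# and paths outside the shallow match are skipped, and ids that resolve to
# nullid are filtered out before anything is requested. Assuming `client` is
# an instance of the class these methods belong to, a caller might look like:
#
#     fileids = [('foo/bar.txt', '0123456789abcdef0123456789abcdef01234567')]
#     client.prefetch(fileids, fetchdata=True, fetchhistory=False)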
@@ -1,406 +1,406
# remotefilelogserver.py - server logic for a remotefilelog server
#
# Copyright 2013 Facebook, Inc.
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.
from __future__ import absolute_import

import errno
import os
import stat
import time
import zlib

from mercurial.i18n import _
from mercurial.node import bin, hex, nullid
from mercurial import (
    changegroup,
    changelog,
    context,
    error,
    extensions,
    match,
    store,
    streamclone,
    util,
    wireprotoserver,
    wireprototypes,
    wireprotov1server,
)
from . import (
    constants,
    shallowutil,
)

_sshv1server = wireprotoserver.sshv1protocolhandler

def setupserver(ui, repo):
    """Sets up a normal Mercurial repo so it can serve files to shallow repos.
    """
    onetimesetup(ui)

    # don't send files to shallow clients during pulls
    def generatefiles(orig, self, changedfiles, linknodes, commonrevs, source,
                      *args, **kwargs):
        caps = self._bundlecaps or []
        if constants.BUNDLE2_CAPABLITY in caps:
            # only send files that don't match the specified patterns
            includepattern = None
            excludepattern = None
            for cap in (self._bundlecaps or []):
                if cap.startswith("includepattern="):
                    includepattern = cap[len("includepattern="):].split('\0')
                elif cap.startswith("excludepattern="):
                    excludepattern = cap[len("excludepattern="):].split('\0')

            m = match.always(repo.root, '')
            if includepattern or excludepattern:
                m = match.match(repo.root, '', None,
                                includepattern, excludepattern)

            changedfiles = list([f for f in changedfiles if not m(f)])
        return orig(self, changedfiles, linknodes, commonrevs, source,
                    *args, **kwargs)

    extensions.wrapfunction(
        changegroup.cgpacker, 'generatefiles', generatefiles)

onetime = False
def onetimesetup(ui):
    """Configures the wireprotocol for both clients and servers.
    """
    global onetime
    if onetime:
        return
    onetime = True

    # support file content requests
    wireprotov1server.wireprotocommand(
        'x_rfl_getflogheads', 'path', permission='pull')(getflogheads)
    wireprotov1server.wireprotocommand(
        'x_rfl_getfiles', '', permission='pull')(getfiles)
    wireprotov1server.wireprotocommand(
        'x_rfl_getfile', 'file node', permission='pull')(getfile)

    class streamstate(object):
        match = None
        shallowremote = False
        noflatmf = False
    state = streamstate()

    def stream_out_shallow(repo, proto, other):
        includepattern = None
        excludepattern = None
        raw = other.get('includepattern')
        if raw:
            includepattern = raw.split('\0')
        raw = other.get('excludepattern')
        if raw:
            excludepattern = raw.split('\0')

        oldshallow = state.shallowremote
        oldmatch = state.match
        oldnoflatmf = state.noflatmf
        try:
            state.shallowremote = True
            state.match = match.always(repo.root, '')
            state.noflatmf = other.get('noflatmanifest') == 'True'
            if includepattern or excludepattern:
                state.match = match.match(repo.root, '', None,
                                          includepattern, excludepattern)
            streamres = wireprotov1server.stream(repo, proto)

            # Force the first value to execute, so the file list is computed
            # within the try/finally scope
            first = next(streamres.gen)
            second = next(streamres.gen)
            def gen():
                yield first
                yield second
                for value in streamres.gen:
                    yield value
            return wireprototypes.streamres(gen())
        finally:
            state.shallowremote = oldshallow
            state.match = oldmatch
            state.noflatmf = oldnoflatmf

    wireprotov1server.commands['stream_out_shallow'] = (stream_out_shallow, '*')

    # don't clone filelogs to shallow clients
    def _walkstreamfiles(orig, repo, matcher=None):
        if state.shallowremote:
            # if we are shallow ourselves, stream our local commits
            if shallowutil.isenabled(repo):
                striplen = len(repo.store.path) + 1
                readdir = repo.store.rawvfs.readdir
                visit = [os.path.join(repo.store.path, 'data')]
                while visit:
                    p = visit.pop()
                    for f, kind, st in readdir(p, stat=True):
                        fp = p + '/' + f
                        if kind == stat.S_IFREG:
                            if not fp.endswith('.i') and not fp.endswith('.d'):
                                n = util.pconvert(fp[striplen:])
                                yield (store.decodedir(n), n, st.st_size)
                        if kind == stat.S_IFDIR:
                            visit.append(fp)

            if 'treemanifest' in repo.requirements:
                for (u, e, s) in repo.store.datafiles():
                    if (u.startswith('meta/') and
                        (u.endswith('.i') or u.endswith('.d'))):
                        yield (u, e, s)

            # Return .d and .i files that do not match the shallow pattern
            match = state.match
            if match and not match.always():
                for (u, e, s) in repo.store.datafiles():
                    f = u[5:-2]  # trim data/... and .i/.d
                    if not state.match(f):
                        yield (u, e, s)

            for x in repo.store.topfiles():
                if state.noflatmf and x[0][:11] == '00manifest.':
                    continue
                yield x

        elif shallowutil.isenabled(repo):
            # don't allow cloning from a shallow repo to a full repo
            # since it would require fetching every version of every
            # file in order to create the revlogs.
            raise error.Abort(_("Cannot clone from a shallow repo "
                                "to a full repo."))
        else:
            for x in orig(repo, matcher):
                yield x

    extensions.wrapfunction(streamclone, '_walkstreamfiles', _walkstreamfiles)

    # expose remotefilelog capabilities
    def _capabilities(orig, repo, proto):
        caps = orig(repo, proto)
        if (shallowutil.isenabled(repo) or ui.configbool('remotefilelog',
                                                         'server')):
            if isinstance(proto, _sshv1server):
                # legacy getfiles method which only works over ssh
                caps.append(constants.NETWORK_CAP_LEGACY_SSH_GETFILES)
            caps.append('x_rfl_getflogheads')
            caps.append('x_rfl_getfile')
        return caps
    extensions.wrapfunction(wireprotov1server, '_capabilities', _capabilities)

    def _adjustlinkrev(orig, self, *args, **kwargs):
        # When generating file blobs, taking the real path is too slow on large
        # repos, so force it to just return the linkrev directly.
        repo = self._repo
        if util.safehasattr(repo, 'forcelinkrev') and repo.forcelinkrev:
            return self._filelog.linkrev(self._filelog.rev(self._filenode))
        return orig(self, *args, **kwargs)

    extensions.wrapfunction(
        context.basefilectx, '_adjustlinkrev', _adjustlinkrev)

    def _iscmd(orig, cmd):
        if cmd == 'x_rfl_getfiles':
            return False
        return orig(cmd)

    extensions.wrapfunction(wireprotoserver, 'iscmd', _iscmd)

def _loadfileblob(repo, cachepath, path, node):
    filecachepath = os.path.join(cachepath, path, hex(node))
    if not os.path.exists(filecachepath) or os.path.getsize(filecachepath) == 0:
        filectx = repo.filectx(path, fileid=node)
        if filectx.node() == nullid:
            repo.changelog = changelog.changelog(repo.svfs)
            filectx = repo.filectx(path, fileid=node)

        text = createfileblob(filectx)
        # TODO configurable compression engines
        text = zlib.compress(text)

        # everything should be user & group read/writable
        oldumask = os.umask(0o002)
        try:
            dirname = os.path.dirname(filecachepath)
            if not os.path.exists(dirname):
                try:
                    os.makedirs(dirname)
                except OSError as ex:
                    if ex.errno != errno.EEXIST:
                        raise

            f = None
            try:
-                f = util.atomictempfile(filecachepath, "w")
+                f = util.atomictempfile(filecachepath, "wb")
                f.write(text)
            except (IOError, OSError):
                # Don't abort if the user only has permission to read,
                # and not write.
                pass
            finally:
                if f:
                    f.close()
        finally:
            os.umask(oldumask)
    else:
-        with open(filecachepath, "r") as f:
+        with open(filecachepath, "rb") as f:
            text = f.read()
    return text

def getflogheads(repo, proto, path):
    """A server api for requesting a filelog's heads
    """
    flog = repo.file(path)
    heads = flog.heads()
    return '\n'.join((hex(head) for head in heads if head != nullid))

def getfile(repo, proto, file, node):
    """A server api for requesting a particular version of a file. Can be used
    in batches to request many files at once. The return protocol is:
    <errorcode>\0<data/errormsg> where <errorcode> is 0 for success or
    non-zero for an error.

    data is a compressed blob with revlog flag and ancestors information. See
    createfileblob for its content.
    """
    if shallowutil.isenabled(repo):
        return '1\0' + _('cannot fetch remote files from shallow repo')
    cachepath = repo.ui.config("remotefilelog", "servercachepath")
    if not cachepath:
        cachepath = os.path.join(repo.path, "remotefilelogcache")
    node = bin(node.strip())
    if node == nullid:
        return '0\0'
    return '0\0' + _loadfileblob(repo, cachepath, file, node)
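# Illustrative sketch (not part of the original module): a caller of the
# x_rfl_getfile command above can split the reply on the first NUL byte, as
# described in getfile()'s docstring; a '0' code means the payload is the
# zlib-compressed file blob produced by _loadfileblob():
#
#     def parsegetfilereply(raw):
#         code, payload = raw.split('\0', 1)
#         if code != '0':
#             raise error.Abort(payload)
#         return zlib.decompress(payload)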
def getfiles(repo, proto):
    """A server api for requesting particular versions of particular files.
    """
    if shallowutil.isenabled(repo):
        raise error.Abort(_('cannot fetch remote files from shallow repo'))
    if not isinstance(proto, _sshv1server):
        raise error.Abort(_('cannot fetch remote files over non-ssh protocol'))

    def streamer():
        fin = proto._fin

        cachepath = repo.ui.config("remotefilelog", "servercachepath")
        if not cachepath:
            cachepath = os.path.join(repo.path, "remotefilelogcache")

        while True:
            request = fin.readline()[:-1]
            if not request:
                break

            node = bin(request[:40])
            if node == nullid:
                yield '0\n'
                continue

            path = request[40:]

            text = _loadfileblob(repo, cachepath, path, node)

            yield '%d\n%s' % (len(text), text)

            # it would be better to only flush after processing a whole batch
            # but currently we don't know if there are more requests coming
            proto._fout.flush()
    return wireprototypes.streamres(streamer())
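# Illustrative sketch (not part of the original module): the legacy
# x_rfl_getfiles stream above reads newline-terminated requests of the form
# '<40-char hex node><path>' and answers each one with '<size>\n<blob>'
# (size 0 for nullid). A hypothetical client loop over the ssh pipes
# (`fin`/`fout` are assumptions) could look like:
#
#     for path, hexnode in wanted:
#         fout.write('%s%s\n' % (hexnode, path))
#         fout.flush()
#         size = int(fin.readline()[:-1])
#         blob = fin.read(size)    # zlib-compressed blob, empty for nullid
#     fout.write('\n')             # an empty request ends the batch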
def createfileblob(filectx):
    """
    format:
        v0:
            str(len(rawtext)) + '\0' + rawtext + ancestortext
        v1:
            'v1' + '\n' + metalist + '\0' + rawtext + ancestortext
            metalist := metalist + '\n' + meta | meta
                meta := sizemeta | flagmeta
            sizemeta := METAKEYSIZE + str(len(rawtext))
            flagmeta := METAKEYFLAG + str(flag)

    note: sizemeta must exist. METAKEYFLAG and METAKEYSIZE must have a
    length of 1.
    """
    flog = filectx.filelog()
    frev = filectx.filerev()
    revlogflags = flog._revlog.flags(frev)
    if revlogflags == 0:
        # normal files
        text = filectx.data()
    else:
        # lfs, read raw revision data
        text = flog.revision(frev, raw=True)

    repo = filectx._repo

    ancestors = [filectx]

    try:
        repo.forcelinkrev = True
        ancestors.extend([f for f in filectx.ancestors()])

        ancestortext = ""
        for ancestorctx in ancestors:
            parents = ancestorctx.parents()
            p1 = nullid
            p2 = nullid
            if len(parents) > 0:
                p1 = parents[0].filenode()
            if len(parents) > 1:
                p2 = parents[1].filenode()

            copyname = ""
            rename = ancestorctx.renamed()
            if rename:
                copyname = rename[0]
            linknode = ancestorctx.node()
            ancestortext += "%s%s%s%s%s\0" % (
                ancestorctx.filenode(), p1, p2, linknode,
                copyname)
    finally:
        repo.forcelinkrev = False

    header = shallowutil.buildfileblobheader(len(text), revlogflags)

    return "%s\0%s%s" % (header, text, ancestortext)
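# Illustrative sketch (not part of the original module): undoing
# createfileblob() for a v1 blob, following the docstring above. The
# one-character METAKEYSIZE/METAKEYFLAG keys come from the extension's
# constants and are not spelled out here:
#
#     header, rest = blob.split('\0', 1)
#     assert header.startswith('v1\n')
#     meta = dict((line[:1], line[1:]) for line in header.split('\n')[1:])
#     size = int(meta[METAKEYSIZE])          # sizemeta is mandatory
#     rawtext, ancestortext = rest[:size], rest[size:]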
def gcserver(ui, repo):
    if not repo.ui.configbool("remotefilelog", "server"):
        return

    neededfiles = set()
    heads = repo.revs("heads(tip~25000:) - null")

    cachepath = repo.vfs.join("remotefilelogcache")
    for head in heads:
        mf = repo[head].manifest()
        for filename, filenode in mf.iteritems():
            filecachepath = os.path.join(cachepath, filename, hex(filenode))
            neededfiles.add(filecachepath)

    # delete unneeded older files
    days = repo.ui.configint("remotefilelog", "serverexpiration")
    expiration = time.time() - (days * 24 * 60 * 60)

    _removing = _("removing old server cache")
    count = 0
    ui.progress(_removing, count, unit="files")
    for root, dirs, files in os.walk(cachepath):
        for file in files:
            filepath = os.path.join(root, file)
            count += 1
            ui.progress(_removing, count, unit="files")
            if filepath in neededfiles:
                continue

            stat = os.stat(filepath)
            if stat.st_mtime < expiration:
                os.remove(filepath)

    ui.progress(_removing, None)