remotefilelog: fix various minor py3 problems...
Augie Fackler
r40562:1419f780 default
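One of the Python 3 problems addressed below is the __slots__ declaration in the indexparams class (see the changed lines near the end of the first file). The sketch that follows is an editor's illustration, not part of the changeset: it assumes, as the r'' prefixes suggest, that Mercurial's Python 3 source loader rewrites unprefixed string literals into bytes literals, and bytes are not accepted as slot names on Python 3.

    # Illustrative only; _bytesslots and _strslots are hypothetical names.
    try:
        class _bytesslots(object):
            __slots__ = (b'fanoutprefix',)   # what a plain 'fanoutprefix' becomes
    except TypeError as err:
        print(err)   # e.g. "__slots__ items must be strings, not 'bytes'"

    class _strslots(object):
        __slots__ = (r'fanoutprefix',)       # r'...' stays a native str on both 2 and 3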
@@ -1,539 +1,539 @@
from __future__ import absolute_import

import collections
import errno
import hashlib
import mmap
import os
import struct
import time

from mercurial.i18n import _
from mercurial import (
    policy,
    pycompat,
    util,
    vfs as vfsmod,
)
from . import shallowutil

osutil = policy.importmod(r'osutil')

# The pack version supported by this implementation. This will need to be
# rev'd whenever the byte format changes. Ex: changing the fanout prefix,
# changing any of the int sizes, changing the delta algorithm, etc.
PACKVERSIONSIZE = 1
INDEXVERSIONSIZE = 2

FANOUTSTART = INDEXVERSIONSIZE

# Constant that indicates a fanout table entry hasn't been filled in. (This does
# not get serialized)
EMPTYFANOUT = -1

# The fanout prefix is the number of bytes that can be addressed by the fanout
# table. Example: a fanout prefix of 1 means we use the first byte of a hash to
# look in the fanout table (which will be 2^8 entries long).
SMALLFANOUTPREFIX = 1
LARGEFANOUTPREFIX = 2

# The number of entries in the index at which point we switch to a large fanout.
# It is chosen to balance the linear scan through a sparse fanout, with the
# size of the bisect in actual index.
# 2^16 / 8 was chosen because it trades off (1 step fanout scan + 5 step
# bisect) with (8 step fanout scan + 1 step bisect)
# 5 step bisect = log(2^16 / 8 / 255)  # fanout
# 10 step fanout scan = 2^16 / (2^16 / 8)  # fanout space divided by entries
SMALLFANOUTCUTOFF = 2**16 / 8

# The amount of time to wait between checking for new packs. This prevents an
# exception when data is moved to a new pack after the process has already
# loaded the pack list.
REFRESHRATE = 0.1

if pycompat.isposix:
    # With glibc 2.7+ the 'e' flag uses O_CLOEXEC when opening.
    # The 'e' flag will be ignored on older versions of glibc.
    PACKOPENMODE = 'rbe'
else:
    PACKOPENMODE = 'rb'

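# Editor's sketch, not part of the file: the fanout sizing implied by the
# constants above (each fanout entry is a 4-byte '!I' value).
assert 2 ** (SMALLFANOUTPREFIX * 8) == 256      # small fanout: 256 entries, 1KB
assert 2 ** (LARGEFANOUTPREFIX * 8) == 65536    # large fanout: 64K entries, 256KB
assert SMALLFANOUTCUTOFF == 2 ** 16 / 8         # switch once an index holds >8192 entries
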
class _cachebackedpacks(object):
    def __init__(self, packs, cachesize):
        self._packs = set(packs)
        self._lrucache = util.lrucachedict(cachesize)
        self._lastpack = None

        # Avoid cold start of the cache by populating the most recent packs
        # in the cache.
        for i in reversed(range(min(cachesize, len(packs)))):
            self._movetofront(packs[i])

    def _movetofront(self, pack):
        # This effectively makes pack the first entry in the cache.
        self._lrucache[pack] = True

    def _registerlastpackusage(self):
        if self._lastpack is not None:
            self._movetofront(self._lastpack)
            self._lastpack = None

    def add(self, pack):
        self._registerlastpackusage()

        # This method will mostly be called when packs are not in cache.
        # Therefore, adding pack to the cache.
        self._movetofront(pack)
        self._packs.add(pack)

    def __iter__(self):
        self._registerlastpackusage()

        # Cache iteration is based on LRU.
        for pack in self._lrucache:
            self._lastpack = pack
            yield pack

        cachedpacks = set(pack for pack in self._lrucache)
        # Yield for paths not in the cache.
        for pack in self._packs - cachedpacks:
            self._lastpack = pack
            yield pack

        # Data not found in any pack.
        self._lastpack = None

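# Editor's sketch, not part of the file: the iteration contract of
# _cachebackedpacks, assuming the class is importable (plain strings stand in
# for pack objects here):
#
#     packs = _cachebackedpacks(['a', 'b', 'c', 'd'], cachesize=2)
#     list(packs)   # recently used (cached) packs first, then the remainder
#
# A caller that stops iterating after a hit leaves that pack in _lastpack, and
# the next add() or iteration moves it to the front of the LRU cache.
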
class basepackstore(object):
    # Default cache size limit for the pack files.
    DEFAULTCACHESIZE = 100

    def __init__(self, ui, path):
        self.ui = ui
        self.path = path

        # lastrefesh is 0 so we'll immediately check for new packs on the first
        # failure.
        self.lastrefresh = 0

        packs = []
        for filepath, __, __ in self._getavailablepackfilessorted():
            try:
                pack = self.getpack(filepath)
            except Exception as ex:
                # An exception may be thrown if the pack file is corrupted
                # somehow. Log a warning but keep going in this case, just
                # skipping this pack file.
                #
                # If this is an ENOENT error then don't even bother logging.
                # Someone could have removed the file since we retrieved the
                # list of paths.
                if getattr(ex, 'errno', None) != errno.ENOENT:
                    ui.warn(_('unable to load pack %s: %s\n') % (filepath, ex))
                continue
            packs.append(pack)

        self.packs = _cachebackedpacks(packs, self.DEFAULTCACHESIZE)

    def _getavailablepackfiles(self):
        """For each pack file (a index/data file combo), yields:
          (full path without extension, mtime, size)

        mtime will be the mtime of the index/data file (whichever is newer)
        size is the combined size of index/data file
        """
        indexsuffixlen = len(self.INDEXSUFFIX)
        packsuffixlen = len(self.PACKSUFFIX)

        ids = set()
        sizes = collections.defaultdict(lambda: 0)
        mtimes = collections.defaultdict(lambda: [])
        try:
            for filename, type, stat in osutil.listdir(self.path, stat=True):
                id = None
                if filename[-indexsuffixlen:] == self.INDEXSUFFIX:
                    id = filename[:-indexsuffixlen]
                elif filename[-packsuffixlen:] == self.PACKSUFFIX:
                    id = filename[:-packsuffixlen]

                # Since we expect to have two files corresponding to each ID
                # (the index file and the pack file), we can yield once we see
                # it twice.
                if id:
                    sizes[id] += stat.st_size # Sum both files' sizes together
                    mtimes[id].append(stat.st_mtime)
                    if id in ids:
                        yield (os.path.join(self.path, id), max(mtimes[id]),
                               sizes[id])
                    else:
                        ids.add(id)
        except OSError as ex:
            if ex.errno != errno.ENOENT:
                raise

    def _getavailablepackfilessorted(self):
        """Like `_getavailablepackfiles`, but also sorts the files by mtime,
        yielding newest files first.

        This is desirable, since it is more likely newer packfiles have more
        desirable data.
        """
        files = []
        for path, mtime, size in self._getavailablepackfiles():
            files.append((mtime, size, path))
        files = sorted(files, reverse=True)
        for mtime, size, path in files:
            yield path, mtime, size

    def gettotalsizeandcount(self):
        """Returns the total disk size (in bytes) of all the pack files in
        this store, and the count of pack files.

        (This might be smaller than the total size of the ``self.path``
        directory, since this only considers fuly-writen pack files, and not
        temporary files or other detritus on the directory.)
        """
        totalsize = 0
        count = 0
        for __, __, size in self._getavailablepackfiles():
            totalsize += size
            count += 1
        return totalsize, count

    def getmetrics(self):
        """Returns metrics on the state of this store."""
        size, count = self.gettotalsizeandcount()
        return {
            'numpacks': count,
            'totalpacksize': size,
        }

    def getpack(self, path):
        raise NotImplementedError()

    def getmissing(self, keys):
        missing = keys
        for pack in self.packs:
            missing = pack.getmissing(missing)

            # Ensures better performance of the cache by keeping the most
            # recently accessed pack at the beginning in subsequent iterations.
            if not missing:
                return missing

        if missing:
            for pack in self.refresh():
                missing = pack.getmissing(missing)

        return missing

    def markledger(self, ledger, options=None):
        for pack in self.packs:
            pack.markledger(ledger)

    def markforrefresh(self):
        """Tells the store that there may be new pack files, so the next time it
        has a lookup miss it should check for new files."""
        self.lastrefresh = 0

    def refresh(self):
        """Checks for any new packs on disk, adds them to the main pack list,
        and returns a list of just the new packs."""
        now = time.time()

        # If we experience a lot of misses (like in the case of getmissing() on
        # new objects), let's only actually check disk for new stuff every once
        # in a while. Generally this code path should only ever matter when a
        # repack is going on in the background, and that should be pretty rare
        # to have that happen twice in quick succession.
        newpacks = []
        if now > self.lastrefresh + REFRESHRATE:
            self.lastrefresh = now
            previous = set(p.path for p in self.packs)
            for filepath, __, __ in self._getavailablepackfilessorted():
                if filepath not in previous:
                    newpack = self.getpack(filepath)
                    newpacks.append(newpack)
                    self.packs.add(newpack)

        return newpacks

class versionmixin(object):
    # Mix-in for classes with multiple supported versions
    VERSION = None
    SUPPORTED_VERSIONS = [2]

    def _checkversion(self, version):
        if version in self.SUPPORTED_VERSIONS:
            if self.VERSION is None:
                # only affect this instance
                self.VERSION = version
            elif self.VERSION != version:
                raise RuntimeError('inconsistent version: %s' % version)
        else:
            raise RuntimeError('unsupported version: %s' % version)

class basepack(versionmixin):
    # The maximum amount we should read via mmap before remmaping so the old
    # pages can be released (100MB)
    MAXPAGEDIN = 100 * 1024**2

    SUPPORTED_VERSIONS = [2]

    def __init__(self, path):
        self.path = path
        self.packpath = path + self.PACKSUFFIX
        self.indexpath = path + self.INDEXSUFFIX

        self.indexsize = os.stat(self.indexpath).st_size
        self.datasize = os.stat(self.packpath).st_size

        self._index = None
        self._data = None
        self.freememory() # initialize the mmap

        version = struct.unpack('!B', self._data[:PACKVERSIONSIZE])[0]
        self._checkversion(version)

        version, config = struct.unpack('!BB', self._index[:INDEXVERSIONSIZE])
        self._checkversion(version)

        if 0b10000000 & config:
            self.params = indexparams(LARGEFANOUTPREFIX, version)
        else:
            self.params = indexparams(SMALLFANOUTPREFIX, version)

    @util.propertycache
    def _fanouttable(self):
        params = self.params
        rawfanout = self._index[FANOUTSTART:FANOUTSTART + params.fanoutsize]
        fanouttable = []
        for i in pycompat.xrange(0, params.fanoutcount):
            loc = i * 4
            fanoutentry = struct.unpack('!I', rawfanout[loc:loc + 4])[0]
            fanouttable.append(fanoutentry)
        return fanouttable

    @util.propertycache
    def _indexend(self):
        nodecount = struct.unpack_from('!Q', self._index,
                                       self.params.indexstart - 8)[0]
        return self.params.indexstart + nodecount * self.INDEXENTRYLENGTH

    def freememory(self):
        """Unmap and remap the memory to free it up after known expensive
        operations. Return True if self._data and self._index were reloaded.
        """
        if self._index:
            if self._pagedin < self.MAXPAGEDIN:
                return False

            self._index.close()
            self._data.close()

        # TODO: use an opener/vfs to access these paths
        with open(self.indexpath, PACKOPENMODE) as indexfp:
            # memory-map the file, size 0 means whole file
            self._index = mmap.mmap(indexfp.fileno(), 0,
                                    access=mmap.ACCESS_READ)
        with open(self.packpath, PACKOPENMODE) as datafp:
            self._data = mmap.mmap(datafp.fileno(), 0, access=mmap.ACCESS_READ)

        self._pagedin = 0
        return True

    def getmissing(self, keys):
        raise NotImplementedError()

    def markledger(self, ledger, options=None):
        raise NotImplementedError()

    def cleanup(self, ledger):
        raise NotImplementedError()

    def __iter__(self):
        raise NotImplementedError()

    def iterentries(self):
        raise NotImplementedError()

class mutablebasepack(versionmixin):

    def __init__(self, ui, packdir, version=2):
        self._checkversion(version)
        # TODO(augie): make this configurable
        self._compressor = 'GZ'
        opener = vfsmod.vfs(packdir)
        opener.createmode = 0o444
        self.opener = opener

        self.entries = {}

        shallowutil.mkstickygroupdir(ui, packdir)
        self.packfp, self.packpath = opener.mkstemp(
            suffix=self.PACKSUFFIX + '-tmp')
        self.idxfp, self.idxpath = opener.mkstemp(
            suffix=self.INDEXSUFFIX + '-tmp')
        self.packfp = os.fdopen(self.packfp, 'w+')
        self.idxfp = os.fdopen(self.idxfp, 'w+')
        self.sha = hashlib.sha1()
        self._closed = False

        # The opener provides no way of doing permission fixup on files created
        # via mkstemp, so we must fix it ourselves. We can probably fix this
        # upstream in vfs.mkstemp so we don't need to use the private method.
        opener._fixfilemode(opener.join(self.packpath))
        opener._fixfilemode(opener.join(self.idxpath))

        # Write header
        # TODO: make it extensible (ex: allow specifying compression algorithm,
        # a flexible key/value header, delta algorithm, fanout size, etc)
        versionbuf = struct.pack('!B', self.VERSION) # unsigned 1 byte int
        self.writeraw(versionbuf)

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        if exc_type is None:
            self.close()
        else:
            self.abort()

    def abort(self):
        # Unclean exit
        self._cleantemppacks()

    def writeraw(self, data):
        self.packfp.write(data)
        self.sha.update(data)

    def close(self, ledger=None):
        if self._closed:
            return

        try:
            sha = self.sha.hexdigest()
            self.packfp.close()
            self.writeindex()

            if len(self.entries) == 0:
                # Empty pack
                self._cleantemppacks()
                self._closed = True
                return None

            self.opener.rename(self.packpath, sha + self.PACKSUFFIX)
            try:
                self.opener.rename(self.idxpath, sha + self.INDEXSUFFIX)
            except Exception as ex:
                try:
                    self.opener.unlink(sha + self.PACKSUFFIX)
                except Exception:
                    pass
                # Throw exception 'ex' explicitly since a normal 'raise' would
                # potentially throw an exception from the unlink cleanup.
                raise ex
        except Exception:
            # Clean up temp packs in all exception cases
            self._cleantemppacks()
            raise

        self._closed = True
        result = self.opener.join(sha)
        if ledger:
            ledger.addcreated(result)
        return result

    def _cleantemppacks(self):
        try:
            self.opener.unlink(self.packpath)
        except Exception:
            pass
        try:
            self.opener.unlink(self.idxpath)
        except Exception:
            pass

    def writeindex(self):
        rawindex = ''

        largefanout = len(self.entries) > SMALLFANOUTCUTOFF
        if largefanout:
            params = indexparams(LARGEFANOUTPREFIX, self.VERSION)
        else:
            params = indexparams(SMALLFANOUTPREFIX, self.VERSION)

        fanouttable = [EMPTYFANOUT] * params.fanoutcount

        # Precompute the location of each entry
        locations = {}
        count = 0
        for node in sorted(self.entries.iterkeys()):
            location = count * self.INDEXENTRYLENGTH
            locations[node] = location
            count += 1

            # Must use [0] on the unpack result since it's always a tuple.
            fanoutkey = struct.unpack(params.fanoutstruct,
                                      node[:params.fanoutprefix])[0]
            if fanouttable[fanoutkey] == EMPTYFANOUT:
                fanouttable[fanoutkey] = location

        rawfanouttable = ''
        last = 0
        for offset in fanouttable:
            offset = offset if offset != EMPTYFANOUT else last
            last = offset
            rawfanouttable += struct.pack('!I', offset)

        rawentrieslength = struct.pack('!Q', len(self.entries))

        # The index offset is the it's location in the file. So after the 2 byte
        # header and the fanouttable.
        rawindex = self.createindex(locations, 2 + len(rawfanouttable))

        self._writeheader(params)
        self.idxfp.write(rawfanouttable)
        self.idxfp.write(rawentrieslength)
        self.idxfp.write(rawindex)
        self.idxfp.close()

    def createindex(self, nodelocations):
        raise NotImplementedError()

    def _writeheader(self, indexparams):
        # Index header
        #    <version: 1 byte>
        #    <large fanout: 1 bit>  # 1 means 2^16, 0 means 2^8
        #    <unused: 7 bit>  # future use (compression, delta format, etc)
        config = 0
        if indexparams.fanoutprefix == LARGEFANOUTPREFIX:
            config = 0b10000000
        self.idxfp.write(struct.pack('!BB', self.VERSION, config))

class indexparams(object):
-    __slots__ = ('fanoutprefix', 'fanoutstruct', 'fanoutcount', 'fanoutsize',
-                 'indexstart')
+    __slots__ = (r'fanoutprefix', r'fanoutstruct', r'fanoutcount',
+                 r'fanoutsize', r'indexstart')

    def __init__(self, prefixsize, version):
        self.fanoutprefix = prefixsize

        # The struct pack format for fanout table location (i.e. the format that
        # converts the node prefix into an integer location in the fanout
        # table).
        if prefixsize == SMALLFANOUTPREFIX:
            self.fanoutstruct = '!B'
        elif prefixsize == LARGEFANOUTPREFIX:
            self.fanoutstruct = '!H'
        else:
            raise ValueError("invalid fanout prefix size: %s" % prefixsize)

        # The number of fanout table entries
        self.fanoutcount = 2**(prefixsize * 8)

        # The total bytes used by the fanout table
        self.fanoutsize = self.fanoutcount * 4

        self.indexstart = FANOUTSTART + self.fanoutsize
        # Skip the index length
        self.indexstart += 8
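As an editor's aside (not part of the changeset), the index layout written by mutablebasepack.writeindex() and _writeheader() above can be read back roughly as follows; the function and variable names here are hypothetical:

    import struct

    def readindexheader(raw):
        # <version: 1 byte> <config: 1 byte>; the 0b10000000 bit selects the
        # large (2-byte prefix, 2**16 entry) fanout table.
        version, config = struct.unpack('!BB', raw[:2])
        fanoutprefix = 2 if config & 0b10000000 else 1
        fanoutcount = 2 ** (fanoutprefix * 8)
        fanoutsize = fanoutcount * 4
        fanout = struct.unpack('!%dI' % fanoutcount, raw[2:2 + fanoutsize])
        entrycount = struct.unpack_from('!Q', raw, 2 + fanoutsize)[0]
        indexstart = 2 + fanoutsize + 8    # matches indexparams.indexstart
        return version, fanout, entrycount, indexstart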
@@ -1,781 +1,781 @@
from __future__ import absolute_import

import os
import time

from mercurial.i18n import _
from mercurial.node import (
    nullid,
    short,
)
from mercurial import (
    encoding,
    error,
    mdiff,
    policy,
    pycompat,
    scmutil,
    util,
    vfs,
)
from mercurial.utils import procutil
from . import (
    constants,
    contentstore,
    datapack,
    extutil,
    historypack,
    metadatastore,
    shallowutil,
)

osutil = policy.importmod(r'osutil')

class RepackAlreadyRunning(error.Abort):
    pass

if util.safehasattr(util, '_hgexecutable'):
    # Before 5be286db
    _hgexecutable = util.hgexecutable
else:
    from mercurial.utils import procutil
    _hgexecutable = procutil.hgexecutable

def backgroundrepack(repo, incremental=True, packsonly=False):
    cmd = [_hgexecutable(), '-R', repo.origroot, 'repack']
    msg = _("(running background repack)\n")
    if incremental:
        cmd.append('--incremental')
        msg = _("(running background incremental repack)\n")
    if packsonly:
        cmd.append('--packsonly')
    repo.ui.warn(msg)
    procutil.runbgcommand(cmd, encoding.environ)

def fullrepack(repo, options=None):
    """If ``packsonly`` is True, stores creating only loose objects are skipped.
    """
    if util.safehasattr(repo, 'shareddatastores'):
        datasource = contentstore.unioncontentstore(
            *repo.shareddatastores)
        historysource = metadatastore.unionmetadatastore(
            *repo.sharedhistorystores,
            allowincomplete=True)

        packpath = shallowutil.getcachepackpath(
            repo,
            constants.FILEPACK_CATEGORY)
        _runrepack(repo, datasource, historysource, packpath,
                   constants.FILEPACK_CATEGORY, options=options)

    if util.safehasattr(repo.manifestlog, 'datastore'):
        localdata, shareddata = _getmanifeststores(repo)
        lpackpath, ldstores, lhstores = localdata
        spackpath, sdstores, shstores = shareddata

        # Repack the shared manifest store
        datasource = contentstore.unioncontentstore(*sdstores)
        historysource = metadatastore.unionmetadatastore(
            *shstores,
            allowincomplete=True)
        _runrepack(repo, datasource, historysource, spackpath,
                   constants.TREEPACK_CATEGORY, options=options)

        # Repack the local manifest store
        datasource = contentstore.unioncontentstore(
            *ldstores,
            allowincomplete=True)
        historysource = metadatastore.unionmetadatastore(
            *lhstores,
            allowincomplete=True)
        _runrepack(repo, datasource, historysource, lpackpath,
                   constants.TREEPACK_CATEGORY, options=options)

def incrementalrepack(repo, options=None):
    """This repacks the repo by looking at the distribution of pack files in the
    repo and performing the most minimal repack to keep the repo in good shape.
    """
    if util.safehasattr(repo, 'shareddatastores'):
        packpath = shallowutil.getcachepackpath(
            repo,
            constants.FILEPACK_CATEGORY)
        _incrementalrepack(repo,
                           repo.shareddatastores,
                           repo.sharedhistorystores,
                           packpath,
                           constants.FILEPACK_CATEGORY,
                           options=options)

    if util.safehasattr(repo.manifestlog, 'datastore'):
        localdata, shareddata = _getmanifeststores(repo)
        lpackpath, ldstores, lhstores = localdata
        spackpath, sdstores, shstores = shareddata

        # Repack the shared manifest store
        _incrementalrepack(repo,
                           sdstores,
                           shstores,
                           spackpath,
                           constants.TREEPACK_CATEGORY,
                           options=options)

        # Repack the local manifest store
        _incrementalrepack(repo,
                           ldstores,
                           lhstores,
                           lpackpath,
                           constants.TREEPACK_CATEGORY,
                           allowincompletedata=True,
                           options=options)

def _getmanifeststores(repo):
    shareddatastores = repo.manifestlog.shareddatastores
    localdatastores = repo.manifestlog.localdatastores
    sharedhistorystores = repo.manifestlog.sharedhistorystores
    localhistorystores = repo.manifestlog.localhistorystores

    sharedpackpath = shallowutil.getcachepackpath(repo,
                                                  constants.TREEPACK_CATEGORY)
    localpackpath = shallowutil.getlocalpackpath(repo.svfs.vfs.base,
                                                 constants.TREEPACK_CATEGORY)

    return ((localpackpath, localdatastores, localhistorystores),
            (sharedpackpath, shareddatastores, sharedhistorystores))

def _topacks(packpath, files, constructor):
    paths = list(os.path.join(packpath, p) for p in files)
    packs = list(constructor(p) for p in paths)
    return packs

def _deletebigpacks(repo, folder, files):
    """Deletes packfiles that are bigger than ``packs.maxpacksize``.

    Returns ``files` with the removed files omitted."""
    maxsize = repo.ui.configbytes("packs", "maxpacksize")
    if maxsize <= 0:
        return files

    # This only considers datapacks today, but we could broaden it to include
    # historypacks.
    VALIDEXTS = [".datapack", ".dataidx"]

    # Either an oversize index or datapack will trigger cleanup of the whole
    # pack:
    oversized = set([os.path.splitext(path)[0] for path, ftype, stat in files
                     if (stat.st_size > maxsize and (os.path.splitext(path)[1]
                                                     in VALIDEXTS))])

    for rootfname in oversized:
        rootpath = os.path.join(folder, rootfname)
        for ext in VALIDEXTS:
            path = rootpath + ext
            repo.ui.debug('removing oversize packfile %s (%s)\n' %
                          (path, util.bytecount(os.stat(path).st_size)))
            os.unlink(path)
    return [row for row in files if os.path.basename(row[0]) not in oversized]

def _incrementalrepack(repo, datastore, historystore, packpath, category,
                       allowincompletedata=False, options=None):
    shallowutil.mkstickygroupdir(repo.ui, packpath)

    files = osutil.listdir(packpath, stat=True)
    files = _deletebigpacks(repo, packpath, files)
    datapacks = _topacks(packpath,
                         _computeincrementaldatapack(repo.ui, files),
                         datapack.datapack)
    datapacks.extend(s for s in datastore
                     if not isinstance(s, datapack.datapackstore))

    historypacks = _topacks(packpath,
                            _computeincrementalhistorypack(repo.ui, files),
                            historypack.historypack)
    historypacks.extend(s for s in historystore
                        if not isinstance(s, historypack.historypackstore))

    # ``allhistory{files,packs}`` contains all known history packs, even ones we
    # don't plan to repack. They are used during the datapack repack to ensure
    # good ordering of nodes.
    allhistoryfiles = _allpackfileswithsuffix(files, historypack.PACKSUFFIX,
                                              historypack.INDEXSUFFIX)
    allhistorypacks = _topacks(packpath,
                               (f for f, mode, stat in allhistoryfiles),
                               historypack.historypack)
    allhistorypacks.extend(s for s in historystore
                           if not isinstance(s, historypack.historypackstore))
    _runrepack(repo,
               contentstore.unioncontentstore(
                   *datapacks,
                   allowincomplete=allowincompletedata),
               metadatastore.unionmetadatastore(
                   *historypacks,
                   allowincomplete=True),
               packpath, category,
               fullhistory=metadatastore.unionmetadatastore(
                   *allhistorypacks,
                   allowincomplete=True),
               options=options)

def _computeincrementaldatapack(ui, files):
    opts = {
        'gencountlimit' : ui.configint(
            'remotefilelog', 'data.gencountlimit'),
        'generations' : ui.configlist(
            'remotefilelog', 'data.generations'),
        'maxrepackpacks' : ui.configint(
            'remotefilelog', 'data.maxrepackpacks'),
        'repackmaxpacksize' : ui.configbytes(
            'remotefilelog', 'data.repackmaxpacksize'),
        'repacksizelimit' : ui.configbytes(
            'remotefilelog', 'data.repacksizelimit'),
    }

    packfiles = _allpackfileswithsuffix(
        files, datapack.PACKSUFFIX, datapack.INDEXSUFFIX)
    return _computeincrementalpack(packfiles, opts)

def _computeincrementalhistorypack(ui, files):
    opts = {
        'gencountlimit' : ui.configint(
            'remotefilelog', 'history.gencountlimit'),
        'generations' : ui.configlist(
            'remotefilelog', 'history.generations', ['100MB']),
        'maxrepackpacks' : ui.configint(
            'remotefilelog', 'history.maxrepackpacks'),
        'repackmaxpacksize' : ui.configbytes(
            'remotefilelog', 'history.repackmaxpacksize', '400MB'),
        'repacksizelimit' : ui.configbytes(
            'remotefilelog', 'history.repacksizelimit'),
    }

    packfiles = _allpackfileswithsuffix(
        files, historypack.PACKSUFFIX, historypack.INDEXSUFFIX)
    return _computeincrementalpack(packfiles, opts)

def _allpackfileswithsuffix(files, packsuffix, indexsuffix):
    result = []
    fileset = set(fn for fn, mode, stat in files)
    for filename, mode, stat in files:
        if not filename.endswith(packsuffix):
            continue

        prefix = filename[:-len(packsuffix)]

        # Don't process a pack if it doesn't have an index.
        if (prefix + indexsuffix) not in fileset:
            continue
        result.append((prefix, mode, stat))

    return result

def _computeincrementalpack(files, opts):
    """Given a set of pack files along with the configuration options, this
    function computes the list of files that should be packed as part of an
    incremental repack.

    It tries to strike a balance between keeping incremental repacks cheap (i.e.
    packing small things when possible, and rolling the packs up to the big ones
    over time).
    """

    limits = list(sorted((util.sizetoint(s) for s in opts['generations']),
                         reverse=True))
    limits.append(0)

    # Group the packs by generation (i.e. by size)
    generations = []
    for i in pycompat.xrange(len(limits)):
        generations.append([])

    sizes = {}
    for prefix, mode, stat in files:
        size = stat.st_size
        if size > opts['repackmaxpacksize']:
            continue

        sizes[prefix] = size
        for i, limit in enumerate(limits):
            if size > limit:
                generations[i].append(prefix)
                break

    # Steps for picking what packs to repack:
    # 1. Pick the largest generation with > gencountlimit pack files.
    # 2. Take the smallest three packs.
    # 3. While total-size-of-packs < repacksizelimit: add another pack

    # Find the largest generation with more than gencountlimit packs
    genpacks = []
    for i, limit in enumerate(limits):
        if len(generations[i]) > opts['gencountlimit']:
            # Sort to be smallest last, for easy popping later
            genpacks.extend(sorted(generations[i], reverse=True,
                                   key=lambda x: sizes[x]))
            break

    # Take as many packs from the generation as we can
    chosenpacks = genpacks[-3:]
    genpacks = genpacks[:-3]
    repacksize = sum(sizes[n] for n in chosenpacks)
    while (repacksize < opts['repacksizelimit'] and genpacks and
           len(chosenpacks) < opts['maxrepackpacks']):
        chosenpacks.append(genpacks.pop())
        repacksize += sizes[chosenpacks[-1]]

    return chosenpacks

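# Editor's sketch, not part of the file: a toy run of _computeincrementalpack()
# above. The sizes and limits are made up; fakestat stands in for the stat
# objects osutil.listdir() would return.
#
#     import collections
#     fakestat = collections.namedtuple('fakestat', 'st_size')
#     files = [('pack%d' % i, 0, fakestat(sz))
#              for i, sz in enumerate([10, 20, 30, 40, 2000, 3000])]
#     opts = {'gencountlimit': 2, 'generations': ['1KB'],
#             'maxrepackpacks': 50, 'repackmaxpacksize': 100 * 2**20,
#             'repacksizelimit': 100 * 2**10}
#     _computeincrementalpack(files, opts)
#
# The sub-1KB generation holds four packs, which exceeds gencountlimit, so the
# three smallest are picked first and the fourth still fits under
# repacksizelimit; the two large packs are left alone.
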
326 def _runrepack(repo, data, history, packpath, category, fullhistory=None,
326 def _runrepack(repo, data, history, packpath, category, fullhistory=None,
327 options=None):
327 options=None):
328 shallowutil.mkstickygroupdir(repo.ui, packpath)
328 shallowutil.mkstickygroupdir(repo.ui, packpath)
329
329
330 def isold(repo, filename, node):
330 def isold(repo, filename, node):
331 """Check if the file node is older than a limit.
331 """Check if the file node is older than a limit.
332 Unless a limit is specified in the config the default limit is taken.
332 Unless a limit is specified in the config the default limit is taken.
333 """
333 """
334 filectx = repo.filectx(filename, fileid=node)
334 filectx = repo.filectx(filename, fileid=node)
335 filetime = repo[filectx.linkrev()].date()
335 filetime = repo[filectx.linkrev()].date()
336
336
337 ttl = repo.ui.configint('remotefilelog', 'nodettl')
337 ttl = repo.ui.configint('remotefilelog', 'nodettl')
338
338
339 limit = time.time() - ttl
339 limit = time.time() - ttl
340 return filetime[0] < limit
340 return filetime[0] < limit
341
341
342 garbagecollect = repo.ui.configbool('remotefilelog', 'gcrepack')
342 garbagecollect = repo.ui.configbool('remotefilelog', 'gcrepack')
343 if not fullhistory:
343 if not fullhistory:
344 fullhistory = history
344 fullhistory = history
345 packer = repacker(repo, data, history, fullhistory, category,
345 packer = repacker(repo, data, history, fullhistory, category,
346 gc=garbagecollect, isold=isold, options=options)
346 gc=garbagecollect, isold=isold, options=options)
347
347
348 with datapack.mutabledatapack(repo.ui, packpath, version=2) as dpack:
348 with datapack.mutabledatapack(repo.ui, packpath, version=2) as dpack:
349 with historypack.mutablehistorypack(repo.ui, packpath) as hpack:
349 with historypack.mutablehistorypack(repo.ui, packpath) as hpack:
350 try:
350 try:
351 packer.run(dpack, hpack)
351 packer.run(dpack, hpack)
352 except error.LockHeld:
352 except error.LockHeld:
353 raise RepackAlreadyRunning(_("skipping repack - another repack "
353 raise RepackAlreadyRunning(_("skipping repack - another repack "
354 "is already running"))
354 "is already running"))
355
355
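The isold() helper above reduces to a time-to-live check on the commit that introduced the file revision. A stripped-down version, with the TTL passed in directly instead of read from remotefilelog.nodettl; the 14-day figure is only an illustrative assumption, not the extension's default:

import time

def isold_sketch(commitdate, nodettl=14 * 24 * 3600):
    """commitdate is a (unixtime, tzoffset) pair as returned by ctx.date()."""
    limit = time.time() - nodettl
    return commitdate[0] < limit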
356 def keepset(repo, keyfn, lastkeepkeys=None):
356 def keepset(repo, keyfn, lastkeepkeys=None):
357 """Computes a keepset which is not garbage collected.
357 """Computes a keepset which is not garbage collected.
358 'keyfn' is a function that maps filename, node to a unique key.
358 'keyfn' is a function that maps filename, node to a unique key.
359 'lastkeepkeys' is an optional argument and if provided the keepset
359 'lastkeepkeys' is an optional argument and if provided the keepset
360 function updates lastkeepkeys with more keys and returns the result.
360 function updates lastkeepkeys with more keys and returns the result.
361 """
361 """
362 if not lastkeepkeys:
362 if not lastkeepkeys:
363 keepkeys = set()
363 keepkeys = set()
364 else:
364 else:
365 keepkeys = lastkeepkeys
365 keepkeys = lastkeepkeys
366
366
367 # We want to keep:
367 # We want to keep:
368 # 1. Working copy parent
368 # 1. Working copy parent
369 # 2. Draft commits
369 # 2. Draft commits
370 # 3. Parents of draft commits
370 # 3. Parents of draft commits
371 # 4. Pullprefetch and bgprefetchrevs revsets if specified
371 # 4. Pullprefetch and bgprefetchrevs revsets if specified
372 revs = ['.', 'draft()', 'parents(draft())']
372 revs = ['.', 'draft()', 'parents(draft())']
373 prefetchrevs = repo.ui.config('remotefilelog', 'pullprefetch', None)
373 prefetchrevs = repo.ui.config('remotefilelog', 'pullprefetch', None)
374 if prefetchrevs:
374 if prefetchrevs:
375 revs.append('(%s)' % prefetchrevs)
375 revs.append('(%s)' % prefetchrevs)
376 prefetchrevs = repo.ui.config('remotefilelog', 'bgprefetchrevs', None)
376 prefetchrevs = repo.ui.config('remotefilelog', 'bgprefetchrevs', None)
377 if prefetchrevs:
377 if prefetchrevs:
378 revs.append('(%s)' % prefetchrevs)
378 revs.append('(%s)' % prefetchrevs)
379 revs = '+'.join(revs)
379 revs = '+'.join(revs)
380
380
381 revs = ['sort((%s), "topo")' % revs]
381 revs = ['sort((%s), "topo")' % revs]
382 keep = scmutil.revrange(repo, revs)
382 keep = scmutil.revrange(repo, revs)
383
383
384 processed = set()
384 processed = set()
385 lastmanifest = None
385 lastmanifest = None
386
386
387 # process the commits in toposorted order starting from the oldest
387 # process the commits in toposorted order starting from the oldest
388 for r in reversed(keep._list):
388 for r in reversed(keep._list):
389 if repo[r].p1().rev() in processed:
389 if repo[r].p1().rev() in processed:
390 # if the direct parent has already been processed
390 # if the direct parent has already been processed
391 # then we only need to process the delta
391 # then we only need to process the delta
392 m = repo[r].manifestctx().readdelta()
392 m = repo[r].manifestctx().readdelta()
393 else:
393 else:
394 # otherwise take the manifest and diff it
394 # otherwise take the manifest and diff it
395 # with the previous manifest if one exists
395 # with the previous manifest if one exists
396 if lastmanifest:
396 if lastmanifest:
397 m = repo[r].manifest().diff(lastmanifest)
397 m = repo[r].manifest().diff(lastmanifest)
398 else:
398 else:
399 m = repo[r].manifest()
399 m = repo[r].manifest()
400 lastmanifest = repo[r].manifest()
400 lastmanifest = repo[r].manifest()
401 processed.add(r)
401 processed.add(r)
402
402
403 # populate keepkeys with keys from the current manifest
403 # populate keepkeys with keys from the current manifest
404 if type(m) is dict:
404 if type(m) is dict:
405 # m is a result of diff of two manifests and is a dictionary that
405 # m is a result of diff of two manifests and is a dictionary that
406 # maps filename to ((newnode, newflag), (oldnode, oldflag)) tuple
406 # maps filename to ((newnode, newflag), (oldnode, oldflag)) tuple
407 for filename, diff in m.iteritems():
407 for filename, diff in m.iteritems():
408 if diff[0][0] is not None:
408 if diff[0][0] is not None:
409 keepkeys.add(keyfn(filename, diff[0][0]))
409 keepkeys.add(keyfn(filename, diff[0][0]))
410 else:
410 else:
411 # m is a manifest object
411 # m is a manifest object
412 for filename, filenode in m.iteritems():
412 for filename, filenode in m.iteritems():
413 keepkeys.add(keyfn(filename, filenode))
413 keepkeys.add(keyfn(filename, filenode))
414
414
415 return keepkeys
415 return keepkeys
416
416
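The manifest handling at the end of keepset() switches on whether m is a diff (a plain dict mapping filename to ((newnode, newflag), (oldnode, oldflag))) or a full manifest (mapping filename to node). A self-contained sketch of that key-collection step, with plain dicts standing in for real manifest objects and an explicit isdiff flag replacing the type check:

def collectkeys(m, keyfn, keepkeys, isdiff):
    if isdiff:
        # manifest diff: filename -> ((newnode, newflag), (oldnode, oldflag))
        for filename, ((newnode, newflag), old) in m.items():
            if newnode is not None:
                keepkeys.add(keyfn(filename, newnode))
    else:
        # full manifest: filename -> filenode
        for filename, filenode in m.items():
            keepkeys.add(keyfn(filename, filenode))
    return keepkeys

keep = collectkeys({'a.txt': 'node1'}, lambda f, n: (f, n), set(), isdiff=False)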
417 class repacker(object):
417 class repacker(object):
418 """Class for orchestrating the repack of data and history information into a
418 """Class for orchestrating the repack of data and history information into a
419 new format.
419 new format.
420 """
420 """
421 def __init__(self, repo, data, history, fullhistory, category, gc=False,
421 def __init__(self, repo, data, history, fullhistory, category, gc=False,
422 isold=None, options=None):
422 isold=None, options=None):
423 self.repo = repo
423 self.repo = repo
424 self.data = data
424 self.data = data
425 self.history = history
425 self.history = history
426 self.fullhistory = fullhistory
426 self.fullhistory = fullhistory
427 self.unit = constants.getunits(category)
427 self.unit = constants.getunits(category)
428 self.garbagecollect = gc
428 self.garbagecollect = gc
429 self.options = options
429 self.options = options
430 if self.garbagecollect:
430 if self.garbagecollect:
431 if not isold:
431 if not isold:
432 raise ValueError("Function 'isold' is not properly specified")
432 raise ValueError("Function 'isold' is not properly specified")
433 # use (filename, node) tuple as a keepset key
433 # use (filename, node) tuple as a keepset key
434 self.keepkeys = keepset(repo, lambda f, n : (f, n))
434 self.keepkeys = keepset(repo, lambda f, n : (f, n))
435 self.isold = isold
435 self.isold = isold
436
436
437 def run(self, targetdata, targethistory):
437 def run(self, targetdata, targethistory):
438 ledger = repackledger()
438 ledger = repackledger()
439
439
440 with extutil.flock(repacklockvfs(self.repo).join("repacklock"),
440 with extutil.flock(repacklockvfs(self.repo).join("repacklock"),
441 _('repacking %s') % self.repo.origroot, timeout=0):
441 _('repacking %s') % self.repo.origroot, timeout=0):
442 self.repo.hook('prerepack')
442 self.repo.hook('prerepack')
443
443
444 # Populate ledger from source
444 # Populate ledger from source
445 self.data.markledger(ledger, options=self.options)
445 self.data.markledger(ledger, options=self.options)
446 self.history.markledger(ledger, options=self.options)
446 self.history.markledger(ledger, options=self.options)
447
447
448 # Run repack
448 # Run repack
449 self.repackdata(ledger, targetdata)
449 self.repackdata(ledger, targetdata)
450 self.repackhistory(ledger, targethistory)
450 self.repackhistory(ledger, targethistory)
451
451
452 # Call cleanup on each source
452 # Call cleanup on each source
453 for source in ledger.sources:
453 for source in ledger.sources:
454 source.cleanup(ledger)
454 source.cleanup(ledger)
455
455
456 def _chainorphans(self, ui, filename, nodes, orphans, deltabases):
456 def _chainorphans(self, ui, filename, nodes, orphans, deltabases):
457 """Reorders ``orphans`` into a single chain inside ``nodes`` and
457 """Reorders ``orphans`` into a single chain inside ``nodes`` and
458 ``deltabases``.
458 ``deltabases``.
459
459
460 We often have orphan entries (nodes without a base that aren't
460 We often have orphan entries (nodes without a base that aren't
461 referenced by other nodes -- i.e., not part of a chain) due to gaps in
461 referenced by other nodes -- i.e., not part of a chain) due to gaps in
462 history. Rather than store them as individual fulltexts, we prefer to
462 history. Rather than store them as individual fulltexts, we prefer to
463 insert them as one chain sorted by size.
463 insert them as one chain sorted by size.
464 """
464 """
465 if not orphans:
465 if not orphans:
466 return nodes
466 return nodes
467
467
468 def getsize(node, default=0):
468 def getsize(node, default=0):
469 meta = self.data.getmeta(filename, node)
469 meta = self.data.getmeta(filename, node)
470 if constants.METAKEYSIZE in meta:
470 if constants.METAKEYSIZE in meta:
471 return meta[constants.METAKEYSIZE]
471 return meta[constants.METAKEYSIZE]
472 else:
472 else:
473 return default
473 return default
474
474
475 # Sort orphans by size; biggest first is preferred, since it's more
475 # Sort orphans by size; biggest first is preferred, since it's more
476 # likely to be the newest version assuming files grow over time.
476 # likely to be the newest version assuming files grow over time.
477 # (Sort by node first to ensure the sort is stable.)
477 # (Sort by node first to ensure the sort is stable.)
478 orphans = sorted(orphans)
478 orphans = sorted(orphans)
479 orphans = list(sorted(orphans, key=getsize, reverse=True))
479 orphans = list(sorted(orphans, key=getsize, reverse=True))
480 if ui.debugflag:
480 if ui.debugflag:
481 ui.debug("%s: orphan chain: %s\n" % (filename,
481 ui.debug("%s: orphan chain: %s\n" % (filename,
482 ", ".join([short(s) for s in orphans])))
482 ", ".join([short(s) for s in orphans])))
483
483
484 # Create one contiguous chain and reassign deltabases.
484 # Create one contiguous chain and reassign deltabases.
485 for i, node in enumerate(orphans):
485 for i, node in enumerate(orphans):
486 if i == 0:
486 if i == 0:
487 deltabases[node] = (nullid, 0)
487 deltabases[node] = (nullid, 0)
488 else:
488 else:
489 parent = orphans[i - 1]
489 parent = orphans[i - 1]
490 deltabases[node] = (parent, deltabases[parent][1] + 1)
490 deltabases[node] = (parent, deltabases[parent][1] + 1)
491 nodes = filter(lambda node: node not in orphans, nodes)
491 nodes = filter(lambda node: node not in orphans, nodes)
492 nodes += orphans
492 nodes += orphans
493 return nodes
493 return nodes
494
494
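A toy run of the orphan chaining above: three orphan nodes with sizes 30, 20, and 10 bytes end up as one chain rooted at the largest, each smaller node delta'd against the previous one. nullid here is a stand-in for mercurial.node.nullid.

nullid = b'\0' * 20
a, b, c = b'a' * 20, b'b' * 20, b'c' * 20
sizes = {a: 10, b: 30, c: 20}

orphans = sorted([a, b, c])                                      # stable base order
orphans = sorted(orphans, key=lambda n: sizes[n], reverse=True)  # [b, c, a]

deltabases = {}
for i, node in enumerate(orphans):
    if i == 0:
        deltabases[node] = (nullid, 0)        # biggest orphan becomes the fulltext
    else:
        parent = orphans[i - 1]
        deltabases[node] = (parent, deltabases[parent][1] + 1)
# deltabases == {b: (nullid, 0), c: (b, 1), a: (c, 2)}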
495 def repackdata(self, ledger, target):
495 def repackdata(self, ledger, target):
496 ui = self.repo.ui
496 ui = self.repo.ui
497 maxchainlen = ui.configint('packs', 'maxchainlen', 1000)
497 maxchainlen = ui.configint('packs', 'maxchainlen', 1000)
498
498
499 byfile = {}
499 byfile = {}
500 for entry in ledger.entries.itervalues():
500 for entry in ledger.entries.itervalues():
501 if entry.datasource:
501 if entry.datasource:
502 byfile.setdefault(entry.filename, {})[entry.node] = entry
502 byfile.setdefault(entry.filename, {})[entry.node] = entry
503
503
504 count = 0
504 count = 0
505 for filename, entries in sorted(byfile.iteritems()):
505 for filename, entries in sorted(byfile.iteritems()):
506 ui.progress(_("repacking data"), count, unit=self.unit,
506 ui.progress(_("repacking data"), count, unit=self.unit,
507 total=len(byfile))
507 total=len(byfile))
508
508
509 ancestors = {}
509 ancestors = {}
510 nodes = list(node for node in entries.iterkeys())
510 nodes = list(node for node in entries.iterkeys())
511 nohistory = []
511 nohistory = []
512 for i, node in enumerate(nodes):
512 for i, node in enumerate(nodes):
513 if node in ancestors:
513 if node in ancestors:
514 continue
514 continue
515 ui.progress(_("building history"), i, unit='nodes',
515 ui.progress(_("building history"), i, unit='nodes',
516 total=len(nodes))
516 total=len(nodes))
517 try:
517 try:
518 ancestors.update(self.fullhistory.getancestors(filename,
518 ancestors.update(self.fullhistory.getancestors(filename,
519 node, known=ancestors))
519 node, known=ancestors))
520 except KeyError:
520 except KeyError:
521 # Since we're packing data entries, we may not have the
521 # Since we're packing data entries, we may not have the
522 # corresponding history entries for them. It's not a big
522 # corresponding history entries for them. It's not a big
523 # deal, but the entries won't be delta'd perfectly.
523 # deal, but the entries won't be delta'd perfectly.
524 nohistory.append(node)
524 nohistory.append(node)
525 ui.progress(_("building history"), None)
525 ui.progress(_("building history"), None)
526
526
527 # Order the nodes children first, so we can produce reverse deltas
527 # Order the nodes children first, so we can produce reverse deltas
528 orderednodes = list(reversed(self._toposort(ancestors)))
528 orderednodes = list(reversed(self._toposort(ancestors)))
529 if len(nohistory) > 0:
529 if len(nohistory) > 0:
530 ui.debug('repackdata: %d nodes without history\n' %
530 ui.debug('repackdata: %d nodes without history\n' %
531 len(nohistory))
531 len(nohistory))
532 orderednodes.extend(sorted(nohistory))
532 orderednodes.extend(sorted(nohistory))
533
533
534 # Filter orderednodes to just the nodes we want to serialize (it
534 # Filter orderednodes to just the nodes we want to serialize (it
535 # currently also has the edge nodes' ancestors).
535 # currently also has the edge nodes' ancestors).
536 orderednodes = filter(lambda node: node in nodes, orderednodes)
536 orderednodes = filter(lambda node: node in nodes, orderednodes)
537
537
538 # Garbage collect old nodes:
538 # Garbage collect old nodes:
539 if self.garbagecollect:
539 if self.garbagecollect:
540 neworderednodes = []
540 neworderednodes = []
541 for node in orderednodes:
541 for node in orderednodes:
542 # If the node is old and is not in the keepset, we skip it,
542 # If the node is old and is not in the keepset, we skip it,
543 # and mark as garbage collected
543 # and mark as garbage collected
544 if ((filename, node) not in self.keepkeys and
544 if ((filename, node) not in self.keepkeys and
545 self.isold(self.repo, filename, node)):
545 self.isold(self.repo, filename, node)):
546 entries[node].gced = True
546 entries[node].gced = True
547 continue
547 continue
548 neworderednodes.append(node)
548 neworderednodes.append(node)
549 orderednodes = neworderednodes
549 orderednodes = neworderednodes
550
550
551 # Compute delta bases for nodes:
551 # Compute delta bases for nodes:
552 deltabases = {}
552 deltabases = {}
553 nobase = set()
553 nobase = set()
554 referenced = set()
554 referenced = set()
555 nodes = set(nodes)
555 nodes = set(nodes)
556 for i, node in enumerate(orderednodes):
556 for i, node in enumerate(orderednodes):
557 ui.progress(_("processing nodes"), i, unit='nodes',
557 ui.progress(_("processing nodes"), i, unit='nodes',
558 total=len(orderednodes))
558 total=len(orderednodes))
559 # Find delta base
559 # Find delta base
560 # TODO: allow delta'ing against most recent descendant instead
560 # TODO: allow delta'ing against most recent descendant instead
561 # of immediate child
561 # of immediate child
562 deltatuple = deltabases.get(node, None)
562 deltatuple = deltabases.get(node, None)
563 if deltatuple is None:
563 if deltatuple is None:
564 deltabase, chainlen = nullid, 0
564 deltabase, chainlen = nullid, 0
565 deltabases[node] = (nullid, 0)
565 deltabases[node] = (nullid, 0)
566 nobase.add(node)
566 nobase.add(node)
567 else:
567 else:
568 deltabase, chainlen = deltatuple
568 deltabase, chainlen = deltatuple
569 referenced.add(deltabase)
569 referenced.add(deltabase)
570
570
571 # Use available ancestor information to inform our delta choices
571 # Use available ancestor information to inform our delta choices
572 ancestorinfo = ancestors.get(node)
572 ancestorinfo = ancestors.get(node)
573 if ancestorinfo:
573 if ancestorinfo:
574 p1, p2, linknode, copyfrom = ancestorinfo
574 p1, p2, linknode, copyfrom = ancestorinfo
575
575
576 # The presence of copyfrom means we're at a point where the
576 # The presence of copyfrom means we're at a point where the
577 # file was copied from elsewhere. So don't attempt to do any
577 # file was copied from elsewhere. So don't attempt to do any
578 # deltas with the other file.
578 # deltas with the other file.
579 if copyfrom:
579 if copyfrom:
580 p1 = nullid
580 p1 = nullid
581
581
582 if chainlen < maxchainlen:
582 if chainlen < maxchainlen:
583 # Record this child as the delta base for its parents.
583 # Record this child as the delta base for its parents.
584 # This may be non-optimal, since the parents may have
584 # This may be non-optimal, since the parents may have
585 # many children, and this will only choose the last one.
585 # many children, and this will only choose the last one.
586 # TODO: record all children and try all deltas to find
586 # TODO: record all children and try all deltas to find
587 # best
587 # best
588 if p1 != nullid:
588 if p1 != nullid:
589 deltabases[p1] = (node, chainlen + 1)
589 deltabases[p1] = (node, chainlen + 1)
590 if p2 != nullid:
590 if p2 != nullid:
591 deltabases[p2] = (node, chainlen + 1)
591 deltabases[p2] = (node, chainlen + 1)
592
592
593 # experimental config: repack.chainorphansbysize
593 # experimental config: repack.chainorphansbysize
594 if ui.configbool('repack', 'chainorphansbysize'):
594 if ui.configbool('repack', 'chainorphansbysize'):
595 orphans = nobase - referenced
595 orphans = nobase - referenced
596 orderednodes = self._chainorphans(ui, filename, orderednodes,
596 orderednodes = self._chainorphans(ui, filename, orderednodes,
597 orphans, deltabases)
597 orphans, deltabases)
598
598
599 # Compute deltas and write to the pack
599 # Compute deltas and write to the pack
600 for i, node in enumerate(orderednodes):
600 for i, node in enumerate(orderednodes):
601 deltabase, chainlen = deltabases[node]
601 deltabase, chainlen = deltabases[node]
602 # Compute delta
602 # Compute delta
603 # TODO: Optimize the deltachain fetching. Since we're
603 # TODO: Optimize the deltachain fetching. Since we're
604 # iterating over the different versions of the file, we may
604 # iterating over the different versions of the file, we may
605 # be fetching the same deltachain over and over again.
605 # be fetching the same deltachain over and over again.
606 meta = None
606 meta = None
607 if deltabase != nullid:
607 if deltabase != nullid:
608 deltaentry = self.data.getdelta(filename, node)
608 deltaentry = self.data.getdelta(filename, node)
609 delta, deltabasename, origdeltabase, meta = deltaentry
609 delta, deltabasename, origdeltabase, meta = deltaentry
610 size = meta.get(constants.METAKEYSIZE)
610 size = meta.get(constants.METAKEYSIZE)
611 if (deltabasename != filename or origdeltabase != deltabase
611 if (deltabasename != filename or origdeltabase != deltabase
612 or size is None):
612 or size is None):
613 deltabasetext = self.data.get(filename, deltabase)
613 deltabasetext = self.data.get(filename, deltabase)
614 original = self.data.get(filename, node)
614 original = self.data.get(filename, node)
615 size = len(original)
615 size = len(original)
616 delta = mdiff.textdiff(deltabasetext, original)
616 delta = mdiff.textdiff(deltabasetext, original)
617 else:
617 else:
618 delta = self.data.get(filename, node)
618 delta = self.data.get(filename, node)
619 size = len(delta)
619 size = len(delta)
620 meta = self.data.getmeta(filename, node)
620 meta = self.data.getmeta(filename, node)
621
621
622 # TODO: don't use the delta if it's larger than the fulltext
622 # TODO: don't use the delta if it's larger than the fulltext
623 if constants.METAKEYSIZE not in meta:
623 if constants.METAKEYSIZE not in meta:
624 meta[constants.METAKEYSIZE] = size
624 meta[constants.METAKEYSIZE] = size
625 target.add(filename, node, deltabase, delta, meta)
625 target.add(filename, node, deltabase, delta, meta)
626
626
627 entries[node].datarepacked = True
627 entries[node].datarepacked = True
628
628
629 ui.progress(_("processing nodes"), None)
629 ui.progress(_("processing nodes"), None)
630 count += 1
630 count += 1
631
631
632 ui.progress(_("repacking data"), None)
632 ui.progress(_("repacking data"), None)
633 target.close(ledger=ledger)
633 target.close(ledger=ledger)
634
634
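The delta-base bookkeeping in repackdata() can be condensed as follows: nodes are visited children first, and each child records itself as the delta base of its parents (the last child to do so wins), bounded by maxchainlen. This is a sketch of the bookkeeping only; the real method also handles copies, garbage collection, and the actual delta computation.

nullid = b'\0' * 20

def assignbases(orderednodes, parents, maxchainlen=1000):
    """parents maps node -> (p1, p2); returns node -> (deltabase, chainlen)."""
    deltabases = {}
    for node in orderednodes:
        if node not in deltabases:
            # no child chose this node as a base: it is stored as a fulltext
            deltabases[node] = (nullid, 0)
        deltabase, chainlen = deltabases[node]
        if chainlen < maxchainlen:
            for p in parents.get(node, ()):
                if p != nullid:
                    # reverse delta: the parent will be delta'd against this child
                    deltabases[p] = (node, chainlen + 1)
    return deltabases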
635 def repackhistory(self, ledger, target):
635 def repackhistory(self, ledger, target):
636 ui = self.repo.ui
636 ui = self.repo.ui
637
637
638 byfile = {}
638 byfile = {}
639 for entry in ledger.entries.itervalues():
639 for entry in ledger.entries.itervalues():
640 if entry.historysource:
640 if entry.historysource:
641 byfile.setdefault(entry.filename, {})[entry.node] = entry
641 byfile.setdefault(entry.filename, {})[entry.node] = entry
642
642
643 count = 0
643 count = 0
644 for filename, entries in sorted(byfile.iteritems()):
644 for filename, entries in sorted(byfile.iteritems()):
645 ancestors = {}
645 ancestors = {}
646 nodes = list(node for node in entries.iterkeys())
646 nodes = list(node for node in entries.iterkeys())
647
647
648 for node in nodes:
648 for node in nodes:
649 if node in ancestors:
649 if node in ancestors:
650 continue
650 continue
651 ancestors.update(self.history.getancestors(filename, node,
651 ancestors.update(self.history.getancestors(filename, node,
652 known=ancestors))
652 known=ancestors))
653
653
654 # Order the nodes children first
654 # Order the nodes children first
655 orderednodes = reversed(self._toposort(ancestors))
655 orderednodes = reversed(self._toposort(ancestors))
656
656
657 # Write to the pack
657 # Write to the pack
658 dontprocess = set()
658 dontprocess = set()
659 for node in orderednodes:
659 for node in orderednodes:
660 p1, p2, linknode, copyfrom = ancestors[node]
660 p1, p2, linknode, copyfrom = ancestors[node]
661
661
662 # If the node is marked dontprocess, but it's also in the
662 # If the node is marked dontprocess, but it's also in the
663 # explicit entries set, that means the node exists both in this
663 # explicit entries set, that means the node exists both in this
664 # file and in another file that was copied to this file.
664 # file and in another file that was copied to this file.
665 # Usually this happens if the file was copied to another file,
665 # Usually this happens if the file was copied to another file,
666 # then the copy was deleted, then reintroduced without copy
666 # then the copy was deleted, then reintroduced without copy
667 # metadata. The original add and the new add have the same hash
667 # metadata. The original add and the new add have the same hash
668 # since the content is identical and the parents are null.
668 # since the content is identical and the parents are null.
669 if node in dontprocess and node not in entries:
669 if node in dontprocess and node not in entries:
670 # If copyfrom == filename, it means the copy history
670 # If copyfrom == filename, it means the copy history
671 # went to some other file, then came back to this one, so we
671 # went to some other file, then came back to this one, so we
672 # should continue processing it.
672 # should continue processing it.
673 if p1 != nullid and copyfrom != filename:
673 if p1 != nullid and copyfrom != filename:
674 dontprocess.add(p1)
674 dontprocess.add(p1)
675 if p2 != nullid:
675 if p2 != nullid:
676 dontprocess.add(p2)
676 dontprocess.add(p2)
677 continue
677 continue
678
678
679 if copyfrom:
679 if copyfrom:
680 dontprocess.add(p1)
680 dontprocess.add(p1)
681
681
682 target.add(filename, node, p1, p2, linknode, copyfrom)
682 target.add(filename, node, p1, p2, linknode, copyfrom)
683
683
684 if node in entries:
684 if node in entries:
685 entries[node].historyrepacked = True
685 entries[node].historyrepacked = True
686
686
687 count += 1
687 count += 1
688 ui.progress(_("repacking history"), count, unit=self.unit,
688 ui.progress(_("repacking history"), count, unit=self.unit,
689 total=len(byfile))
689 total=len(byfile))
690
690
691 ui.progress(_("repacking history"), None)
691 ui.progress(_("repacking history"), None)
692 target.close(ledger=ledger)
692 target.close(ledger=ledger)
693
693
694 def _toposort(self, ancestors):
694 def _toposort(self, ancestors):
695 def parentfunc(node):
695 def parentfunc(node):
696 p1, p2, linknode, copyfrom = ancestors[node]
696 p1, p2, linknode, copyfrom = ancestors[node]
697 parents = []
697 parents = []
698 if p1 != nullid:
698 if p1 != nullid:
699 parents.append(p1)
699 parents.append(p1)
700 if p2 != nullid:
700 if p2 != nullid:
701 parents.append(p2)
701 parents.append(p2)
702 return parents
702 return parents
703
703
704 sortednodes = shallowutil.sortnodes(ancestors.keys(), parentfunc)
704 sortednodes = shallowutil.sortnodes(ancestors.keys(), parentfunc)
705 return sortednodes
705 return sortednodes
706
706
707 class repackledger(object):
707 class repackledger(object):
708 """Storage for all the bookkeeping that happens during a repack. It contains
708 """Storage for all the bookkeeping that happens during a repack. It contains
709 the list of revisions being repacked, what happened to each revision, and
709 the list of revisions being repacked, what happened to each revision, and
710 which source store contained which revision originally (for later cleanup).
710 which source store contained which revision originally (for later cleanup).
711 """
711 """
712 def __init__(self):
712 def __init__(self):
713 self.entries = {}
713 self.entries = {}
714 self.sources = {}
714 self.sources = {}
715 self.created = set()
715 self.created = set()
716
716
717 def markdataentry(self, source, filename, node):
717 def markdataentry(self, source, filename, node):
718 """Mark the given filename+node revision as having a data rev in the
718 """Mark the given filename+node revision as having a data rev in the
719 given source.
719 given source.
720 """
720 """
721 entry = self._getorcreateentry(filename, node)
721 entry = self._getorcreateentry(filename, node)
722 entry.datasource = True
722 entry.datasource = True
723 entries = self.sources.get(source)
723 entries = self.sources.get(source)
724 if not entries:
724 if not entries:
725 entries = set()
725 entries = set()
726 self.sources[source] = entries
726 self.sources[source] = entries
727 entries.add(entry)
727 entries.add(entry)
728
728
729 def markhistoryentry(self, source, filename, node):
729 def markhistoryentry(self, source, filename, node):
730 """Mark the given filename+node revision as having a history rev in the
730 """Mark the given filename+node revision as having a history rev in the
731 given source.
731 given source.
732 """
732 """
733 entry = self._getorcreateentry(filename, node)
733 entry = self._getorcreateentry(filename, node)
734 entry.historysource = True
734 entry.historysource = True
735 entries = self.sources.get(source)
735 entries = self.sources.get(source)
736 if not entries:
736 if not entries:
737 entries = set()
737 entries = set()
738 self.sources[source] = entries
738 self.sources[source] = entries
739 entries.add(entry)
739 entries.add(entry)
740
740
741 def _getorcreateentry(self, filename, node):
741 def _getorcreateentry(self, filename, node):
742 key = (filename, node)
742 key = (filename, node)
743 value = self.entries.get(key)
743 value = self.entries.get(key)
744 if not value:
744 if not value:
745 value = repackentry(filename, node)
745 value = repackentry(filename, node)
746 self.entries[key] = value
746 self.entries[key] = value
747
747
748 return value
748 return value
749
749
750 def addcreated(self, value):
750 def addcreated(self, value):
751 self.created.add(value)
751 self.created.add(value)
752
752
753 class repackentry(object):
753 class repackentry(object):
754 """Simple class representing a single revision entry in the repackledger.
754 """Simple class representing a single revision entry in the repackledger.
755 """
755 """
756 __slots__ = ['filename', 'node', 'datasource', 'historysource',
756 __slots__ = (r'filename', r'node', r'datasource', r'historysource',
757 'datarepacked', 'historyrepacked', 'gced']
757 r'datarepacked', r'historyrepacked', r'gced')
758 def __init__(self, filename, node):
758 def __init__(self, filename, node):
759 self.filename = filename
759 self.filename = filename
760 self.node = node
760 self.node = node
761 # If the revision has a data entry in the source
761 # If the revision has a data entry in the source
762 self.datasource = False
762 self.datasource = False
763 # If the revision has a history entry in the source
763 # If the revision has a history entry in the source
764 self.historysource = False
764 self.historysource = False
765 # If the revision's data entry was repacked into the repack target
765 # If the revision's data entry was repacked into the repack target
766 self.datarepacked = False
766 self.datarepacked = False
767 # If the revision's history entry was repacked into the repack target
767 # If the revision's history entry was repacked into the repack target
768 self.historyrepacked = False
768 self.historyrepacked = False
769 # If garbage collected
769 # If garbage collected
770 self.gced = False
770 self.gced = False
771
771
772 def repacklockvfs(repo):
772 def repacklockvfs(repo):
773 if util.safehasattr(repo, 'name'):
773 if util.safehasattr(repo, 'name'):
774 # Lock in the shared cache so repacks across multiple copies of the same
774 # Lock in the shared cache so repacks across multiple copies of the same
775 # repo are coordinated.
775 # repo are coordinated.
776 sharedcachepath = shallowutil.getcachepackpath(
776 sharedcachepath = shallowutil.getcachepackpath(
777 repo,
777 repo,
778 constants.FILEPACK_CATEGORY)
778 constants.FILEPACK_CATEGORY)
779 return vfs.vfs(sharedcachepath)
779 return vfs.vfs(sharedcachepath)
780 else:
780 else:
781 return repo.svfs
781 return repo.svfs
@@ -1,491 +1,491 b''
1 # shallowutil.py -- remotefilelog utilities
1 # shallowutil.py -- remotefilelog utilities
2 #
2 #
3 # Copyright 2014 Facebook, Inc.
3 # Copyright 2014 Facebook, Inc.
4 #
4 #
5 # This software may be used and distributed according to the terms of the
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
6 # GNU General Public License version 2 or any later version.
7 from __future__ import absolute_import
7 from __future__ import absolute_import
8
8
9 import collections
9 import collections
10 import errno
10 import errno
11 import hashlib
11 import hashlib
12 import os
12 import os
13 import stat
13 import stat
14 import struct
14 import struct
15 import tempfile
15 import tempfile
16
16
17 from mercurial.i18n import _
17 from mercurial.i18n import _
18 from mercurial import (
18 from mercurial import (
19 error,
19 error,
20 pycompat,
20 pycompat,
21 revlog,
21 revlog,
22 util,
22 util,
23 )
23 )
24 from mercurial.utils import (
24 from mercurial.utils import (
25 storageutil,
25 storageutil,
26 stringutil,
26 stringutil,
27 )
27 )
28 from . import constants
28 from . import constants
29
29
30 if not pycompat.iswindows:
30 if not pycompat.iswindows:
31 import grp
31 import grp
32
32
33 def isenabled(repo):
33 def isenabled(repo):
34 """returns whether the repository is remotefilelog enabled or not"""
34 """returns whether the repository is remotefilelog enabled or not"""
35 return constants.SHALLOWREPO_REQUIREMENT in repo.requirements
35 return constants.SHALLOWREPO_REQUIREMENT in repo.requirements
36
36
37 def getcachekey(reponame, file, id):
37 def getcachekey(reponame, file, id):
38 pathhash = hashlib.sha1(file).hexdigest()
38 pathhash = hashlib.sha1(file).hexdigest()
39 return os.path.join(reponame, pathhash[:2], pathhash[2:], id)
39 return os.path.join(reponame, pathhash[:2], pathhash[2:], id)
40
40
41 def getlocalkey(file, id):
41 def getlocalkey(file, id):
42 pathhash = hashlib.sha1(file).hexdigest()
42 pathhash = hashlib.sha1(file).hexdigest()
43 return os.path.join(pathhash, id)
43 return os.path.join(pathhash, id)
44
44
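For illustration, the cache-key layout produced by getcachekey() and getlocalkey(): the file path is SHA-1 hashed and split after two hex digits so files fan out across directories. The repo name and file id below are made up.

import hashlib
import os

pathhash = hashlib.sha1(b'foo/bar.txt').hexdigest()
fileid = '0' * 40                                   # hypothetical file node hex
cachekey = os.path.join('myrepo', pathhash[:2], pathhash[2:], fileid)
localkey = os.path.join(pathhash, fileid)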
45 def getcachepath(ui, allowempty=False):
45 def getcachepath(ui, allowempty=False):
46 cachepath = ui.config("remotefilelog", "cachepath")
46 cachepath = ui.config("remotefilelog", "cachepath")
47 if not cachepath:
47 if not cachepath:
48 if allowempty:
48 if allowempty:
49 return None
49 return None
50 else:
50 else:
51 raise error.Abort(_("could not find config option "
51 raise error.Abort(_("could not find config option "
52 "remotefilelog.cachepath"))
52 "remotefilelog.cachepath"))
53 return util.expandpath(cachepath)
53 return util.expandpath(cachepath)
54
54
55 def getcachepackpath(repo, category):
55 def getcachepackpath(repo, category):
56 cachepath = getcachepath(repo.ui)
56 cachepath = getcachepath(repo.ui)
57 if category != constants.FILEPACK_CATEGORY:
57 if category != constants.FILEPACK_CATEGORY:
58 return os.path.join(cachepath, repo.name, 'packs', category)
58 return os.path.join(cachepath, repo.name, 'packs', category)
59 else:
59 else:
60 return os.path.join(cachepath, repo.name, 'packs')
60 return os.path.join(cachepath, repo.name, 'packs')
61
61
62 def getlocalpackpath(base, category):
62 def getlocalpackpath(base, category):
63 return os.path.join(base, 'packs', category)
63 return os.path.join(base, 'packs', category)
64
64
65 def createrevlogtext(text, copyfrom=None, copyrev=None):
65 def createrevlogtext(text, copyfrom=None, copyrev=None):
66 """returns a string that matches the revlog contents in a
66 """returns a string that matches the revlog contents in a
67 traditional revlog
67 traditional revlog
68 """
68 """
69 meta = {}
69 meta = {}
70 if copyfrom or text.startswith('\1\n'):
70 if copyfrom or text.startswith('\1\n'):
71 if copyfrom:
71 if copyfrom:
72 meta['copy'] = copyfrom
72 meta['copy'] = copyfrom
73 meta['copyrev'] = copyrev
73 meta['copyrev'] = copyrev
74 text = storageutil.packmeta(meta, text)
74 text = storageutil.packmeta(meta, text)
75
75
76 return text
76 return text
77
77
78 def parsemeta(text):
78 def parsemeta(text):
79 """parse mercurial filelog metadata"""
79 """parse mercurial filelog metadata"""
80 meta, size = storageutil.parsemeta(text)
80 meta, size = storageutil.parsemeta(text)
81 if text.startswith('\1\n'):
81 if text.startswith('\1\n'):
82 s = text.index('\1\n', 2)
82 s = text.index('\1\n', 2)
83 text = text[s + 2:]
83 text = text[s + 2:]
84 return meta or {}, text
84 return meta or {}, text
85
85
86 def sumdicts(*dicts):
86 def sumdicts(*dicts):
87 """Adds all the values of *dicts together into one dictionary. This assumes
87 """Adds all the values of *dicts together into one dictionary. This assumes
88 the values in *dicts are all summable.
88 the values in *dicts are all summable.
89
89
90 e.g. [{'a': 4', 'b': 2}, {'b': 3, 'c': 1}] -> {'a': 4, 'b': 5, 'c': 1}
90 e.g. [{'a': 4, 'b': 2}, {'b': 3, 'c': 1}] -> {'a': 4, 'b': 5, 'c': 1}
90 e.g. [{'a': 4, 'b': 2}, {'b': 3, 'c': 1}] -> {'a': 4, 'b': 5, 'c': 1}
91 """
92 result = collections.defaultdict(lambda: 0)
92 result = collections.defaultdict(lambda: 0)
93 for dict in dicts:
93 for dict in dicts:
94 for k, v in dict.iteritems():
94 for k, v in dict.iteritems():
95 result[k] += v
95 result[k] += v
96 return result
96 return result
97
97
98 def prefixkeys(dict, prefix):
98 def prefixkeys(dict, prefix):
99 """Returns ``dict`` with ``prefix`` prepended to all its keys."""
99 """Returns ``dict`` with ``prefix`` prepended to all its keys."""
100 result = {}
100 result = {}
101 for k, v in dict.iteritems():
101 for k, v in dict.iteritems():
102 result[prefix + k] = v
102 result[prefix + k] = v
103 return result
103 return result
104
104
105 def reportpackmetrics(ui, prefix, *stores):
105 def reportpackmetrics(ui, prefix, *stores):
106 dicts = [s.getmetrics() for s in stores]
106 dicts = [s.getmetrics() for s in stores]
107 dict = prefixkeys(sumdicts(*dicts), prefix + '_')
107 dict = prefixkeys(sumdicts(*dicts), prefix + '_')
108 ui.log(prefix + "_packsizes", "", **dict)
108 ui.log(prefix + "_packsizes", "", **dict)
109
109
110 def _parsepackmeta(metabuf):
110 def _parsepackmeta(metabuf):
111 """parse datapack meta, bytes (<metadata-list>) -> dict
111 """parse datapack meta, bytes (<metadata-list>) -> dict
112
112
113 The dict contains raw content - both keys and values are strings.
113 The dict contains raw content - both keys and values are strings.
114 Upper-level business may want to convert some of them to other types like
114 Upper-level business may want to convert some of them to other types like
115 integers, on their own.
115 integers, on their own.
116
116
117 raise ValueError if the data is corrupted
117 raise ValueError if the data is corrupted
118 """
118 """
119 metadict = {}
119 metadict = {}
120 offset = 0
120 offset = 0
121 buflen = len(metabuf)
121 buflen = len(metabuf)
122 while buflen - offset >= 3:
122 while buflen - offset >= 3:
123 key = metabuf[offset]
123 key = metabuf[offset]
124 offset += 1
124 offset += 1
125 metalen = struct.unpack_from('!H', metabuf, offset)[0]
125 metalen = struct.unpack_from('!H', metabuf, offset)[0]
126 offset += 2
126 offset += 2
127 if offset + metalen > buflen:
127 if offset + metalen > buflen:
128 raise ValueError('corrupted metadata: incomplete buffer')
128 raise ValueError('corrupted metadata: incomplete buffer')
129 value = metabuf[offset:offset + metalen]
129 value = metabuf[offset:offset + metalen]
130 metadict[key] = value
130 metadict[key] = value
131 offset += metalen
131 offset += metalen
132 if offset != buflen:
132 if offset != buflen:
133 raise ValueError('corrupted metadata: redundant data')
133 raise ValueError('corrupted metadata: redundant data')
134 return metadict
134 return metadict
135
135
136 def _buildpackmeta(metadict):
136 def _buildpackmeta(metadict):
137 """reverse of _parsepackmeta, dict -> bytes (<metadata-list>)
137 """reverse of _parsepackmeta, dict -> bytes (<metadata-list>)
138
138
139 The dict contains raw content - both keys and values are strings.
139 The dict contains raw content - both keys and values are strings.
140 Upper-level business may want to serialize some of other types (like
140 Upper-level business may want to serialize some of other types (like
141 integers) to strings before calling this function.
141 integers) to strings before calling this function.
142
142
143 raise ProgrammingError when metadata key is illegal, or ValueError if
143 raise ProgrammingError when metadata key is illegal, or ValueError if
144 length limit is exceeded
144 length limit is exceeded
145 """
145 """
146 metabuf = ''
146 metabuf = ''
147 for k, v in sorted((metadict or {}).iteritems()):
147 for k, v in sorted((metadict or {}).iteritems()):
148 if len(k) != 1:
148 if len(k) != 1:
149 raise error.ProgrammingError('packmeta: illegal key: %s' % k)
149 raise error.ProgrammingError('packmeta: illegal key: %s' % k)
150 if len(v) > 0xfffe:
150 if len(v) > 0xfffe:
151 raise ValueError('metadata value is too long: 0x%x > 0xfffe'
151 raise ValueError('metadata value is too long: 0x%x > 0xfffe'
152 % len(v))
152 % len(v))
153 metabuf += k
153 metabuf += k
154 metabuf += struct.pack('!H', len(v))
154 metabuf += struct.pack('!H', len(v))
155 metabuf += v
155 metabuf += v
156 # len(metabuf) is guaranteed representable in 4 bytes, because there are
156 # len(metabuf) is guaranteed representable in 4 bytes, because there are
157 # only 256 keys, and for each value, len(value) <= 0xfffe.
157 # only 256 keys, and for each value, len(value) <= 0xfffe.
158 return metabuf
158 return metabuf
159
159
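The <metadata-list> format handled by _buildpackmeta() and _parsepackmeta() above is a sequence of (1-byte key, 2-byte big-endian length, value) triples. A standalone round-trip of that encoding, using bytes throughout:

import struct

def packmeta(metadict):
    buf = b''
    for k, v in sorted(metadict.items()):
        buf += k + struct.pack('!H', len(v)) + v
    return buf

def unpackmeta(buf):
    out, offset = {}, 0
    while offset < len(buf):
        key = buf[offset:offset + 1]
        (vlen,) = struct.unpack_from('!H', buf, offset + 1)
        out[key] = buf[offset + 3:offset + 3 + vlen]
        offset += 3 + vlen
    return out

assert unpackmeta(packmeta({b's': b'\x2a', b'f': b''})) == {b's': b'\x2a', b'f': b''}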
160 _metaitemtypes = {
160 _metaitemtypes = {
161 constants.METAKEYFLAG: (int, long),
161 constants.METAKEYFLAG: (int, pycompat.long),
162 constants.METAKEYSIZE: (int, long),
162 constants.METAKEYSIZE: (int, pycompat.long),
163 }
163 }
164
164
165 def buildpackmeta(metadict):
165 def buildpackmeta(metadict):
166 """like _buildpackmeta, but typechecks metadict and normalizes it.
166 """like _buildpackmeta, but typechecks metadict and normalizes it.
167
167
168 This means METAKEYSIZE and METAKEYFLAG should have integers as values,
168 This means METAKEYSIZE and METAKEYFLAG should have integers as values,
169 and METAKEYFLAG will be dropped if its value is 0.
169 and METAKEYFLAG will be dropped if its value is 0.
170 """
170 """
171 newmeta = {}
171 newmeta = {}
172 for k, v in (metadict or {}).iteritems():
172 for k, v in (metadict or {}).iteritems():
173 expectedtype = _metaitemtypes.get(k, (bytes,))
173 expectedtype = _metaitemtypes.get(k, (bytes,))
174 if not isinstance(v, expectedtype):
174 if not isinstance(v, expectedtype):
175 raise error.ProgrammingError('packmeta: wrong type of key %s' % k)
175 raise error.ProgrammingError('packmeta: wrong type of key %s' % k)
176 # normalize int to binary buffer
176 # normalize int to binary buffer
177 if int in expectedtype:
177 if int in expectedtype:
178 # optimization: remove flag if it's 0 to save space
178 # optimization: remove flag if it's 0 to save space
179 if k == constants.METAKEYFLAG and v == 0:
179 if k == constants.METAKEYFLAG and v == 0:
180 continue
180 continue
181 v = int2bin(v)
181 v = int2bin(v)
182 newmeta[k] = v
182 newmeta[k] = v
183 return _buildpackmeta(newmeta)
183 return _buildpackmeta(newmeta)
184
184
185 def parsepackmeta(metabuf):
185 def parsepackmeta(metabuf):
186 """like _parsepackmeta, but convert fields to desired types automatically.
186 """like _parsepackmeta, but convert fields to desired types automatically.
187
187
188 This means, METAKEYFLAG and METAKEYSIZE fields will be converted to
188 This means, METAKEYFLAG and METAKEYSIZE fields will be converted to
189 integers.
189 integers.
190 """
190 """
191 metadict = _parsepackmeta(metabuf)
191 metadict = _parsepackmeta(metabuf)
192 for k, v in metadict.iteritems():
192 for k, v in metadict.iteritems():
193 if k in _metaitemtypes and int in _metaitemtypes[k]:
193 if k in _metaitemtypes and int in _metaitemtypes[k]:
194 metadict[k] = bin2int(v)
194 metadict[k] = bin2int(v)
195 return metadict
195 return metadict
196
196
197 def int2bin(n):
197 def int2bin(n):
198 """convert a non-negative integer to raw binary buffer"""
198 """convert a non-negative integer to raw binary buffer"""
199 buf = bytearray()
199 buf = bytearray()
200 while n > 0:
200 while n > 0:
201 buf.insert(0, n & 0xff)
201 buf.insert(0, n & 0xff)
202 n >>= 8
202 n >>= 8
203 return bytes(buf)
203 return bytes(buf)
204
204
205 def bin2int(buf):
205 def bin2int(buf):
206 """the reverse of int2bin, convert a binary buffer to an integer"""
206 """the reverse of int2bin, convert a binary buffer to an integer"""
207 x = 0
207 x = 0
208 for b in bytearray(buf):
208 for b in bytearray(buf):
209 x <<= 8
209 x <<= 8
210 x |= b
210 x |= b
211 return x
211 return x
212
212
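A quick round-trip of the minimal-length, big-endian integer encoding implemented by int2bin()/bin2int() above (reproduced standalone so the assertions can run on their own):

def int2bin(n):
    buf = bytearray()
    while n > 0:
        buf.insert(0, n & 0xff)
        n >>= 8
    return bytes(buf)

def bin2int(buf):
    x = 0
    for b in bytearray(buf):
        x <<= 8
        x |= b
    return x

assert int2bin(0x1234) == b'\x12\x34'
assert bin2int(int2bin(123456789)) == 123456789
assert int2bin(0) == b''           # zero encodes as the empty string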
213 def parsesizeflags(raw):
213 def parsesizeflags(raw):
214 """given a remotefilelog blob, return (headersize, rawtextsize, flags)
214 """given a remotefilelog blob, return (headersize, rawtextsize, flags)
215
215
216 see remotefilelogserver.createfileblob for the format.
216 see remotefilelogserver.createfileblob for the format.
217 raise RuntimeError if the content is ill-formed.
217 raise RuntimeError if the content is ill-formed.
218 """
218 """
219 flags = revlog.REVIDX_DEFAULT_FLAGS
219 flags = revlog.REVIDX_DEFAULT_FLAGS
220 size = None
220 size = None
221 try:
221 try:
222 index = raw.index('\0')
222 index = raw.index('\0')
223 header = raw[:index]
223 header = raw[:index]
224 if header.startswith('v'):
224 if header.startswith('v'):
225 # v1 and above, header starts with 'v'
225 # v1 and above, header starts with 'v'
226 if header.startswith('v1\n'):
226 if header.startswith('v1\n'):
227 for s in header.split('\n'):
227 for s in header.split('\n'):
228 if s.startswith(constants.METAKEYSIZE):
228 if s.startswith(constants.METAKEYSIZE):
229 size = int(s[len(constants.METAKEYSIZE):])
229 size = int(s[len(constants.METAKEYSIZE):])
230 elif s.startswith(constants.METAKEYFLAG):
230 elif s.startswith(constants.METAKEYFLAG):
231 flags = int(s[len(constants.METAKEYFLAG):])
231 flags = int(s[len(constants.METAKEYFLAG):])
232 else:
232 else:
233 raise RuntimeError('unsupported remotefilelog header: %s'
233 raise RuntimeError('unsupported remotefilelog header: %s'
234 % header)
234 % header)
235 else:
235 else:
236 # v0, str(int(size)) is the header
236 # v0, str(int(size)) is the header
237 size = int(header)
237 size = int(header)
238 except ValueError:
238 except ValueError:
239 raise RuntimeError("unexpected remotefilelog header: illegal format")
239 raise RuntimeError("unexpected remotefilelog header: illegal format")
240 if size is None:
240 if size is None:
241 raise RuntimeError("unexpected remotefilelog header: no size found")
241 raise RuntimeError("unexpected remotefilelog header: no size found")
242 return index + 1, size, flags
242 return index + 1, size, flags
243
243
244 def buildfileblobheader(size, flags, version=None):
244 def buildfileblobheader(size, flags, version=None):
245 """return the header of a remotefilelog blob.
245 """return the header of a remotefilelog blob.
246
246
247 see remotefilelogserver.createfileblob for the format.
247 see remotefilelogserver.createfileblob for the format.
248 approximately the reverse of parsesizeflags.
248 approximately the reverse of parsesizeflags.
249
249
250 version could be 0 or 1, or None (auto decide).
250 version could be 0 or 1, or None (auto decide).
251 """
251 """
252 # choose v0 if flags is empty, otherwise v1
252 # choose v0 if flags is empty, otherwise v1
253 if version is None:
253 if version is None:
254 version = int(bool(flags))
254 version = int(bool(flags))
255 if version == 1:
255 if version == 1:
256 header = ('v1\n%s%d\n%s%d'
256 header = ('v1\n%s%d\n%s%d'
257 % (constants.METAKEYSIZE, size,
257 % (constants.METAKEYSIZE, size,
258 constants.METAKEYFLAG, flags))
258 constants.METAKEYFLAG, flags))
259 elif version == 0:
259 elif version == 0:
260 if flags:
260 if flags:
261 raise error.ProgrammingError('fileblob v0 does not support flag')
261 raise error.ProgrammingError('fileblob v0 does not support flag')
262 header = '%d' % size
262 header = '%d' % size
263 else:
263 else:
264 raise error.ProgrammingError('unknown fileblob version %d' % version)
264 raise error.ProgrammingError('unknown fileblob version %d' % version)
265 return header
265 return header
266
266
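An example of the two header flavours produced by buildfileblobheader() and read back by parsesizeflags(). The toy parser below assumes METAKEYSIZE and METAKEYFLAG are the single-byte keys 's' and 'f'; that assumption stands in for the constants module, which is not shown here.

def parseheader(raw):
    """Toy parsesizeflags(): returns (headersize, size, flags)."""
    index = raw.index('\0')
    header = raw[:index]
    size, flags = None, 0
    if header.startswith('v1\n'):
        for s in header.split('\n'):
            if s.startswith('s'):
                size = int(s[1:])
            elif s.startswith('f'):
                flags = int(s[1:])
    elif not header.startswith('v'):
        size = int(header)             # v0: the header is just the decimal size
    return index + 1, size, flags

assert parseheader('1234\0<text>') == (5, 1234, 0)            # v0
assert parseheader('v1\ns1234\nf2\0<text>') == (12, 1234, 2)  # v1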
267 def ancestormap(raw):
267 def ancestormap(raw):
268 offset, size, flags = parsesizeflags(raw)
268 offset, size, flags = parsesizeflags(raw)
269 start = offset + size
269 start = offset + size
270
270
271 mapping = {}
271 mapping = {}
272 while start < len(raw):
272 while start < len(raw):
273 divider = raw.index('\0', start + 80)
273 divider = raw.index('\0', start + 80)
274
274
275 currentnode = raw[start:(start + 20)]
275 currentnode = raw[start:(start + 20)]
276 p1 = raw[(start + 20):(start + 40)]
276 p1 = raw[(start + 20):(start + 40)]
277 p2 = raw[(start + 40):(start + 60)]
277 p2 = raw[(start + 40):(start + 60)]
278 linknode = raw[(start + 60):(start + 80)]
278 linknode = raw[(start + 60):(start + 80)]
279 copyfrom = raw[(start + 80):divider]
279 copyfrom = raw[(start + 80):divider]
280
280
281 mapping[currentnode] = (p1, p2, linknode, copyfrom)
281 mapping[currentnode] = (p1, p2, linknode, copyfrom)
282 start = divider + 1
282 start = divider + 1
283
283
284 return mapping
284 return mapping
285
285
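The blob tail that ancestormap() walks is a concatenation of fixed-width records: four 20-byte binary hashes (node, p1, p2, linknode) followed by a NUL-terminated copyfrom path. One record, built by hand for illustration:

node = b'\x11' * 20
p1, p2 = b'\x22' * 20, b'\x00' * 20          # p2 is null here
linknode = b'\x33' * 20
copyfrom = b'old/name.txt'                   # empty when the file was not copied
record = node + p1 + p2 + linknode + copyfrom + b'\0'
assert len(record) == 80 + len(copyfrom) + 1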
286 def readfile(path):
286 def readfile(path):
287 f = open(path, 'rb')
287 f = open(path, 'rb')
288 try:
288 try:
289 result = f.read()
289 result = f.read()
290
290
291 # we should never have empty files
291 # we should never have empty files
292 if not result:
292 if not result:
293 os.remove(path)
293 os.remove(path)
294 raise IOError("empty file: %s" % path)
294 raise IOError("empty file: %s" % path)
295
295
296 return result
296 return result
297 finally:
297 finally:
298 f.close()
298 f.close()
299
299
300 def unlinkfile(filepath):
300 def unlinkfile(filepath):
301 if pycompat.iswindows:
301 if pycompat.iswindows:
302 # On Windows, os.unlink cannot delete readonly files
302 # On Windows, os.unlink cannot delete readonly files
303 os.chmod(filepath, stat.S_IWUSR)
303 os.chmod(filepath, stat.S_IWUSR)
304 os.unlink(filepath)
304 os.unlink(filepath)
305
305
306 def renamefile(source, destination):
306 def renamefile(source, destination):
307 if pycompat.iswindows:
307 if pycompat.iswindows:
308 # On Windows, os.rename cannot rename readonly files
308 # On Windows, os.rename cannot rename readonly files
309 # and cannot overwrite destination if it exists
309 # and cannot overwrite destination if it exists
310 os.chmod(source, stat.S_IWUSR)
310 os.chmod(source, stat.S_IWUSR)
311 if os.path.isfile(destination):
311 if os.path.isfile(destination):
312 os.chmod(destination, stat.S_IWUSR)
312 os.chmod(destination, stat.S_IWUSR)
313 os.unlink(destination)
313 os.unlink(destination)
314
314
315 os.rename(source, destination)
315 os.rename(source, destination)
316
316
317 def writefile(path, content, readonly=False):
317 def writefile(path, content, readonly=False):
318 dirname, filename = os.path.split(path)
318 dirname, filename = os.path.split(path)
319 if not os.path.exists(dirname):
319 if not os.path.exists(dirname):
320 try:
320 try:
321 os.makedirs(dirname)
321 os.makedirs(dirname)
322 except OSError as ex:
322 except OSError as ex:
323 if ex.errno != errno.EEXIST:
323 if ex.errno != errno.EEXIST:
324 raise
324 raise
325
325
326 fd, temp = tempfile.mkstemp(prefix='.%s-' % filename, dir=dirname)
326 fd, temp = tempfile.mkstemp(prefix='.%s-' % filename, dir=dirname)
327 os.close(fd)
327 os.close(fd)
328
328
329 try:
329 try:
330 f = util.posixfile(temp, 'wb')
330 f = util.posixfile(temp, 'wb')
331 f.write(content)
331 f.write(content)
332 f.close()
332 f.close()
333
333
334 if readonly:
334 if readonly:
335 mode = 0o444
335 mode = 0o444
336 else:
336 else:
337 # tempfiles are created with 0o600, so we need to manually set the
337 # tempfiles are created with 0o600, so we need to manually set the
338 # mode.
338 # mode.
339 oldumask = os.umask(0)
339 oldumask = os.umask(0)
340 # there's no way to get the umask without modifying it, so set it
340 # there's no way to get the umask without modifying it, so set it
341 # back
341 # back
342 os.umask(oldumask)
342 os.umask(oldumask)
343 mode = ~oldumask
343 mode = ~oldumask
344
344
345 renamefile(temp, path)
345 renamefile(temp, path)
346 os.chmod(path, mode)
346 os.chmod(path, mode)
347 except Exception:
347 except Exception:
348 try:
348 try:
349 unlinkfile(temp)
349 unlinkfile(temp)
350 except OSError:
350 except OSError:
351 pass
351 pass
352 raise
352 raise
353
353
354 def sortnodes(nodes, parentfunc):
354 def sortnodes(nodes, parentfunc):
355 """Topologically sorts the nodes, using the parentfunc to find
355 """Topologically sorts the nodes, using the parentfunc to find
356 the parents of nodes."""
356 the parents of nodes."""
357 nodes = set(nodes)
357 nodes = set(nodes)
358 childmap = {}
358 childmap = {}
359 parentmap = {}
359 parentmap = {}
360 roots = []
360 roots = []
361
361
362 # Build a child and parent map
362 # Build a child and parent map
363 for n in nodes:
363 for n in nodes:
364 parents = [p for p in parentfunc(n) if p in nodes]
364 parents = [p for p in parentfunc(n) if p in nodes]
365 parentmap[n] = set(parents)
365 parentmap[n] = set(parents)
366 for p in parents:
366 for p in parents:
367 childmap.setdefault(p, set()).add(n)
367 childmap.setdefault(p, set()).add(n)
368 if not parents:
368 if not parents:
369 roots.append(n)
369 roots.append(n)
370
370
371 roots.sort()
371 roots.sort()
372 # Process roots, adding children to the queue as they become roots
372 # Process roots, adding children to the queue as they become roots
373 results = []
373 results = []
374 while roots:
374 while roots:
375 n = roots.pop(0)
375 n = roots.pop(0)
376 results.append(n)
376 results.append(n)
377 if n in childmap:
377 if n in childmap:
378 children = childmap[n]
378 children = childmap[n]
379 for c in children:
379 for c in children:
380 childparents = parentmap[c]
380 childparents = parentmap[c]
381 childparents.remove(n)
381 childparents.remove(n)
382 if len(childparents) == 0:
382 if len(childparents) == 0:
383 # insert at the beginning, that way child nodes
383 # insert at the beginning, that way child nodes
384 # are likely to be output immediately after their
384 # are likely to be output immediately after their
385 # parents. This gives better compression results.
385 # parents. This gives better compression results.
386 roots.insert(0, c)
386 roots.insert(0, c)
387
387
388 return results
388 return results
389
389
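A hypothetical call to sortnodes() above on a four-node DAG; parents come out before their children, and a child tends to directly follow its parent, which is what gives the better compression mentioned in the comment.

#   a --- b --- c
#          \
#           d
parents = {'a': [], 'b': ['a'], 'c': ['b'], 'd': ['b']}
ordered = sortnodes(parents, lambda n: parents[n])
# e.g. ['a', 'b', 'c', 'd'] or ['a', 'b', 'd', 'c'], depending on set order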
390 def readexactly(stream, n):
390 def readexactly(stream, n):
391 '''read n bytes from stream.read and abort if less was available'''
391 '''read n bytes from stream.read and abort if less was available'''
392 s = stream.read(n)
392 s = stream.read(n)
393 if len(s) < n:
393 if len(s) < n:
394 raise error.Abort(_("stream ended unexpectedly"
394 raise error.Abort(_("stream ended unexpectedly"
395 " (got %d bytes, expected %d)")
395 " (got %d bytes, expected %d)")
396 % (len(s), n))
396 % (len(s), n))
397 return s
397 return s
398
398
399 def readunpack(stream, fmt):
399 def readunpack(stream, fmt):
400 data = readexactly(stream, struct.calcsize(fmt))
400 data = readexactly(stream, struct.calcsize(fmt))
401 return struct.unpack(fmt, data)
401 return struct.unpack(fmt, data)
402
402
403 def readpath(stream):
403 def readpath(stream):
404 rawlen = readexactly(stream, constants.FILENAMESIZE)
404 rawlen = readexactly(stream, constants.FILENAMESIZE)
405 pathlen = struct.unpack(constants.FILENAMESTRUCT, rawlen)[0]
405 pathlen = struct.unpack(constants.FILENAMESTRUCT, rawlen)[0]
406 return readexactly(stream, pathlen)
406 return readexactly(stream, pathlen)
407
407
408 def readnodelist(stream):
408 def readnodelist(stream):
409 rawlen = readexactly(stream, constants.NODECOUNTSIZE)
409 rawlen = readexactly(stream, constants.NODECOUNTSIZE)
410 nodecount = struct.unpack(constants.NODECOUNTSTRUCT, rawlen)[0]
410 nodecount = struct.unpack(constants.NODECOUNTSTRUCT, rawlen)[0]
411 for i in pycompat.xrange(nodecount):
411 for i in pycompat.xrange(nodecount):
412 yield readexactly(stream, constants.NODESIZE)
412 yield readexactly(stream, constants.NODESIZE)
413
413
414 def readpathlist(stream):
414 def readpathlist(stream):
415 rawlen = readexactly(stream, constants.PATHCOUNTSIZE)
415 rawlen = readexactly(stream, constants.PATHCOUNTSIZE)
416 pathcount = struct.unpack(constants.PATHCOUNTSTRUCT, rawlen)[0]
416 pathcount = struct.unpack(constants.PATHCOUNTSTRUCT, rawlen)[0]
417 for i in pycompat.xrange(pathcount):
417 for i in pycompat.xrange(pathcount):
418 yield readpath(stream)
418 yield readpath(stream)
419
419
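The read* helpers above all consume simple length-prefixed frames. A sketch of reading one path from an in-memory stream; the '!H' length format used here is an assumption standing in for constants.FILENAMESTRUCT.

import io
import struct

path = b'foo/bar.txt'
stream = io.BytesIO(struct.pack('!H', len(path)) + path)

rawlen = stream.read(2)                       # readexactly(stream, FILENAMESIZE)
(pathlen,) = struct.unpack('!H', rawlen)
assert stream.read(pathlen) == path           # what readpath() would return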
420 def getgid(groupname):
420 def getgid(groupname):
421 try:
421 try:
422 gid = grp.getgrnam(groupname).gr_gid
422 gid = grp.getgrnam(groupname).gr_gid
423 return gid
423 return gid
424 except KeyError:
424 except KeyError:
425 return None
425 return None
426
426
427 def setstickygroupdir(path, gid, warn=None):
427 def setstickygroupdir(path, gid, warn=None):
428 if gid is None:
428 if gid is None:
429 return
429 return
430 try:
430 try:
431 os.chown(path, -1, gid)
431 os.chown(path, -1, gid)
432 os.chmod(path, 0o2775)
432 os.chmod(path, 0o2775)
433 except (IOError, OSError) as ex:
433 except (IOError, OSError) as ex:
434 if warn:
434 if warn:
435 warn(_('unable to chown/chmod on %s: %s\n') % (path, ex))
435 warn(_('unable to chown/chmod on %s: %s\n') % (path, ex))
436
436
437 def mkstickygroupdir(ui, path):
437 def mkstickygroupdir(ui, path):
438 """Creates the given directory (if it doesn't exist) and gives it a
438 """Creates the given directory (if it doesn't exist) and gives it a
439 particular group with setgid enabled."""
439 particular group with setgid enabled."""
440 gid = None
440 gid = None
441 groupname = ui.config("remotefilelog", "cachegroup")
441 groupname = ui.config("remotefilelog", "cachegroup")
442 if groupname:
442 if groupname:
443 gid = getgid(groupname)
443 gid = getgid(groupname)
444 if gid is None:
444 if gid is None:
445 ui.warn(_('unable to resolve group name: %s\n') % groupname)
445 ui.warn(_('unable to resolve group name: %s\n') % groupname)
446
446
447 # we use a single stat syscall to test the existence and mode / group bit
447 # we use a single stat syscall to test the existence and mode / group bit
448 st = None
448 st = None
449 try:
449 try:
450 st = os.stat(path)
450 st = os.stat(path)
451 except OSError:
451 except OSError:
452 pass
452 pass
453
453
454 if st:
454 if st:
455 # exists
455 # exists
456 if (st.st_mode & 0o2775) != 0o2775 or st.st_gid != gid:
456 if (st.st_mode & 0o2775) != 0o2775 or st.st_gid != gid:
457 # permission needs to be fixed
457 # permission needs to be fixed
458 setstickygroupdir(path, gid, ui.warn)
458 setstickygroupdir(path, gid, ui.warn)
459 return
459 return
460
460
461 oldumask = os.umask(0o002)
461 oldumask = os.umask(0o002)
462 try:
462 try:
463 missingdirs = [path]
463 missingdirs = [path]
464 path = os.path.dirname(path)
464 path = os.path.dirname(path)
465 while path and not os.path.exists(path):
465 while path and not os.path.exists(path):
466 missingdirs.append(path)
466 missingdirs.append(path)
467 path = os.path.dirname(path)
467 path = os.path.dirname(path)
468
468
469 for path in reversed(missingdirs):
469 for path in reversed(missingdirs):
470 try:
470 try:
471 os.mkdir(path)
471 os.mkdir(path)
472 except OSError as ex:
472 except OSError as ex:
473 if ex.errno != errno.EEXIST:
473 if ex.errno != errno.EEXIST:
474 raise
474 raise
475
475
476 for path in missingdirs:
476 for path in missingdirs:
477 setstickygroupdir(path, gid, ui.warn)
477 setstickygroupdir(path, gid, ui.warn)
478 finally:
478 finally:
479 os.umask(oldumask)
479 os.umask(oldumask)
480
480
481 def getusername(ui):
481 def getusername(ui):
482 try:
482 try:
483 return stringutil.shortuser(ui.username())
483 return stringutil.shortuser(ui.username())
484 except Exception:
484 except Exception:
485 return 'unknown'
485 return 'unknown'
486
486
487 def getreponame(ui):
487 def getreponame(ui):
488 reponame = ui.config('paths', 'default')
488 reponame = ui.config('paths', 'default')
489 if reponame:
489 if reponame:
490 return os.path.basename(reponame)
490 return os.path.basename(reponame)
491 return "unknown"
491 return "unknown"