##// END OF EJS Templates
rust-status: refactor handling of unknown files...
rust-status: refactor handling of unknown files Differential Revision: https://phab.mercurial-scm.org/D8249

File last commit:

r44937:9d2b2df2 default
r45024:5f6a504d default
Show More
repack.py
914 lines | 30.4 KiB | text/x-python | PythonLexer
Augie Fackler
remotefilelog: import pruned-down remotefilelog extension from hg-experimental...
r40530 from __future__ import absolute_import
import os
import time
from mercurial.i18n import _
from mercurial.node import (
nullid,
short,
)
from mercurial import (
encoding,
error,
Boris Feld
remotefilelog: replace repack lock to solve race condition...
r43213 lock as lockmod,
Augie Fackler
remotefilelog: import pruned-down remotefilelog extension from hg-experimental...
r40530 mdiff,
policy,
pycompat,
scmutil,
util,
vfs,
)
from mercurial.utils import procutil
from . import (
constants,
contentstore,
datapack,
historypack,
metadatastore,
shallowutil,
)
Augie Fackler
cleanup: remove pointless r-prefixes on single-quoted strings...
r43906 osutil = policy.importmod('osutil')
Augie Fackler
remotefilelog: import pruned-down remotefilelog extension from hg-experimental...
r40530
Augie Fackler
formatting: blacken the codebase...
r43346
Augie Fackler
remotefilelog: import pruned-down remotefilelog extension from hg-experimental...
class RepackAlreadyRunning(error.Abort):
    """Raised when a repack is requested while another one holds the lock."""
Augie Fackler
formatting: blacken the codebase...
r43346
remotefilelog: remove the `ensurestart` usage...
def backgroundrepack(repo, incremental=True, packsonly=False):
    """Launch ``hg repack`` for ``repo`` through ``procutil.runbgcommand``.

    ``incremental`` adds ``--incremental`` to the command line and switches
    the status message; ``packsonly`` adds ``--packsonly``.  The status
    message is emitted with ``repo.ui.warn`` before the command is started.
    """
    cmd = [procutil.hgexecutable(), b'-R', repo.origroot, b'repack']
    msg = _(b"(running background repack)\n")
    if incremental:
        cmd.append(b'--incremental')
        msg = _(b"(running background incremental repack)\n")
    if packsonly:
        cmd.append(b'--packsonly')
    repo.ui.warn(msg)

    # Developer option: hand the ui's atexit hook to runbgcommand as
    # ``record_wait`` (presumably so the child can be waited for; see the
    # ``devel.remotefilelog.bg-wait`` option).
    kwargs = {}
    if repo.ui.configbool(b'devel', b'remotefilelog.bg-wait'):
        kwargs['record_wait'] = repo.ui.atexit

    # We know this command will find a binary, so don't block on it starting.
    procutil.runbgcommand(cmd, encoding.environ, ensurestart=False, **kwargs)
Augie Fackler
remotefilelog: import pruned-down remotefilelog extension from hg-experimental...
r40530
Augie Fackler
formatting: blacken the codebase...
r43346
Augie Fackler
remotefilelog: import pruned-down remotefilelog extension from hg-experimental...
def fullrepack(repo, options=None):
    """Run a full repack of the file stores and manifest stores of ``repo``.

    The shared file stores are repacked when ``repo`` has a
    ``shareddatastores`` attribute.  The shared and then the local manifest
    stores are repacked when ``repo.manifestlog`` has a ``datastore``
    attribute.  ``options`` is forwarded unchanged to ``_runrepack``.

    NOTE(review): the previous docstring read "If ``packsonly`` is True,
    stores creating only loose objects are skipped", but this function has
    no such parameter -- presumably the flag travels inside ``options``;
    confirm against the stores' ``markledger`` implementations.
    """
    if util.safehasattr(repo, 'shareddatastores'):
        datasource = contentstore.unioncontentstore(*repo.shareddatastores)
        historysource = metadatastore.unionmetadatastore(
            *repo.sharedhistorystores, allowincomplete=True
        )

        packpath = shallowutil.getcachepackpath(
            repo, constants.FILEPACK_CATEGORY
        )
        _runrepack(
            repo,
            datasource,
            historysource,
            packpath,
            constants.FILEPACK_CATEGORY,
            options=options,
        )

    if util.safehasattr(repo.manifestlog, 'datastore'):
        localdata, shareddata = _getmanifeststores(repo)
        lpackpath, ldstores, lhstores = localdata
        spackpath, sdstores, shstores = shareddata

        # Repack the shared manifest store
        datasource = contentstore.unioncontentstore(*sdstores)
        historysource = metadatastore.unionmetadatastore(
            *shstores, allowincomplete=True
        )
        _runrepack(
            repo,
            datasource,
            historysource,
            spackpath,
            constants.TREEPACK_CATEGORY,
            options=options,
        )

        # Repack the local manifest store
        datasource = contentstore.unioncontentstore(
            *ldstores, allowincomplete=True
        )
        historysource = metadatastore.unionmetadatastore(
            *lhstores, allowincomplete=True
        )
        _runrepack(
            repo,
            datasource,
            historysource,
            lpackpath,
            constants.TREEPACK_CATEGORY,
            options=options,
        )
Augie Fackler
remotefilelog: import pruned-down remotefilelog extension from hg-experimental...
r40530
def incrementalrepack(repo, options=None):
    """This repacks the repo by looking at the distribution of pack files in the
    repo and performing the most minimal repack to keep the repo in good shape.

    The shared file stores are handled when ``repo`` has a
    ``shareddatastores`` attribute; the shared and local manifest stores are
    handled when ``repo.manifestlog`` has a ``datastore`` attribute.
    ``options`` is forwarded unchanged to ``_incrementalrepack``.
    """
    if util.safehasattr(repo, 'shareddatastores'):
        packpath = shallowutil.getcachepackpath(
            repo, constants.FILEPACK_CATEGORY
        )
        _incrementalrepack(
            repo,
            repo.shareddatastores,
            repo.sharedhistorystores,
            packpath,
            constants.FILEPACK_CATEGORY,
            options=options,
        )

    if util.safehasattr(repo.manifestlog, 'datastore'):
        localdata, shareddata = _getmanifeststores(repo)
        lpackpath, ldstores, lhstores = localdata
        spackpath, sdstores, shstores = shareddata

        # Repack the shared manifest store
        _incrementalrepack(
            repo,
            sdstores,
            shstores,
            spackpath,
            constants.TREEPACK_CATEGORY,
            options=options,
        )

        # Repack the local manifest store; only this one tolerates
        # incomplete data.
        _incrementalrepack(
            repo,
            ldstores,
            lhstores,
            lpackpath,
            constants.TREEPACK_CATEGORY,
            allowincompletedata=True,
            options=options,
        )
Augie Fackler
remotefilelog: import pruned-down remotefilelog extension from hg-experimental...
r40530
def _getmanifeststores(repo):
    """Collect the manifest (tree) stores of ``repo`` together with their paths.

    Returns a pair ``(local, shared)``; each element is a
    ``(packpath, datastores, historystores)`` triple taken from
    ``repo.manifestlog``.
    """
    manifestlog = repo.manifestlog
    shareddatastores = manifestlog.shareddatastores
    localdatastores = manifestlog.localdatastores
    sharedhistorystores = manifestlog.sharedhistorystores
    localhistorystores = manifestlog.localhistorystores

    sharedpackpath = shallowutil.getcachepackpath(
        repo, constants.TREEPACK_CATEGORY
    )
    localpackpath = shallowutil.getlocalpackpath(
        repo.svfs.vfs.base, constants.TREEPACK_CATEGORY
    )

    return (
        (localpackpath, localdatastores, localhistorystores),
        (sharedpackpath, shareddatastores, sharedhistorystores),
    )
Augie Fackler
remotefilelog: import pruned-down remotefilelog extension from hg-experimental...
r40530
def _topacks(packpath, files, constructor):
paths = list(os.path.join(packpath, p) for p in files)
packs = list(constructor(p) for p in paths)
return packs
Augie Fackler
formatting: blacken the codebase...
r43346
Augie Fackler
remotefilelog: import pruned-down remotefilelog extension from hg-experimental...
r40530 def _deletebigpacks(repo, folder, files):
"""Deletes packfiles that are bigger than ``packs.maxpacksize``.
Returns ``files` with the removed files omitted."""
Augie Fackler
formatting: byteify all mercurial/ and hgext/ string literals...
r43347 maxsize = repo.ui.configbytes(b"packs", b"maxpacksize")
Augie Fackler
remotefilelog: import pruned-down remotefilelog extension from hg-experimental...
r40530 if maxsize <= 0:
return files
# This only considers datapacks today, but we could broaden it to include
# historypacks.
Augie Fackler
formatting: byteify all mercurial/ and hgext/ string literals...
r43347 VALIDEXTS = [b".datapack", b".dataidx"]
Augie Fackler
remotefilelog: import pruned-down remotefilelog extension from hg-experimental...
r40530
# Either an oversize index or datapack will trigger cleanup of the whole
# pack:
Augie Fackler
formatting: blacken the codebase...
r43346 oversized = {
os.path.splitext(path)[0]
for path, ftype, stat in files
if (stat.st_size > maxsize and (os.path.splitext(path)[1] in VALIDEXTS))
}
Augie Fackler
remotefilelog: import pruned-down remotefilelog extension from hg-experimental...
r40530
for rootfname in oversized:
rootpath = os.path.join(folder, rootfname)
for ext in VALIDEXTS:
path = rootpath + ext
Augie Fackler
formatting: blacken the codebase...
r43346 repo.ui.debug(
Augie Fackler
formatting: byteify all mercurial/ and hgext/ string literals...
r43347 b'removing oversize packfile %s (%s)\n'
Augie Fackler
formatting: blacken the codebase...
r43346 % (path, util.bytecount(os.stat(path).st_size))
)
Augie Fackler
remotefilelog: import pruned-down remotefilelog extension from hg-experimental...
r40530 os.unlink(path)
return [row for row in files if os.path.basename(row[0]) not in oversized]
Augie Fackler
formatting: blacken the codebase...
r43346
def _incrementalrepack(
    repo,
    datastore,
    historystore,
    packpath,
    category,
    allowincompletedata=False,
    options=None,
):
    """Incrementally repack the packs found in ``packpath``.

    Oversize data packs are deleted first.  The data and history packs chosen
    by ``_computeincrementaldatapack`` / ``_computeincrementalhistorypack``
    are then combined with every entry of ``datastore`` / ``historystore``
    that is not itself a pack store, and handed to ``_runrepack``.
    """
    shallowutil.mkstickygroupdir(repo.ui, packpath)

    files = osutil.listdir(packpath, stat=True)
    files = _deletebigpacks(repo, packpath, files)

    datapacks = _topacks(
        packpath, _computeincrementaldatapack(repo.ui, files), datapack.datapack
    )
    datapacks.extend(
        s for s in datastore if not isinstance(s, datapack.datapackstore)
    )

    # History stores that are not pack stores take part both in the repack
    # itself and in the full-history view below.
    nonpackhistory = [
        s
        for s in historystore
        if not isinstance(s, historypack.historypackstore)
    ]

    historypacks = _topacks(
        packpath,
        _computeincrementalhistorypack(repo.ui, files),
        historypack.historypack,
    )
    historypacks.extend(nonpackhistory)

    # ``allhistory{files,packs}`` contains all known history packs, even ones we
    # don't plan to repack. They are used during the datapack repack to ensure
    # good ordering of nodes.
    allhistoryfiles = _allpackfileswithsuffix(
        files, historypack.PACKSUFFIX, historypack.INDEXSUFFIX
    )
    allhistorypacks = _topacks(
        packpath,
        (f for f, mode, stat in allhistoryfiles),
        historypack.historypack,
    )
    allhistorypacks.extend(nonpackhistory)

    _runrepack(
        repo,
        contentstore.unioncontentstore(
            *datapacks, allowincomplete=allowincompletedata
        ),
        metadatastore.unionmetadatastore(*historypacks, allowincomplete=True),
        packpath,
        category,
        fullhistory=metadatastore.unionmetadatastore(
            *allhistorypacks, allowincomplete=True
        ),
        options=options,
    )
Augie Fackler
remotefilelog: import pruned-down remotefilelog extension from hg-experimental...
r40530
def _computeincrementaldatapack(ui, files):
    """Pick the data packs among ``files`` for an incremental repack.

    Reads the ``remotefilelog.data.*`` settings from ``ui`` and applies
    ``_computeincrementalpack`` to the data packs that have an index file.
    """
    opts = {
        b'gencountlimit': ui.configint(b'remotefilelog', b'data.gencountlimit'),
        b'generations': ui.configlist(b'remotefilelog', b'data.generations'),
        b'maxrepackpacks': ui.configint(
            b'remotefilelog', b'data.maxrepackpacks'
        ),
        b'repackmaxpacksize': ui.configbytes(
            b'remotefilelog', b'data.repackmaxpacksize'
        ),
        b'repacksizelimit': ui.configbytes(
            b'remotefilelog', b'data.repacksizelimit'
        ),
    }

    packfiles = _allpackfileswithsuffix(
        files, datapack.PACKSUFFIX, datapack.INDEXSUFFIX
    )
    return _computeincrementalpack(packfiles, opts)
Augie Fackler
formatting: blacken the codebase...
r43346
Augie Fackler
remotefilelog: import pruned-down remotefilelog extension from hg-experimental...
def _computeincrementalhistorypack(ui, files):
    """Pick the history packs among ``files`` for an incremental repack.

    Reads the ``remotefilelog.history.*`` settings from ``ui`` and applies
    ``_computeincrementalpack`` to the history packs that have an index file.
    """
    opts = {
        b'gencountlimit': ui.configint(
            b'remotefilelog', b'history.gencountlimit'
        ),
        b'generations': ui.configlist(
            b'remotefilelog', b'history.generations', [b'100MB']
        ),
        b'maxrepackpacks': ui.configint(
            b'remotefilelog', b'history.maxrepackpacks'
        ),
        b'repackmaxpacksize': ui.configbytes(
            b'remotefilelog', b'history.repackmaxpacksize', b'400MB'
        ),
        b'repacksizelimit': ui.configbytes(
            b'remotefilelog', b'history.repacksizelimit'
        ),
    }

    packfiles = _allpackfileswithsuffix(
        files, historypack.PACKSUFFIX, historypack.INDEXSUFFIX
    )
    return _computeincrementalpack(packfiles, opts)
Augie Fackler
formatting: blacken the codebase...
r43346
Augie Fackler
remotefilelog: import pruned-down remotefilelog extension from hg-experimental...
r40530 def _allpackfileswithsuffix(files, packsuffix, indexsuffix):
result = []
Augie Fackler
cleanup: run pyupgrade on our source tree to clean up varying things...
r44937 fileset = {fn for fn, mode, stat in files}
Augie Fackler
remotefilelog: import pruned-down remotefilelog extension from hg-experimental...
r40530 for filename, mode, stat in files:
if not filename.endswith(packsuffix):
continue
Augie Fackler
formatting: blacken the codebase...
r43346 prefix = filename[: -len(packsuffix)]
Augie Fackler
remotefilelog: import pruned-down remotefilelog extension from hg-experimental...
r40530
# Don't process a pack if it doesn't have an index.
if (prefix + indexsuffix) not in fileset:
continue
result.append((prefix, mode, stat))
return result
Augie Fackler
formatting: blacken the codebase...
r43346
Augie Fackler
remotefilelog: import pruned-down remotefilelog extension from hg-experimental...
r40530 def _computeincrementalpack(files, opts):
"""Given a set of pack files along with the configuration options, this
function computes the list of files that should be packed as part of an
incremental repack.
It tries to strike a balance between keeping incremental repacks cheap (i.e.
packing small things when possible, and rolling the packs up to the big ones
over time).
"""
Augie Fackler
formatting: blacken the codebase...
r43346 limits = list(
Augie Fackler
formatting: byteify all mercurial/ and hgext/ string literals...
r43347 sorted((util.sizetoint(s) for s in opts[b'generations']), reverse=True)
Augie Fackler
formatting: blacken the codebase...
r43346 )
Augie Fackler
remotefilelog: import pruned-down remotefilelog extension from hg-experimental...
r40530 limits.append(0)
# Group the packs by generation (i.e. by size)
generations = []
for i in pycompat.xrange(len(limits)):
generations.append([])
sizes = {}
for prefix, mode, stat in files:
size = stat.st_size
Augie Fackler
formatting: byteify all mercurial/ and hgext/ string literals...
r43347 if size > opts[b'repackmaxpacksize']:
Augie Fackler
remotefilelog: import pruned-down remotefilelog extension from hg-experimental...
r40530 continue
sizes[prefix] = size
for i, limit in enumerate(limits):
if size > limit:
generations[i].append(prefix)
break
# Steps for picking what packs to repack:
# 1. Pick the largest generation with > gencountlimit pack files.
# 2. Take the smallest three packs.
# 3. While total-size-of-packs < repacksizelimit: add another pack
# Find the largest generation with more than gencountlimit packs
genpacks = []
for i, limit in enumerate(limits):
Augie Fackler
formatting: byteify all mercurial/ and hgext/ string literals...
r43347 if len(generations[i]) > opts[b'gencountlimit']:
Augie Fackler
remotefilelog: import pruned-down remotefilelog extension from hg-experimental...
r40530 # Sort to be smallest last, for easy popping later
Augie Fackler
formatting: blacken the codebase...
r43346 genpacks.extend(
sorted(generations[i], reverse=True, key=lambda x: sizes[x])
)
Augie Fackler
remotefilelog: import pruned-down remotefilelog extension from hg-experimental...
r40530 break
# Take as many packs from the generation as we can
chosenpacks = genpacks[-3:]
genpacks = genpacks[:-3]
repacksize = sum(sizes[n] for n in chosenpacks)
Augie Fackler
formatting: blacken the codebase...
r43346 while (
Augie Fackler
formatting: byteify all mercurial/ and hgext/ string literals...
r43347 repacksize < opts[b'repacksizelimit']
Augie Fackler
formatting: blacken the codebase...
r43346 and genpacks
Augie Fackler
formatting: byteify all mercurial/ and hgext/ string literals...
r43347 and len(chosenpacks) < opts[b'maxrepackpacks']
Augie Fackler
formatting: blacken the codebase...
r43346 ):
Augie Fackler
remotefilelog: import pruned-down remotefilelog extension from hg-experimental...
r40530 chosenpacks.append(genpacks.pop())
repacksize += sizes[chosenpacks[-1]]
return chosenpacks
Augie Fackler
formatting: blacken the codebase...
r43346
def _runrepack(
    repo, data, history, packpath, category, fullhistory=None, options=None
):
    """Repack ``data`` and ``history`` into new packs under ``packpath``.

    A ``repacker`` is built from the given stores and run against a fresh
    mutable data pack and mutable history pack.  When ``fullhistory`` is
    falsy, ``history`` is used in its place.

    Raises ``RepackAlreadyRunning`` when the repacker reports
    ``error.LockHeld``.
    """
    shallowutil.mkstickygroupdir(repo.ui, packpath)

    def isold(repo, filename, node):
        """Check if the file node is older than a limit.
        Unless a limit is specified in the config the default limit is taken.
        """
        filectx = repo.filectx(filename, fileid=node)
        filetime = repo[filectx.linkrev()].date()

        ttl = repo.ui.configint(b'remotefilelog', b'nodettl')

        limit = time.time() - ttl
        return filetime[0] < limit

    garbagecollect = repo.ui.configbool(b'remotefilelog', b'gcrepack')
    if not fullhistory:
        fullhistory = history
    packer = repacker(
        repo,
        data,
        history,
        fullhistory,
        category,
        gc=garbagecollect,
        isold=isold,
        options=options,
    )

    with datapack.mutabledatapack(repo.ui, packpath) as dpack:
        with historypack.mutablehistorypack(repo.ui, packpath) as hpack:
            try:
                packer.run(dpack, hpack)
            except error.LockHeld:
                raise RepackAlreadyRunning(
                    _(
                        b"skipping repack - another repack "
                        b"is already running"
                    )
                )
Augie Fackler
remotefilelog: import pruned-down remotefilelog extension from hg-experimental...
r40530
def keepset(repo, keyfn, lastkeepkeys=None):
    """Computes a keepset which is not garbage collected.

    'keyfn' is a function that maps filename, node to a unique key.
    'lastkeepkeys' is an optional argument and if provided the keepset
    function updates lastkeepkeys with more keys and returns the result.
    """
    # Compare against None: an empty set supplied by the caller must still be
    # updated in place, as the docstring promises.
    if lastkeepkeys is None:
        keepkeys = set()
    else:
        keepkeys = lastkeepkeys

    # We want to keep:
    # 1. Working copy parent
    # 2. Draft commits
    # 3. Parents of draft commits
    # 4. Pullprefetch and bgprefetchrevs revsets if specified
    revs = [b'.', b'draft()', b'parents(draft())']
    prefetchrevs = repo.ui.config(b'remotefilelog', b'pullprefetch', None)
    if prefetchrevs:
        revs.append(b'(%s)' % prefetchrevs)
    prefetchrevs = repo.ui.config(b'remotefilelog', b'bgprefetchrevs', None)
    if prefetchrevs:
        revs.append(b'(%s)' % prefetchrevs)
    revs = b'+'.join(revs)

    revs = [b'sort((%s), "topo")' % revs]
    keep = scmutil.revrange(repo, revs)

    processed = set()
    lastmanifest = None

    # process the commits in toposorted order starting from the oldest
    for r in reversed(keep._list):
        if repo[r].p1().rev() in processed:
            # if the direct parent has already been processed
            # then we only need to process the delta
            m = repo[r].manifestctx().readdelta()
        else:
            # otherwise take the manifest and diff it
            # with the previous manifest if one exists
            if lastmanifest:
                m = repo[r].manifest().diff(lastmanifest)
            else:
                m = repo[r].manifest()
        lastmanifest = repo[r].manifest()
        processed.add(r)

        # populate keepkeys with keys from the current manifest
        # (exact type check kept on purpose: only the plain dict returned by
        # ``diff`` has the tuple-valued layout handled in the first branch)
        if type(m) is dict:
            # m is a result of diff of two manifests and is a dictionary that
            # maps filename to ((newnode, newflag), (oldnode, oldflag)) tuple
            for filename, diff in pycompat.iteritems(m):
                if diff[0][0] is not None:
                    keepkeys.add(keyfn(filename, diff[0][0]))
        else:
            # m is a manifest object
            for filename, filenode in pycompat.iteritems(m):
                keepkeys.add(keyfn(filename, filenode))

    return keepkeys
Augie Fackler
formatting: blacken the codebase...
r43346
Augie Fackler
remotefilelog: import pruned-down remotefilelog extension from hg-experimental...
r40530 class repacker(object):
"""Class for orchestrating the repack of data and history information into a
new format.
"""
Augie Fackler
formatting: blacken the codebase...
r43346
def __init__(
    self,
    repo,
    data,
    history,
    fullhistory,
    category,
    gc=False,
    isold=None,
    options=None,
):
    """Store the repack inputs on the instance.

    ``category`` is translated to a progress unit with
    ``constants.getunits``.  When ``gc`` is true, ``isold`` must be given;
    the keepset is then computed up front and both are stored on the
    instance.  Raises ``ValueError`` if ``gc`` is true and ``isold`` is
    missing.
    """
    self.repo = repo
    self.data = data
    self.history = history
    self.fullhistory = fullhistory
    self.unit = constants.getunits(category)
    self.garbagecollect = gc
    self.options = options
    if self.garbagecollect:
        if not isold:
            raise ValueError(b"Function 'isold' is not properly specified")
        # use (filename, node) tuple as a keepset key
        self.keepkeys = keepset(repo, lambda f, n: (f, n))
        self.isold = isold
def run(self, targetdata, targethistory):
    """Repack data into ``targetdata`` and history into ``targethistory``.

    Everything runs while holding the ``repacklock`` lock, which is taken
    with ``timeout=0``.  The ``prerepack`` hook is invoked first, then the
    sources mark the ledger, the data and history are repacked, and
    finally each ledger source is asked to clean up.
    """
    ledger = repackledger()

    with lockmod.lock(
        repacklockvfs(self.repo), b"repacklock", desc=None, timeout=0
    ):
        self.repo.hook(b'prerepack')

        # Populate ledger from source
        self.data.markledger(ledger, options=self.options)
        self.history.markledger(ledger, options=self.options)

        # Run repack
        self.repackdata(ledger, targetdata)
        self.repackhistory(ledger, targethistory)

        # Call cleanup on each source
        for source in ledger.sources:
            source.cleanup(ledger)
def _chainorphans(self, ui, filename, nodes, orphans, deltabases):
"""Reorderes ``orphans`` into a single chain inside ``nodes`` and
``deltabases``.
We often have orphan entries (nodes without a base that aren't
referenced by other nodes -- i.e., part of a chain) due to gaps in
history. Rather than store them as individual fulltexts, we prefer to
insert them as one chain sorted by size.
"""
if not orphans:
return nodes
def getsize(node, default=0):
meta = self.data.getmeta(filename, node)
if constants.METAKEYSIZE in meta:
return meta[constants.METAKEYSIZE]
else:
return default
# Sort orphans by size; biggest first is preferred, since it's more
# likely to be the newest version assuming files grow over time.
# (Sort by node first to ensure the sort is stable.)
orphans = sorted(orphans)
orphans = list(sorted(orphans, key=getsize, reverse=True))
if ui.debugflag:
Augie Fackler
formatting: blacken the codebase...
r43346 ui.debug(
Augie Fackler
formatting: byteify all mercurial/ and hgext/ string literals...
r43347 b"%s: orphan chain: %s\n"
% (filename, b", ".join([short(s) for s in orphans]))
Augie Fackler
formatting: blacken the codebase...
r43346 )
Augie Fackler
remotefilelog: import pruned-down remotefilelog extension from hg-experimental...
r40530
# Create one contiguous chain and reassign deltabases.
for i, node in enumerate(orphans):
if i == 0:
deltabases[node] = (nullid, 0)
else:
parent = orphans[i - 1]
deltabases[node] = (parent, deltabases[parent][1] + 1)
Augie Fackler
remotefilelog: use list comprehension instead of filter for py3 portability...
r41291 nodes = [n for n in nodes if n not in orphans]
Augie Fackler
remotefilelog: import pruned-down remotefilelog extension from hg-experimental...
r40530 nodes += orphans
return nodes
def repackdata(self, ledger, target):
    """Write the data revisions recorded in ``ledger`` into ``target``.

    Ledger entries flagged ``datasource`` are grouped by filename. For each
    file the nodes are ordered children first (so reverse deltas can be
    produced), optionally garbage collected, assigned delta bases, and then
    added to ``target`` with ``target.add()``. Entries that get written are
    flagged ``datarepacked``; entries dropped by garbage collection are
    flagged ``gced``. ``target.close(ledger=ledger)`` is called at the end.

    Reads ``self.repo``, ``self.unit``, ``self.fullhistory``, ``self.data``,
    ``self.garbagecollect``, ``self.keepkeys`` and ``self.isold``.
    """
    ui = self.repo.ui
    maxchainlen = ui.configint(b'packs', b'maxchainlen', 1000)

    # filename -> {node: ledger entry} for every entry with a data source
    byfile = {}
    for entry in pycompat.itervalues(ledger.entries):
        if entry.datasource:
            byfile.setdefault(entry.filename, {})[entry.node] = entry

    count = 0
    repackprogress = ui.makeprogress(
        _(b"repacking data"), unit=self.unit, total=len(byfile)
    )
    for filename, entries in sorted(pycompat.iteritems(byfile)):
        repackprogress.update(count)

        ancestors = {}
        nodes = list(entries)
        # Built once so the filtering below is a constant-time lookup per
        # node rather than a scan of the ``nodes`` list.
        wanted = set(nodes)
        nohistory = []
        buildprogress = ui.makeprogress(
            _(b"building history"), unit=b'nodes', total=len(nodes)
        )
        for i, node in enumerate(nodes):
            if node in ancestors:
                continue
            buildprogress.update(i)
            try:
                ancestors.update(
                    self.fullhistory.getancestors(
                        filename, node, known=ancestors
                    )
                )
            except KeyError:
                # Since we're packing data entries, we may not have the
                # corresponding history entries for them. It's not a big
                # deal, but the entries won't be delta'd perfectly.
                nohistory.append(node)
        buildprogress.complete()

        # Order the nodes children first, so we can produce reverse deltas
        orderednodes = list(reversed(self._toposort(ancestors)))
        if nohistory:
            ui.debug(
                b'repackdata: %d nodes without history\n' % len(nohistory)
            )
        orderednodes.extend(sorted(nohistory))

        # Filter orderednodes to just the nodes we want to serialize (it
        # currently also has the edge nodes' ancestors).
        orderednodes = [node for node in orderednodes if node in wanted]

        # Garbage collect old nodes:
        if self.garbagecollect:
            neworderednodes = []
            for node in orderednodes:
                # If the node is old and is not in the keepset, we skip it,
                # and mark as garbage collected
                if (filename, node) not in self.keepkeys and self.isold(
                    self.repo, filename, node
                ):
                    entries[node].gced = True
                    continue
                neworderednodes.append(node)
            orderednodes = neworderednodes

        # Compute delta bases for nodes:
        # node -> (delta base node, chain length)
        deltabases = {}
        # nodes that had no delta base assigned when they were visited
        nobase = set()
        # nodes used as the delta base of some other node
        referenced = set()
        processprogress = ui.makeprogress(
            _(b"processing nodes"), unit=b'nodes', total=len(orderednodes)
        )
        for i, node in enumerate(orderednodes):
            processprogress.update(i)
            # Find delta base
            # TODO: allow delta'ing against most recent descendant instead
            # of immediate child
            deltatuple = deltabases.get(node, None)
            if deltatuple is None:
                deltabase, chainlen = nullid, 0
                deltabases[node] = (nullid, 0)
                nobase.add(node)
            else:
                deltabase, chainlen = deltatuple
                referenced.add(deltabase)

            # Use available ancestor information to inform our delta choices
            ancestorinfo = ancestors.get(node)
            if ancestorinfo:
                p1, p2, linknode, copyfrom = ancestorinfo

                # The presence of copyfrom means we're at a point where the
                # file was copied from elsewhere. So don't attempt to do any
                # deltas with the other file.
                if copyfrom:
                    p1 = nullid

                if chainlen < maxchainlen:
                    # Record this child as the delta base for its parents.
                    # This may be non optimal, since the parents may have
                    # many children, and this will only choose the last one.
                    # TODO: record all children and try all deltas to find
                    # best
                    if p1 != nullid:
                        deltabases[p1] = (node, chainlen + 1)
                    if p2 != nullid:
                        deltabases[p2] = (node, chainlen + 1)

        # experimental config: repack.chainorphansbysize
        if ui.configbool(b'repack', b'chainorphansbysize'):
            orphans = nobase - referenced
            orderednodes = self._chainorphans(
                ui, filename, orderednodes, orphans, deltabases
            )

        # Compute deltas and write to the pack
        for node in orderednodes:
            deltabase, _chainlen = deltabases[node]
            # Compute delta
            # TODO: Optimize the deltachain fetching. Since we're
            # iterating over the different version of the file, we may
            # be fetching the same deltachain over and over again.
            if deltabase != nullid:
                deltaentry = self.data.getdelta(filename, node)
                delta, deltabasename, origdeltabase, meta = deltaentry
                size = meta.get(constants.METAKEYSIZE)
                if (
                    deltabasename != filename
                    or origdeltabase != deltabase
                    or size is None
                ):
                    # The stored delta is against a different base (or has
                    # no recorded size), so recompute it against the base
                    # chosen above.
                    deltabasetext = self.data.get(filename, deltabase)
                    original = self.data.get(filename, node)
                    size = len(original)
                    delta = mdiff.textdiff(deltabasetext, original)
            else:
                # No delta base: store what self.data.get() returns as is.
                delta = self.data.get(filename, node)
                size = len(delta)
                meta = self.data.getmeta(filename, node)

            # TODO: don't use the delta if it's larger than the fulltext
            if constants.METAKEYSIZE not in meta:
                meta[constants.METAKEYSIZE] = size
            target.add(filename, node, deltabase, delta, meta)

            entries[node].datarepacked = True

        processprogress.complete()
        count += 1

    repackprogress.complete()
    target.close(ledger=ledger)
def repackhistory(self, ledger, target):
    """Write the history revisions recorded in ``ledger`` into ``target``.

    Ledger entries flagged ``historysource`` are grouped by filename. For
    each file the ancestry reported by ``self.history.getancestors()`` is
    walked children first and added to ``target`` with ``target.add()``;
    explicit ledger entries that get written are flagged
    ``historyrepacked``. ``target.close(ledger=ledger)`` is called at the
    end.
    """
    ui = self.repo.ui

    # filename -> {node: ledger entry} for every entry with a history source
    byfile = {}
    for ledgerentry in pycompat.itervalues(ledger.entries):
        if not ledgerentry.historysource:
            continue
        pernode = byfile.setdefault(ledgerentry.filename, {})
        pernode[ledgerentry.node] = ledgerentry

    progress = ui.makeprogress(
        _(b"repacking history"), unit=self.unit, total=len(byfile)
    )
    for filename, entries in sorted(pycompat.iteritems(byfile)):
        # Collect the ancestry of every requested node, skipping nodes
        # already covered by an earlier lookup.
        ancestors = {}
        for requested in list(entries):
            if requested not in ancestors:
                ancestors.update(
                    self.history.getancestors(
                        filename, requested, known=ancestors
                    )
                )

        # Nodes whose history should not be written for this file.
        skipped = set()

        # Walk the nodes children first and write them to the pack.
        for node in reversed(self._toposort(ancestors)):
            p1, p2, linknode, copyfrom = ancestors[node]
            explicit = node in entries

            # If the node is marked as skipped, but it's also in the
            # explicit entries set, that means the node exists both in this
            # file and in another file that was copied to this file.
            # Usually this happens if the file was copied to another file,
            # then the copy was deleted, then reintroduced without copy
            # metadata. The original add and the new add have the same hash
            # since the content is identical and the parents are null.
            if node in skipped and not explicit:
                # If copyfrom == filename, it means the copy history
                # went to some other file, then came back to this one, so
                # we should continue processing it.
                if p1 != nullid and copyfrom != filename:
                    skipped.add(p1)
                if p2 != nullid:
                    skipped.add(p2)
                continue

            if copyfrom:
                skipped.add(p1)

            target.add(filename, node, p1, p2, linknode, copyfrom)

            if explicit:
                entries[node].historyrepacked = True

        progress.increment()

    progress.complete()
    target.close(ledger=ledger)
def _toposort(self, ancestors):
    """Return the keys of ``ancestors`` as ordered by
    ``shallowutil.sortnodes()``.

    ``ancestors`` maps each node to a ``(p1, p2, linknode, copyfrom)``
    tuple; only the non-null parents are handed to the sorter.
    """

    def parentfunc(node):
        p1, p2, _linknode, _copyfrom = ancestors[node]
        return [parent for parent in (p1, p2) if parent != nullid]

    return shallowutil.sortnodes(ancestors.keys(), parentfunc)
Augie Fackler
formatting: blacken the codebase...
r43346
Augie Fackler
remotefilelog: import pruned-down remotefilelog extension from hg-experimental...
class repackledger(object):
    """Bookkeeping for a single repack run.

    Tracks the revisions being repacked, what happened to each of them, and
    which source store originally contained which revision (for later
    cleanup).
    """

    def __init__(self):
        # (filename, node) -> entry object
        self.entries = {}
        # source store -> set of entries found in that source
        self.sources = {}
        # values registered through addcreated()
        self.created = set()

    def _sourceentries(self, source):
        """Return the entry set tracked for ``source``, installing a fresh
        set when none (or only an empty one) is recorded yet.
        """
        tracked = self.sources.get(source)
        if not tracked:
            tracked = self.sources[source] = set()
        return tracked

    def markdataentry(self, source, filename, node):
        """Record that ``source`` holds a data rev for the given
        filename+node revision.
        """
        entry = self._getorcreateentry(filename, node)
        entry.datasource = True
        self._sourceentries(source).add(entry)

    def markhistoryentry(self, source, filename, node):
        """Record that ``source`` holds a history rev for the given
        filename+node revision.
        """
        entry = self._getorcreateentry(filename, node)
        entry.historysource = True
        self._sourceentries(source).add(entry)

    def _getorcreateentry(self, filename, node):
        """Return the entry for (filename, node), creating and registering
        a new ``repackentry`` when there is none.
        """
        key = (filename, node)
        entry = self.entries.get(key)
        if entry:
            return entry
        entry = self.entries[key] = repackentry(filename, node)
        return entry

    def addcreated(self, value):
        """Add ``value`` to the ``created`` set."""
        self.created.add(value)
Augie Fackler
formatting: blacken the codebase...
r43346
Augie Fackler
remotefilelog: import pruned-down remotefilelog extension from hg-experimental...
class repackentry(object):
    """A single revision (filename + node) tracked by the repackledger.

    Besides the identifying pair it carries five boolean flags, all
    initially False:

    - ``datasource``: the revision has a data entry in the source
    - ``historysource``: the revision has a history entry in the source
    - ``datarepacked``: the data entry was repacked into the repack target
    - ``historyrepacked``: the history entry was repacked into the repack
      target
    - ``gced``: the revision was garbage collected
    """

    __slots__ = (
        'filename',
        'node',
        'datasource',
        'historysource',
        'datarepacked',
        'historyrepacked',
        'gced',
    )

    def __init__(self, filename, node):
        self.filename, self.node = filename, node
        # Where the revision was found in the source.
        self.datasource = self.historysource = False
        # What was written to the repack target.
        self.datarepacked = self.historyrepacked = False
        # Whether the revision was garbage collected.
        self.gced = False
Augie Fackler
formatting: blacken the codebase...
r43346
Augie Fackler
remotefilelog: import pruned-down remotefilelog extension from hg-experimental...
def repacklockvfs(repo):
    """Return the vfs in which the repack lock should be taken.

    When ``repo`` has a ``name`` attribute, a vfs rooted at the shared cache
    pack path for file packs is returned; otherwise ``repo.svfs`` is used.
    """
    if not util.safehasattr(repo, 'name'):
        return repo.svfs

    # Lock in the shared cache so repacks across multiple copies of the same
    # repo are coordinated.
    cachepackpath = shallowutil.getcachepackpath(
        repo, constants.FILEPACK_CATEGORY
    )
    return vfs.vfs(cachepackpath)