##// END OF EJS Templates
mq: stop using the `pycompat.open()` shim
mq: stop using the `pycompat.open()` shim

File last commit:

r52756:f4733654 default
r53269:e95b0013 default
Show More
repack.py
904 lines | 30.1 KiB | text/x-python | PythonLexer
Matt Harbison
typing: add `from __future__ import annotations` to most files...
r52756 from __future__ import annotations
Augie Fackler
remotefilelog: import pruned-down remotefilelog extension from hg-experimental...
r40530 import os
import time
from mercurial.i18n import _
Joerg Sonnenberger
node: replace nullid and friends with nodeconstants class...
r47771 from mercurial.node import short
Augie Fackler
remotefilelog: import pruned-down remotefilelog extension from hg-experimental...
r40530 from mercurial import (
encoding,
error,
Boris Feld
remotefilelog: replace repack lock to solve race condition...
r43213 lock as lockmod,
Augie Fackler
remotefilelog: import pruned-down remotefilelog extension from hg-experimental...
r40530 mdiff,
policy,
scmutil,
util,
vfs,
)
from mercurial.utils import procutil
from . import (
constants,
contentstore,
datapack,
historypack,
metadatastore,
shallowutil,
)
Augie Fackler
cleanup: remove pointless r-prefixes on single-quoted strings...
r43906 osutil = policy.importmod('osutil')
Augie Fackler
remotefilelog: import pruned-down remotefilelog extension from hg-experimental...
r40530
Augie Fackler
formatting: blacken the codebase...
r43346
Augie Fackler
remotefilelog: import pruned-down remotefilelog extension from hg-experimental...
class RepackAlreadyRunning(error.Abort):
    """Abort raised when a repack is skipped because another one is running."""
Augie Fackler
formatting: blacken the codebase...
r43346
remotefilelog: remove the `ensurestart` usage...
def backgroundrepack(repo, incremental=True, packsonly=False):
    """Launch ``hg repack`` for ``repo`` as a background command.

    ``incremental`` adds ``--incremental`` to the command line and selects the
    matching status message; ``packsonly`` adds ``--packsonly``.  The status
    message is emitted through ``repo.ui.warn`` before the command is started.
    """
    argv = [procutil.hgexecutable(), b'-R', repo.origroot, b'repack']
    if incremental:
        argv.append(b'--incremental')
        notice = _(b"(running background incremental repack)\n")
    else:
        notice = _(b"(running background repack)\n")
    if packsonly:
        argv.append(b'--packsonly')
    repo.ui.warn(notice)

    # Developer knob: hand ``ui.atexit`` to runbgcommand as ``record_wait``
    # (presumably so the child is waited for at exit -- confirm in procutil).
    if repo.ui.configbool(b'devel', b'remotefilelog.bg-wait'):
        extra = {'record_wait': repo.ui.atexit}
    else:
        extra = {}

    # We know this command will find a binary, so don't block on it starting.
    procutil.runbgcommand(argv, encoding.environ, ensurestart=False, **extra)
Augie Fackler
remotefilelog: import pruned-down remotefilelog extension from hg-experimental...
r40530
Augie Fackler
formatting: blacken the codebase...
r43346
Augie Fackler
remotefilelog: import pruned-down remotefilelog extension from hg-experimental...
def fullrepack(repo, options=None):
    """Repack every known store of ``repo`` into new packs.

    The shared file stores are repacked when ``repo`` has a
    ``shareddatastores`` attribute.  The shared and then the local manifest
    stores are repacked when ``repo.manifestlog`` has a ``datastore``
    attribute.  ``options`` is forwarded unchanged to ``_runrepack``.
    """
    if hasattr(repo, 'shareddatastores'):
        filecategory = constants.FILEPACK_CATEGORY
        filedata = contentstore.unioncontentstore(*repo.shareddatastores)
        filehistory = metadatastore.unionmetadatastore(
            *repo.sharedhistorystores, allowincomplete=True
        )
        _runrepack(
            repo,
            filedata,
            filehistory,
            shallowutil.getcachepackpath(repo, filecategory),
            filecategory,
            options=options,
        )

    if not hasattr(repo.manifestlog, 'datastore'):
        return

    localdata, shareddata = _getmanifeststores(repo)
    lpackpath, ldstores, lhstores = localdata
    spackpath, sdstores, shstores = shareddata
    treecategory = constants.TREEPACK_CATEGORY

    # Shared manifest store: only the history union tolerates gaps.
    shareddatasource = contentstore.unioncontentstore(*sdstores)
    sharedhistorysource = metadatastore.unionmetadatastore(
        *shstores, allowincomplete=True
    )
    _runrepack(
        repo,
        shareddatasource,
        sharedhistorysource,
        spackpath,
        treecategory,
        options=options,
    )

    # Local manifest store: both unions are allowed to be incomplete.
    localdatasource = contentstore.unioncontentstore(
        *ldstores, allowincomplete=True
    )
    localhistorysource = metadatastore.unionmetadatastore(
        *lhstores, allowincomplete=True
    )
    _runrepack(
        repo,
        localdatasource,
        localhistorysource,
        lpackpath,
        treecategory,
        options=options,
    )
Augie Fackler
remotefilelog: import pruned-down remotefilelog extension from hg-experimental...
r40530
def incrementalrepack(repo, options=None):
    """Run the minimal repack that keeps the repo's packs in good shape.

    The pack files to combine are chosen from the size distribution of the
    existing packs (see ``_incrementalrepack``).  File stores are handled
    when ``repo`` has ``shareddatastores``; the shared and then the local
    manifest stores when ``repo.manifestlog`` has ``datastore``.
    """
    if hasattr(repo, 'shareddatastores'):
        filecategory = constants.FILEPACK_CATEGORY
        filepackpath = shallowutil.getcachepackpath(repo, filecategory)
        _incrementalrepack(
            repo,
            repo.shareddatastores,
            repo.sharedhistorystores,
            filepackpath,
            filecategory,
            options=options,
        )

    if not hasattr(repo.manifestlog, 'datastore'):
        return

    localdata, shareddata = _getmanifeststores(repo)
    lpackpath, ldstores, lhstores = localdata
    spackpath, sdstores, shstores = shareddata
    treecategory = constants.TREEPACK_CATEGORY

    # Shared manifest store.
    _incrementalrepack(
        repo,
        sdstores,
        shstores,
        spackpath,
        treecategory,
        options=options,
    )

    # Local manifest store; its data is allowed to be incomplete.
    _incrementalrepack(
        repo,
        ldstores,
        lhstores,
        lpackpath,
        treecategory,
        allowincompletedata=True,
        options=options,
    )
Augie Fackler
remotefilelog: import pruned-down remotefilelog extension from hg-experimental...
r40530
def _getmanifeststores(repo):
    """Collect the manifest (tree) pack locations and stores of ``repo``.

    Returns ``(local, shared)`` where each element is a
    ``(packpath, datastores, historystores)`` tuple.
    """
    mfl = repo.manifestlog
    shareddata = mfl.shareddatastores
    localdata = mfl.localdatastores
    sharedhistory = mfl.sharedhistorystores
    localhistory = mfl.localhistorystores

    category = constants.TREEPACK_CATEGORY
    sharedpath = shallowutil.getcachepackpath(repo, category)
    localpath = shallowutil.getlocalpackpath(repo.svfs.vfs.base, category)

    local = (localpath, localdata, localhistory)
    shared = (sharedpath, shareddata, sharedhistory)
    return (local, shared)
Augie Fackler
remotefilelog: import pruned-down remotefilelog extension from hg-experimental...
r40530
def _topacks(packpath, files, constructor):
    """Build one pack object per entry of ``files``.

    Every name in ``files`` is joined onto ``packpath`` and the resulting
    path is handed to ``constructor``.  Returns the constructed objects as a
    list, in input order.
    """
    # Resolve every path before constructing anything, as the original did.
    fullpaths = [os.path.join(packpath, name) for name in files]
    return [constructor(fullpath) for fullpath in fullpaths]
Augie Fackler
formatting: blacken the codebase...
r43346
Augie Fackler
remotefilelog: import pruned-down remotefilelog extension from hg-experimental...
def _deletebigpacks(repo, folder, files):
    """Delete packfiles that are bigger than ``packs.maxpacksize``.

    ``files`` is a list of ``(name, type, stat)`` rows describing the content
    of ``folder``.  When the configured limit is zero or negative nothing is
    deleted and ``files`` is returned as is.

    Returns ``files`` with the rows of the removed files omitted.
    """
    maxsize = repo.ui.configbytes(b"packs", b"maxpacksize")
    if maxsize <= 0:
        return files

    # This only considers datapacks today, but we could broaden it to include
    # historypacks.
    VALIDEXTS = [b".datapack", b".dataidx"]

    # Either an oversize index or datapack will trigger cleanup of the whole
    # pack, so remember the extension-less name of each offender.
    oversized = set()
    for path, ftype, stat in files:
        root, ext = os.path.splitext(path)
        if stat.st_size > maxsize and ext in VALIDEXTS:
            oversized.add(root)

    for rootfname in oversized:
        rootpath = os.path.join(folder, rootfname)
        for ext in VALIDEXTS:
            path = rootpath + ext
            repo.ui.debug(
                b'removing oversize packfile %s (%s)\n'
                % (path, util.bytecount(os.stat(path).st_size))
            )
            os.unlink(path)

    def wasremoved(name):
        # ``oversized`` holds names without extension, so the listing entry
        # has to be split the same way before it can be looked up.
        root, ext = os.path.splitext(name)
        return root in oversized and ext in VALIDEXTS

    return [row for row in files if not wasremoved(row[0])]
Augie Fackler
formatting: blacken the codebase...
r43346
def _incrementalrepack(
    repo,
    datastore,
    historystore,
    packpath,
    category,
    allowincompletedata=False,
    options=None,
):
    """Repack a size-based selection of the packs found in ``packpath``.

    Oversize data packs are deleted first.  The data and history packs picked
    by ``_computeincrementaldatapack`` / ``_computeincrementalhistorypack``
    are then combined with every member of ``datastore`` / ``historystore``
    that is not itself a pack store, and the result is given to
    ``_runrepack``.
    """
    shallowutil.mkstickygroupdir(repo.ui, packpath)

    listing = osutil.listdir(packpath, stat=True)
    listing = _deletebigpacks(repo, packpath, listing)

    def nonpackhistory():
        # History stores that are not pack stores take part in every union.
        return [
            store
            for store in historystore
            if not isinstance(store, historypack.historypackstore)
        ]

    datapacks = _topacks(
        packpath,
        _computeincrementaldatapack(repo.ui, listing),
        datapack.datapack,
    )
    datapacks += [
        store
        for store in datastore
        if not isinstance(store, datapack.datapackstore)
    ]

    historypacks = _topacks(
        packpath,
        _computeincrementalhistorypack(repo.ui, listing),
        historypack.historypack,
    )
    historypacks += nonpackhistory()

    # ``allhistory{files,packs}`` contains all known history packs, even ones
    # we don't plan to repack. They are used during the datapack repack to
    # ensure good ordering of nodes.
    allhistoryfiles = _allpackfileswithsuffix(
        listing, historypack.PACKSUFFIX, historypack.INDEXSUFFIX
    )
    allhistorypacks = _topacks(
        packpath,
        (prefix for prefix, mode, stat in allhistoryfiles),
        historypack.historypack,
    )
    allhistorypacks += nonpackhistory()

    datasource = contentstore.unioncontentstore(
        *datapacks, allowincomplete=allowincompletedata
    )
    historysource = metadatastore.unionmetadatastore(
        *historypacks, allowincomplete=True
    )
    fullhistorysource = metadatastore.unionmetadatastore(
        *allhistorypacks, allowincomplete=True
    )
    _runrepack(
        repo,
        datasource,
        historysource,
        packpath,
        category,
        fullhistory=fullhistorysource,
        options=options,
    )
Augie Fackler
remotefilelog: import pruned-down remotefilelog extension from hg-experimental...
r40530
def _computeincrementaldatapack(ui, files):
    """Choose the data packs to combine in an incremental repack.

    Reads the ``remotefilelog.data.*`` tuning options from ``ui`` and applies
    ``_computeincrementalpack`` to the data packs in ``files`` that have an
    index.  Returns the list of chosen pack prefixes.
    """
    section = b'remotefilelog'
    gencountlimit = ui.configint(section, b'data.gencountlimit')
    generations = ui.configlist(section, b'data.generations')
    maxrepackpacks = ui.configint(section, b'data.maxrepackpacks')
    repackmaxpacksize = ui.configbytes(section, b'data.repackmaxpacksize')
    repacksizelimit = ui.configbytes(section, b'data.repacksizelimit')

    opts = {
        b'gencountlimit': gencountlimit,
        b'generations': generations,
        b'maxrepackpacks': maxrepackpacks,
        b'repackmaxpacksize': repackmaxpacksize,
        b'repacksizelimit': repacksizelimit,
    }
    candidates = _allpackfileswithsuffix(
        files, datapack.PACKSUFFIX, datapack.INDEXSUFFIX
    )
    return _computeincrementalpack(candidates, opts)
Augie Fackler
formatting: blacken the codebase...
r43346
Augie Fackler
remotefilelog: import pruned-down remotefilelog extension from hg-experimental...
def _computeincrementalhistorypack(ui, files):
    """Choose the history packs to combine in an incremental repack.

    Reads the ``remotefilelog.history.*`` tuning options from ``ui`` and
    applies ``_computeincrementalpack`` to the history packs in ``files``
    that have an index.  Returns the list of chosen pack prefixes.
    """
    section = b'remotefilelog'
    gencountlimit = ui.configint(section, b'history.gencountlimit')
    generations = ui.configlist(section, b'history.generations', [b'100MB'])
    maxrepackpacks = ui.configint(section, b'history.maxrepackpacks')
    repackmaxpacksize = ui.configbytes(
        section, b'history.repackmaxpacksize', b'400MB'
    )
    repacksizelimit = ui.configbytes(section, b'history.repacksizelimit')

    opts = {
        b'gencountlimit': gencountlimit,
        b'generations': generations,
        b'maxrepackpacks': maxrepackpacks,
        b'repackmaxpacksize': repackmaxpacksize,
        b'repacksizelimit': repacksizelimit,
    }
    candidates = _allpackfileswithsuffix(
        files, historypack.PACKSUFFIX, historypack.INDEXSUFFIX
    )
    return _computeincrementalpack(candidates, opts)
Augie Fackler
formatting: blacken the codebase...
r43346
Augie Fackler
remotefilelog: import pruned-down remotefilelog extension from hg-experimental...
def _allpackfileswithsuffix(files, packsuffix, indexsuffix):
    """Return the packs in ``files`` that also have an index file.

    ``files`` holds ``(filename, mode, stat)`` rows.  A row is kept when its
    filename ends with ``packsuffix`` and the same name with ``indexsuffix``
    in place of ``packsuffix`` is present as well.  Kept rows are returned as
    ``(prefix, mode, stat)`` with the suffix stripped from the name.
    """
    known = {name for name, mode, stat in files}
    cut = len(packsuffix)

    matches = []
    for name, mode, stat in files:
        if name.endswith(packsuffix):
            prefix = name[:-cut]
            # Don't process a pack if it doesn't have an index.
            if (prefix + indexsuffix) in known:
                matches.append((prefix, mode, stat))
    return matches
Augie Fackler
formatting: blacken the codebase...
r43346
Augie Fackler
remotefilelog: import pruned-down remotefilelog extension from hg-experimental...
r40530 def _computeincrementalpack(files, opts):
"""Given a set of pack files along with the configuration options, this
function computes the list of files that should be packed as part of an
incremental repack.
It tries to strike a balance between keeping incremental repacks cheap (i.e.
packing small things when possible, and rolling the packs up to the big ones
over time).
"""
Augie Fackler
formatting: blacken the codebase...
r43346 limits = list(
Augie Fackler
formatting: byteify all mercurial/ and hgext/ string literals...
r43347 sorted((util.sizetoint(s) for s in opts[b'generations']), reverse=True)
Augie Fackler
formatting: blacken the codebase...
r43346 )
Augie Fackler
remotefilelog: import pruned-down remotefilelog extension from hg-experimental...
r40530 limits.append(0)
# Group the packs by generation (i.e. by size)
generations = []
Manuel Jacob
py3: replace `pycompat.xrange` by `range`
r50179 for i in range(len(limits)):
Augie Fackler
remotefilelog: import pruned-down remotefilelog extension from hg-experimental...
r40530 generations.append([])
sizes = {}
for prefix, mode, stat in files:
size = stat.st_size
Augie Fackler
formatting: byteify all mercurial/ and hgext/ string literals...
r43347 if size > opts[b'repackmaxpacksize']:
Augie Fackler
remotefilelog: import pruned-down remotefilelog extension from hg-experimental...
r40530 continue
sizes[prefix] = size
for i, limit in enumerate(limits):
if size > limit:
generations[i].append(prefix)
break
# Steps for picking what packs to repack:
# 1. Pick the largest generation with > gencountlimit pack files.
# 2. Take the smallest three packs.
# 3. While total-size-of-packs < repacksizelimit: add another pack
# Find the largest generation with more than gencountlimit packs
genpacks = []
for i, limit in enumerate(limits):
Augie Fackler
formatting: byteify all mercurial/ and hgext/ string literals...
r43347 if len(generations[i]) > opts[b'gencountlimit']:
Augie Fackler
remotefilelog: import pruned-down remotefilelog extension from hg-experimental...
r40530 # Sort to be smallest last, for easy popping later
Augie Fackler
formatting: blacken the codebase...
r43346 genpacks.extend(
sorted(generations[i], reverse=True, key=lambda x: sizes[x])
)
Augie Fackler
remotefilelog: import pruned-down remotefilelog extension from hg-experimental...
r40530 break
# Take as many packs from the generation as we can
chosenpacks = genpacks[-3:]
genpacks = genpacks[:-3]
repacksize = sum(sizes[n] for n in chosenpacks)
Augie Fackler
formatting: blacken the codebase...
r43346 while (
Augie Fackler
formatting: byteify all mercurial/ and hgext/ string literals...
r43347 repacksize < opts[b'repacksizelimit']
Augie Fackler
formatting: blacken the codebase...
r43346 and genpacks
Augie Fackler
formatting: byteify all mercurial/ and hgext/ string literals...
r43347 and len(chosenpacks) < opts[b'maxrepackpacks']
Augie Fackler
formatting: blacken the codebase...
r43346 ):
Augie Fackler
remotefilelog: import pruned-down remotefilelog extension from hg-experimental...
r40530 chosenpacks.append(genpacks.pop())
repacksize += sizes[chosenpacks[-1]]
return chosenpacks
Augie Fackler
formatting: blacken the codebase...
r43346
def _runrepack(
    repo, data, history, packpath, category, fullhistory=None, options=None
):
    """Repack ``data`` and ``history`` into fresh packs under ``packpath``.

    ``fullhistory`` falls back to ``history`` when it is not given.  Raises
    ``RepackAlreadyRunning`` when the repacker reports ``error.LockHeld``.
    """
    shallowutil.mkstickygroupdir(repo.ui, packpath)

    def isold(repo, filename, node):
        """Tell whether the file node is older than ``remotefilelog.nodettl``.

        The age is taken from the date of the changeset the file node is
        linked to.
        """
        linkrev = repo.filectx(filename, fileid=node).linkrev()
        committed = repo[linkrev].date()
        ttl = repo.ui.configint(b'remotefilelog', b'nodettl')
        cutoff = time.time() - ttl
        return committed[0] < cutoff

    garbagecollect = repo.ui.configbool(b'remotefilelog', b'gcrepack')
    fullhistory = fullhistory or history

    packer = repacker(
        repo,
        data,
        history,
        fullhistory,
        category,
        gc=garbagecollect,
        isold=isold,
        options=options,
    )

    newdatapack = datapack.mutabledatapack(repo.ui, packpath)
    with newdatapack as dpack:
        newhistorypack = historypack.mutablehistorypack(repo.ui, packpath)
        with newhistorypack as hpack:
            try:
                packer.run(dpack, hpack)
            except error.LockHeld:
                raise RepackAlreadyRunning(
                    _(
                        b"skipping repack - another repack "
                        b"is already running"
                    )
                )
Augie Fackler
remotefilelog: import pruned-down remotefilelog extension from hg-experimental...
r40530
def keepset(repo, keyfn, lastkeepkeys=None):
    """Compute the set of keys that must not be garbage collected.

    ``keyfn`` maps ``(filename, node)`` to a unique key.

    ``lastkeepkeys`` is optional; when provided (even if empty) that set is
    updated in place with the new keys and returned.  Otherwise a new set is
    created and returned.
    """
    # Only a missing argument gets a fresh set: an empty set supplied by the
    # caller must still be the object that is filled in and returned.
    keepkeys = set() if lastkeepkeys is None else lastkeepkeys

    # We want to keep:
    # 1. Working copy parent
    # 2. Draft commits
    # 3. Parents of draft commits
    # 4. Pullprefetch and bgprefetchrevs revsets if specified
    revs = [b'.', b'draft()', b'parents(draft())']
    for option in (b'pullprefetch', b'bgprefetchrevs'):
        prefetchrevs = repo.ui.config(b'remotefilelog', option, None)
        if prefetchrevs:
            revs.append(b'(%s)' % prefetchrevs)
    revs = b'+'.join(revs)

    revs = [b'sort((%s), "topo")' % revs]
    keep = scmutil.revrange(repo, revs)

    processed = set()
    lastmanifest = None

    # process the commits in toposorted order starting from the oldest
    for r in reversed(keep._list):
        delta_from, m = repo[r].manifestctx().read_any_fast_delta(processed)
        # NOTE(review): ``lastmanifest`` starts as None and is only assigned
        # inside this branch, so as written the branch is never entered.
        # Behaviour is deliberately left unchanged here.
        if delta_from is None and lastmanifest is not None:
            # could not find a delta, compute one.
            # XXX (is this really faster?)
            full = m
            if lastmanifest:
                m = m.diff(lastmanifest)
            lastmanifest = full
        processed.add(r)

        # populate keepkeys with keys from the current manifest
        if type(m) is dict:
            # m is a result of diff of two manifests and is a dictionary that
            # maps filename to ((newnode, newflag), (oldnode, oldflag)) tuple
            for filename, diff in m.items():
                if diff[0][0] is not None:
                    keepkeys.add(keyfn(filename, diff[0][0]))
        else:
            # m is a manifest object
            for filename, filenode in m.items():
                keepkeys.add(keyfn(filename, filenode))

    return keepkeys
Augie Fackler
formatting: blacken the codebase...
r43346
Gregory Szorc
py3: use class X: instead of class X(object):...
r49801 class repacker:
Augie Fackler
remotefilelog: import pruned-down remotefilelog extension from hg-experimental...
r40530 """Class for orchestrating the repack of data and history information into a
new format.
"""
Augie Fackler
formatting: blacken the codebase...
r43346
def __init__(
    self,
    repo,
    data,
    history,
    fullhistory,
    category,
    gc=False,
    isold=None,
    options=None,
):
    """Remember the sources to repack and prepare garbage collection.

    When ``gc`` is true an ``isold`` callable is required (``ValueError``
    otherwise) and the set of ``(filename, node)`` keys to keep is computed
    right away with ``keepset``.
    """
    self.repo = repo
    self.data = data
    self.history = history
    self.fullhistory = fullhistory
    self.unit = constants.getunits(category)
    self.garbagecollect = gc
    self.options = options

    if not self.garbagecollect:
        return
    if not isold:
        raise ValueError(b"Function 'isold' is not properly specified")

    def filenodekey(filename, node):
        # use (filename, node) tuple as a keepset key
        return (filename, node)

    self.keepkeys = keepset(repo, filenodekey)
    self.isold = isold
def run(self, targetdata, targethistory):
    """Repack the sources into ``targetdata`` and ``targethistory``.

    Everything happens while holding the ``repacklock`` lock, which is
    requested with ``timeout=0`` (presumably failing at once when the lock
    is already held -- confirm against the lock module).
    """
    ledger = repackledger()
    lockvfs = repacklockvfs(self.repo)

    with lockmod.lock(lockvfs, b"repacklock", desc=None, timeout=0):
        self.repo.hook(b'prerepack')

        # Populate the ledger from the data source, then the history source.
        for origin in (self.data, self.history):
            origin.markledger(ledger, options=self.options)

        # Run the repack: data first, history second.
        self.repackdata(ledger, targetdata)
        self.repackhistory(ledger, targethistory)

        # Let every source recorded in the ledger clean up after itself.
        for source in ledger.sources:
            source.cleanup(ledger)
def _chainorphans(self, ui, filename, nodes, orphans, deltabases):
    """Reorder ``orphans`` into a single chain inside ``nodes`` and
    ``deltabases``.

    We often have orphan entries (nodes without a base that aren't
    referenced by other nodes -- i.e., part of a chain) due to gaps in
    history. Rather than store them as individual fulltexts, we prefer to
    insert them as one chain sorted by size.

    ``deltabases`` is updated in place with a ``(base, chainlength)`` entry
    per orphan.  Returns ``nodes`` unchanged when there are no orphans,
    otherwise a new list with the orphans moved to the end in chain order.
    """
    if not orphans:
        return nodes

    def getsize(node, default=0):
        meta = self.data.getmeta(filename, node)
        if constants.METAKEYSIZE in meta:
            return meta[constants.METAKEYSIZE]
        else:
            return default

    # Sort orphans by size; biggest first is preferred, since it's more
    # likely to be the newest version assuming files grow over time.
    # (Sort by node first to ensure the sort is stable.)
    orphans = sorted(sorted(orphans), key=getsize, reverse=True)
    if ui.debugflag:
        ui.debug(
            b"%s: orphan chain: %s\n"
            % (filename, b", ".join([short(s) for s in orphans]))
        )

    # Create one contiguous chain and reassign deltabases: the first orphan
    # is based on the null id, every other one on its predecessor.
    base, chainlen = self.repo.nullid, 0
    for node in orphans:
        deltabases[node] = (base, chainlen)
        base, chainlen = node, chainlen + 1

    # Membership is checked against a set so that filtering stays linear in
    # the number of nodes instead of scanning the orphan list for each one.
    orphanset = set(orphans)
    nodes = [n for n in nodes if n not in orphanset]
    nodes += orphans
    return nodes
def repackdata(self, ledger, target):
    """Repack the data entries recorded in ``ledger`` into ``target``.

    Ledger entries flagged ``datasource`` are grouped by file name. For
    each file the nodes are ordered children first, optionally garbage
    collected, assigned delta bases, and finally written with
    ``target.add``. Every entry that is written gets ``datarepacked`` set;
    every entry dropped by garbage collection gets ``gced`` set.
    ``target.close(ledger=ledger)`` is called once all files are processed.
    """
    ui = self.repo.ui
    nullid = self.repo.nullid
    maxchainlen = ui.configint(b'packs', b'maxchainlen', 1000)

    # filename -> {node: ledger entry} for every entry that has data.
    byfile = {}
    for entry in ledger.entries.values():
        if entry.datasource:
            byfile.setdefault(entry.filename, {})[entry.node] = entry

    count = 0
    repackprogress = ui.makeprogress(
        _(b"repacking data"), unit=self.unit, total=len(byfile)
    )
    for filename, entries in sorted(byfile.items()):
        repackprogress.update(count)

        ancestors = {}
        nodes = list(entries)
        nohistory = []
        buildprogress = ui.makeprogress(
            _(b"building history"), unit=b'nodes', total=len(nodes)
        )
        for i, node in enumerate(nodes):
            if node in ancestors:
                continue
            buildprogress.update(i)
            try:
                ancestors.update(
                    self.fullhistory.getancestors(
                        filename, node, known=ancestors
                    )
                )
            except KeyError:
                # Since we're packing data entries, we may not have the
                # corresponding history entries for them. It's not a big
                # deal, but the entries won't be delta'd perfectly.
                nohistory.append(node)
        buildprogress.complete()

        # Order the nodes children first, so we can produce reverse deltas
        orderednodes = list(reversed(self._toposort(ancestors)))
        if nohistory:
            ui.debug(
                b'repackdata: %d nodes without history\n' % len(nohistory)
            )
        orderednodes.extend(sorted(nohistory))

        # Filter orderednodes to just the nodes we want to serialize (it
        # currently also has the edge nodes' ancestors). ``entries`` is
        # keyed by exactly those nodes, so the dict gives a constant-time
        # membership test.
        orderednodes = [node for node in orderednodes if node in entries]

        # Garbage collect old nodes:
        if self.garbagecollect:
            neworderednodes = []
            for node in orderednodes:
                # If the node is old and is not in the keepset, we skip it,
                # and mark as garbage collected
                if (filename, node) not in self.keepkeys and self.isold(
                    self.repo, filename, node
                ):
                    entries[node].gced = True
                    continue
                neworderednodes.append(node)
            orderednodes = neworderednodes

        # Compute delta bases for nodes:
        deltabases = {}
        nobase = set()
        referenced = set()
        processprogress = ui.makeprogress(
            _(b"processing nodes"), unit=b'nodes', total=len(orderednodes)
        )
        for i, node in enumerate(orderednodes):
            processprogress.update(i)
            # Find delta base
            # TODO: allow delta'ing against most recent descendant instead
            # of immediate child
            deltatuple = deltabases.get(node)
            if deltatuple is None:
                deltabase, chainlen = nullid, 0
                deltabases[node] = (nullid, 0)
                nobase.add(node)
            else:
                deltabase, chainlen = deltatuple
                referenced.add(deltabase)

            # Use available ancestor information to inform our delta choices
            ancestorinfo = ancestors.get(node)
            if ancestorinfo:
                p1, p2, linknode, copyfrom = ancestorinfo

                # The presence of copyfrom means we're at a point where the
                # file was copied from elsewhere. So don't attempt to do any
                # deltas with the other file.
                if copyfrom:
                    p1 = nullid

                if chainlen < maxchainlen:
                    # Record this child as the delta base for its parents.
                    # This may be non optimal, since the parents may have
                    # many children, and this will only choose the last one.
                    # TODO: record all children and try all deltas to find
                    # best
                    if p1 != nullid:
                        deltabases[p1] = (node, chainlen + 1)
                    if p2 != nullid:
                        deltabases[p2] = (node, chainlen + 1)

        # experimental config: repack.chainorphansbysize
        if ui.configbool(b'repack', b'chainorphansbysize'):
            orphans = nobase - referenced
            orderednodes = self._chainorphans(
                ui, filename, orderednodes, orphans, deltabases
            )

        # Compute deltas and write to the pack
        for node in orderednodes:
            deltabase, chainlen = deltabases[node]
            # Compute delta
            # TODO: Optimize the deltachain fetching. Since we're
            # iterating over the different version of the file, we may
            # be fetching the same deltachain over and over again.
            if deltabase != nullid:
                deltaentry = self.data.getdelta(filename, node)
                delta, deltabasename, origdeltabase, meta = deltaentry
                size = meta.get(constants.METAKEYSIZE)
                # The stored delta is reused only when it was made against
                # the base chosen above and its size is recorded; otherwise
                # a new delta is computed from the two texts.
                if (
                    deltabasename != filename
                    or origdeltabase != deltabase
                    or size is None
                ):
                    deltabasetext = self.data.get(filename, deltabase)
                    original = self.data.get(filename, node)
                    size = len(original)
                    delta = mdiff.textdiff(deltabasetext, original)
            else:
                delta = self.data.get(filename, node)
                size = len(delta)
                meta = self.data.getmeta(filename, node)

            # TODO: don't use the delta if it's larger than the fulltext
            if constants.METAKEYSIZE not in meta:
                meta[constants.METAKEYSIZE] = size
            target.add(filename, node, deltabase, delta, meta)

            entries[node].datarepacked = True

        processprogress.complete()
        count += 1

    repackprogress.complete()
    target.close(ledger=ledger)
def repackhistory(self, ledger, target):
    """Repack the history entries recorded in ``ledger`` into ``target``.

    Ledger entries flagged ``historysource`` are grouped by file name. For
    each file the known ancestry is collected, walked children first, and
    written with ``target.add``; ledger entries that get written are marked
    ``historyrepacked``. ``target.close(ledger=ledger)`` is called at the
    end.
    """
    ui = self.repo.ui

    # filename -> {node: ledger entry} for every entry that has history.
    perfile = {}
    for ledgerentry in ledger.entries.values():
        if not ledgerentry.historysource:
            continue
        filenodes = perfile.setdefault(ledgerentry.filename, {})
        filenodes[ledgerentry.node] = ledgerentry

    progress = ui.makeprogress(
        _(b"repacking history"), unit=self.unit, total=len(perfile)
    )
    for filename in sorted(perfile):
        entries = perfile[filename]

        # Gather ancestry for every requested node, skipping nodes that an
        # earlier lookup already covered.
        ancestors = {}
        for node in entries:
            if node not in ancestors:
                ancestors.update(
                    self.history.getancestors(
                        filename, node, known=ancestors
                    )
                )

        # Walk the nodes children first and write them to the pack.
        dontprocess = set()
        for node in reversed(self._toposort(ancestors)):
            p1, p2, linknode, copyfrom = ancestors[node]

            # If the node is marked dontprocess, but it's also in the
            # explicit entries set, that means the node exists both in this
            # file and in another file that was copied to this file.
            # Usually this happens if the file was copied to another file,
            # then the copy was deleted, then reintroduced without copy
            # metadata. The original add and the new add have the same hash
            # since the content is identical and the parents are null.
            skip = node in dontprocess and node not in entries
            if skip:
                # If copyfrom == filename, it means the copy history
                # went to some other file, then came back to this one, so
                # we should continue processing it.
                if p1 != self.repo.nullid and copyfrom != filename:
                    dontprocess.add(p1)
                if p2 != self.repo.nullid:
                    dontprocess.add(p2)
                continue

            if copyfrom:
                dontprocess.add(p1)

            target.add(filename, node, p1, p2, linknode, copyfrom)

            if node in entries:
                entries[node].historyrepacked = True

        progress.increment()

    progress.complete()
    target.close(ledger=ledger)
def _toposort(self, ancestors):
    """Sort the nodes of ``ancestors`` using ``shallowutil.sortnodes``.

    ``ancestors`` maps each node to a ``(p1, p2, linknode, copyfrom)``
    tuple. Parents equal to the null id are not reported to the sorter.
    """

    def nonnullparents(node):
        p1, p2, _linknode, _copyfrom = ancestors[node]
        nullid = self.repo.nullid
        return [parent for parent in (p1, p2) if parent != nullid]

    return shallowutil.sortnodes(ancestors.keys(), nonnullparents)
Augie Fackler
formatting: blacken the codebase...
r43346
Gregory Szorc
py3: use class X: instead of class X(object):...
class repackledger:
    """Bookkeeping for everything that happens during a repack.

    ``entries`` maps ``(filename, node)`` to the entry object for each
    revision being repacked (the entry records what happened to it),
    ``sources`` maps each source store to the set of entries it originally
    contained (for later cleanup), and ``created`` collects the values
    passed to ``addcreated``.
    """

    def __init__(self):
        self.entries = {}
        self.sources = {}
        self.created = set()

    def markdataentry(self, source, filename, node):
        """Record that ``source`` holds a data rev for the given
        filename+node revision.
        """
        entry = self._getorcreateentry(filename, node)
        entry.datasource = True
        self._addtosource(source, entry)

    def markhistoryentry(self, source, filename, node):
        """Record that ``source`` holds a history rev for the given
        filename+node revision.
        """
        entry = self._getorcreateentry(filename, node)
        entry.historysource = True
        self._addtosource(source, entry)

    def _addtosource(self, source, entry):
        """Add ``entry`` to the set of entries tracked for ``source``."""
        bucket = self.sources.get(source)
        if not bucket:
            # A missing or empty bucket is replaced by a fresh set.
            bucket = self.sources[source] = set()
        bucket.add(entry)

    def _getorcreateentry(self, filename, node):
        """Return the entry for filename+node, creating it on first use."""
        key = (filename, node)
        existing = self.entries.get(key)
        if existing:
            return existing
        fresh = repackentry(filename, node)
        self.entries[key] = fresh
        return fresh

    def addcreated(self, value):
        """Add ``value`` to the ``created`` set."""
        self.created.add(value)
Augie Fackler
formatting: blacken the codebase...
r43346
Gregory Szorc
py3: use class X: instead of class X(object):...
class repackentry:
    """Simple class representing a single revision entry in the repackledger.

    Besides ``filename`` and ``node``, an entry carries five flags that all
    start out ``False``:

    - ``datasource``: the revision has a data entry in the source
    - ``historysource``: the revision has a history entry in the source
    - ``datarepacked``: the data entry was repacked into the repack target
    - ``historyrepacked``: the history entry was repacked into the repack
      target
    - ``gced``: the revision was garbage collected
    """

    __slots__ = (
        'filename',
        'node',
        'datasource',
        'historysource',
        'datarepacked',
        'historyrepacked',
        'gced',
    )

    def __init__(self, filename, node):
        self.filename, self.node = filename, node
        # Where the revision was found.
        self.datasource = self.historysource = False
        # What the repack did with it.
        self.datarepacked = self.historyrepacked = False
        self.gced = False
Augie Fackler
formatting: blacken the codebase...
r43346
Augie Fackler
remotefilelog: import pruned-down remotefilelog extension from hg-experimental...
def repacklockvfs(repo):
    """Return the vfs to use for the repack lock of ``repo``.

    When ``repo`` has a ``name`` attribute, the vfs is rooted at the shared
    cache pack path so repacks across multiple copies of the same repo are
    coordinated; otherwise ``repo.svfs`` is returned.
    """
    if not hasattr(repo, 'name'):
        return repo.svfs

    # Lock in the shared cache.
    cachepath = shallowutil.getcachepackpath(
        repo, constants.FILEPACK_CATEGORY
    )
    return vfs.vfs(cachepath)