__init__.py
390 lines
| 13.8 KiB
| text/x-python
|
PythonLexer
Matt Harbison
|
r35097 | # lfs - hash-preserving large file support using Git-LFS protocol | ||
# | ||||
# Copyright 2017 Facebook, Inc. | ||||
# | ||||
# This software may be used and distributed according to the terms of the | ||||
# GNU General Public License version 2 or any later version. | ||||
"""lfs - large file support (EXPERIMENTAL) | ||||
Matt Harbison
|
r35786 | This extension allows large files to be tracked outside of the normal | ||
repository storage and stored on a centralized server, similar to the | ||||
``largefiles`` extension. The ``git-lfs`` protocol is used when | ||||
communicating with the server, so existing git infrastructure can be | ||||
harnessed. Even though the files are stored outside of the repository, | ||||
they are still integrity checked in the same manner as normal files. | ||||
Matt Harbison
|
r35683 | |||
Matt Harbison
|
r35786 | The files stored outside of the repository are downloaded on demand, | ||
which reduces the time to clone, and possibly the local disk usage. | ||||
This changes fundamental workflows in a DVCS, so careful thought | ||||
should be given before deploying it. :hg:`convert` can be used to | ||||
convert LFS repositories to normal repositories that no longer | ||||
require this extension, and do so without changing the commit hashes. | ||||
This allows the extension to be disabled if the centralized workflow | ||||
becomes burdensome. However, the pre and post convert clones will | ||||
not be able to communicate with each other unless the extension is | ||||
enabled on both. | ||||
Matt Harbison
|
r35825 | To start a new repository, or to add LFS files to an existing one, just | ||
create an ``.hglfs`` file as described below in the root directory of | ||||
the repository. Typically, this file should be put under version | ||||
control, so that the settings will propagate to other repositories with | ||||
push and pull. During any commit, Mercurial will consult this file to | ||||
determine if an added or modified file should be stored externally. The | ||||
type of storage depends on the characteristics of the file at each | ||||
commit. A file that is near a size threshold may switch back and forth | ||||
between LFS and normal storage, as needed. | ||||
Matt Harbison
|
r35786 | |||
Alternately, both normal repositories and largefile controlled | ||||
repositories can be converted to LFS by using :hg:`convert` and the | ||||
``lfs.track`` config option described below. The ``.hglfs`` file | ||||
should then be created and added, to control subsequent LFS selection. | ||||
The hashes are also unchanged in this case. The LFS and non-LFS | ||||
repositories can be distinguished because the LFS repository will | ||||
abort any command if this extension is disabled. | ||||
Matt Harbison
|
r35683 | |||
Matt Harbison
|
r35786 | Committed LFS files are held locally, until the repository is pushed. | ||
Prior to pushing the normal repository data, the LFS files that are | ||||
tracked by the outgoing commits are automatically uploaded to the | ||||
configured central server. No LFS files are transferred on | ||||
:hg:`pull` or :hg:`clone`. Instead, the files are downloaded on | ||||
demand as they need to be read, if a cached copy cannot be found | ||||
locally. Both committing and downloading an LFS file will link the | ||||
file to a usercache, to speed up future access. See the `usercache` | ||||
config setting described below. | ||||
.hglfs:: | ||||
The extension reads its configuration from a versioned ``.hglfs`` | ||||
configuration file found in the root of the working directory. The | ||||
``.hglfs`` file uses the same syntax as all other Mercurial | ||||
configuration files. It uses a single section, ``[track]``. | ||||
Matt Harbison
|
r35683 | |||
Matt Harbison
|
r35786 | The ``[track]`` section specifies which files are stored as LFS (or | ||
not). Each line is keyed by a file pattern, with a predicate value. | ||||
The first file pattern match is used, so put more specific patterns | ||||
first. The available predicates are ``all()``, ``none()``, and | ||||
``size()``. See "hg help filesets.size" for the latter. | ||||
Example versioned ``.hglfs`` file:: | ||||
Matt Harbison
|
r35683 | |||
Matt Harbison
|
r35786 | [track] | ||
# No Makefile or python file, anywhere, will be LFS | ||||
**Makefile = none() | ||||
**.py = none() | ||||
Matt Harbison
|
r35683 | |||
Matt Harbison
|
r35786 | **.zip = all() | ||
**.exe = size(">1MB") | ||||
# Catchall for everything not matched above | ||||
** = size(">10MB") | ||||
Matt Harbison
|
r35683 | |||
Matt Harbison
|
r35097 | Configs:: | ||
[lfs] | ||||
# Remote endpoint. Multiple protocols are supported: | ||||
# - http(s)://user:pass@example.com/path | ||||
# git-lfs endpoint | ||||
# - file:///tmp/path | ||||
# local filesystem, usually for testing | ||||
# If unset, lfs will prompt for this setting when the value is needed.
# (default: unset) | ||||
Matt Harbison
|
r35786 | url = https://example.com/repo.git/info/lfs | ||
Matt Harbison
|
r35097 | |||
Matt Harbison
|
r35636 | # Which files to track in LFS. Path tests are "**.extname" for file | ||
# extensions, and "path:under/some/directory" for path prefix. Both | ||||
Yuya Nishihara
|
r35759 | # are relative to the repository root. | ||
Matt Harbison
|
r35636 | # File size can be tested with the "size()" fileset, and tests can be | ||
# joined with fileset operators. (See "hg help filesets.operators".) | ||||
# | ||||
# Some examples: | ||||
# - all() # everything | ||||
# - none() # nothing | ||||
# - size(">20MB") # larger than 20MB | ||||
# - !**.txt # anything not a *.txt file | ||||
# - **.zip | **.tar.gz | **.7z # some types of compressed files | ||||
Yuya Nishihara
|
r35759 | # - path:bin # files under "bin" in the project root | ||
Matt Harbison
|
r35636 | # - (**.php & size(">2MB")) | (**.js & size(">5MB")) | **.tar.gz | ||
Yuya Nishihara
|
r35759 | # | (path:bin & !path:/bin/README) | size(">1GB") | ||
Matt Harbison
|
r35636 | # (default: none()) | ||
Matt Harbison
|
r35683 | # | ||
# This is ignored if there is a tracked '.hglfs' file, and this setting | ||||
# will eventually be deprecated and removed. | ||||
Matt Harbison
|
r35636 | track = size(">10M") | ||
Matt Harbison
|
r35097 | |||
# how many times to retry before giving up on transferring an object | ||||
retry = 5 | ||||
Matt Harbison
|
r35281 | |||
# the local directory to store lfs files for sharing across local clones. | ||||
# If not set, the cache is located in an OS specific cache location. | ||||
usercache = /path/to/global/cache | ||||
Matt Harbison
|
r35097 | """ | ||
from __future__ import absolute_import | ||||
Matt Harbison
|
r35098 | from mercurial.i18n import _ | ||
Matt Harbison
|
r35097 | from mercurial import ( | ||
bundle2, | ||||
changegroup, | ||||
Matt Harbison
|
r35681 | cmdutil, | ||
Matt Harbison
|
r35683 | config, | ||
Matt Harbison
|
r35097 | context, | ||
Matt Harbison
|
r35683 | error, | ||
Matt Harbison
|
r35097 | exchange, | ||
extensions, | ||||
filelog, | ||||
Matt Harbison
|
r35636 | fileset, | ||
Matt Harbison
|
r35214 | hg, | ||
Matt Harbison
|
r35167 | localrepo, | ||
Matt Harbison
|
r35636 | minifileset, | ||
Matt Harbison
|
r35520 | node, | ||
Matt Harbison
|
r35675 | pycompat, | ||
Matt Harbison
|
r35097 | registrar, | ||
revlog, | ||||
scmutil, | ||||
Yuya Nishihara
|
r36939 | templateutil, | ||
Boris Feld
|
r35347 | upgrade, | ||
Matt Harbison
|
r35749 | util, | ||
Matt Harbison
|
r35097 | vfs as vfsmod, | ||
Matt Harbison
|
r35522 | wireproto, | ||
Matt Harbison
|
r35097 | ) | ||
from . import ( | ||||
blobstore, | ||||
wrapper, | ||||
) | ||||
# Note for extension authors: ONLY specify testedwith = 'ships-with-hg-core' for
# extensions which SHIP WITH MERCURIAL. Non-mainline extensions should
# be specifying the version(s) of Mercurial they are tested with, or
# leave the attribute unspecified.
testedwith = 'ships-with-hg-core'

# Every config option read by this extension is declared below, together
# with its default value.
configtable = {}
configitem = registrar.configitem(configtable)

# [experimental] knobs
configitem('experimental', 'lfs.user-agent', default=None)
configitem('experimental', 'lfs.worker-enable', default=False)

# [lfs] section
configitem('lfs', 'url', default=None)
configitem('lfs', 'usercache', default=None)
# Deprecated
configitem('lfs', 'threshold', default=None)
configitem('lfs', 'track', default='none()')
configitem('lfs', 'retry', default=5)
Matt Harbison
|
# Registration tables/decorators used by the definitions further down in
# this module: commands, template keywords and fileset predicates.
cmdtable = {}
command = registrar.command(cmdtable)

templatekeyword = registrar.templatekeyword()
filesetpredicate = registrar.filesetpredicate()
Matt Harbison
|
r35097 | |||
Matt Harbison
|
def featuresetup(ui, supported):
    """Add 'lfs' to the ``supported`` requirement set, in place.

    ``ui`` is accepted but not used here.
    """
    # don't die on seeing a repo with the lfs requirement
    supported.update(('lfs',))
def uisetup(ui):
    """Register ``featuresetup`` with localrepo's feature setup functions.

    ``ui`` is accepted but not used here.
    """
    setupfuncs = localrepo.localrepository.featuresetupfuncs
    setupfuncs.add(featuresetup)
Matt Harbison
|
def reposetup(ui, repo):
    """Prepare a local repository for LFS use.

    For a local ``repo`` this:

    - stores a local and a remote blob store on ``repo.svfs``,
    - swaps ``repo.__class__`` for a subclass whose ``commitctx``
      recomputes the 'lfstrack' store option before every commit, and
    - registers ``wrapper.prepush`` as a pre-push hook, either right away
      (the repo already has the 'lfs' requirement) or lazily, from a
      commit/pretxnchangegroup hook, once an LFS file first shows up.

    Non-local repositories are left untouched.
    """
    if not repo.local():
        # Nothing to do with a remote repo
        return

    repo.svfs.lfslocalblobstore = blobstore.local(repo)
    repo.svfs.lfsremoteblobstore = blobstore.remote(repo)

    class lfsrepo(repo.__class__):
        @localrepo.unfilteredmethod
        def commitctx(self, ctx, error=False):
            # Refresh the tracking matcher so the commit sees the current
            # '.hglfs' / config state.
            matcher = _trackedmatcher(self)
            repo.svfs.options['lfstrack'] = matcher
            return super(lfsrepo, self).commitctx(ctx, error)

    repo.__class__ = lfsrepo

    if 'lfs' in repo.requirements:
        # Already an LFS repository: the push hook is always needed.
        repo.prepushoutgoinghooks.add('lfs', wrapper.prepush)
        return

    def checkrequireslfs(ui, repo, **kwargs):
        # Hook body: add the 'lfs' requirement as soon as one of the
        # changesets named by the hook arguments touches an LFS file.
        if 'lfs' in repo.requirements:
            return

        tobin = node.bin
        firstnode = tobin(kwargs[r'node'])
        lasthex = kwargs.get(r'node_last')
        if lasthex:
            candidates = repo.set('%n:%n', firstnode, tobin(lasthex))
        else:
            candidates = repo.set('%n', firstnode)

        for ctx in candidates:
            # TODO: is there a way to just walk the files in the commit?
            if any(ctx[f].islfs() for f in ctx.files() if f in ctx):
                repo.requirements.add('lfs')
                repo._writerequirements()
                repo.prepushoutgoinghooks.add('lfs', wrapper.prepush)
                break

    for hookname in ('commit.lfs', 'pretxnchangegroup.lfs'):
        ui.setconfig('hooks', hookname, checkrequireslfs, 'lfs')
Matt Harbison
|
r35167 | |||
Matt Harbison
|
def _trackedmatcher(repo):
    """Return a function (path, size) -> bool indicating whether or not to
    track a given file with lfs.

    A '.hglfs' file in the working directory takes precedence.  Without
    one, the ``lfs.track`` config (optionally combined with the deprecated
    ``lfs.threshold``) is compiled instead.  Errors compiling the '.hglfs'
    rules are re-raised as ``error.Abort``.
    """
    if repo.wvfs.exists('.hglfs'):
        content = repo.wvfs.tryread('.hglfs')
        if not content:
            # Nothing readable in the file: track nothing.
            return lambda p, s: False

        # Parse errors here will abort with a message that points to the
        # .hglfs file and line number.
        parsed = config.config()
        parsed.parse('.hglfs', content)

        compiled = []
        try:
            for pattern, rule in parsed.items('track'):
                compiled.append((minifileset.compile(pattern),
                                 minifileset.compile(rule)))
        except error.ParseError as e:
            # The original exception gives no indicator that the error is in
            # the .hglfs file, so add that.
            # TODO: See if the line number of the file can be made available.
            raise error.Abort(_('parse error in .hglfs: %s') % e)

        def _match(path, size):
            # The first pattern that matches decides; later ones are ignored.
            for applies, decide in compiled:
                if applies(path, size):
                    return decide(path, size)
            return False

        return _match

    # No '.hglfs' in wdir.  Fallback to config for now.
    spec = repo.ui.config('lfs', 'track')

    # deprecated config: lfs.threshold
    limit = repo.ui.configbytes('lfs', 'threshold')
    if limit:
        fileset.parse(spec)  # make sure syntax errors are confined
        spec = "(%s) | size('>%d')" % (spec, limit)

    return minifileset.compile(spec)
Matt Harbison
|
r35682 | |||
Matt Harbison
|
def wrapfilelog(filelog):
    """Wrap the 'addrevision', 'renamed' and 'size' attributes of
    ``filelog`` with their LFS-aware counterparts from ``wrapper``."""
    replacements = (
        ('addrevision', wrapper.filelogaddrevision),
        ('renamed', wrapper.filelogrenamed),
        ('size', wrapper.filelogsize),
    )
    for attrname, lfsfunc in replacements:
        extensions.wrapfunction(filelog, attrname, lfsfunc)
def extsetup(ui):
    """Install the extension's wrappers, flag processor and prefetch hook.

    ``ui`` is accepted but not used here.
    """
    wrapfilelog(filelog.filelog)

    wrap = extensions.wrapfunction

    wrap(cmdutil, '_updatecatformatter', wrapper._updatecatformatter)
    wrap(scmutil, 'wrapconvertsink', wrapper.convertsink)

    # repository upgrade
    wrap(upgrade, '_finishdatamigration',
         wrapper.upgradefinishdatamigration)
    wrap(upgrade, 'preservedrequirements', wrapper.upgraderequirements)
    wrap(upgrade, 'supporteddestrequirements', wrapper.upgraderequirements)

    # changegroup version selection
    wrap(changegroup, 'supportedoutgoingversions',
         wrapper.supportedoutgoingversions)
    wrap(changegroup, 'allsupportedversions', wrapper.allsupportedversions)

    # push and wire protocol capabilities
    wrap(exchange, 'push', wrapper.push)
    wrap(wireproto, '_capabilities', wrapper._capabilities)

    # file contexts
    wrap(context.basefilectx, 'cmp', wrapper.filectxcmp)
    wrap(context.basefilectx, 'isbinary', wrapper.filectxisbinary)
    context.basefilectx.islfs = wrapper.filectxislfs

    # revlog flag processor for externally stored revisions
    flagprocessor = (
        wrapper.readfromstore,
        wrapper.writetostore,
        wrapper.bypasscheckhash,
    )
    revlog.addflagprocessor(revlog.REVIDX_EXTSTORED, flagprocessor)

    wrap(hg, 'clone', wrapper.hgclone)
    wrap(hg, 'postshare', wrapper.hgpostshare)

    scmutil.fileprefetchhooks.add('lfs', wrapper._prefetchfiles)

    # Make bundle choose changegroup3 instead of changegroup2. This affects
    # "hg bundle" command. Note: it does not cover all bundle formats like
    # "packed1". Using "packed1" with lfs will likely cause trouble.
    cgversions = exchange._bundlespeccgversions
    # Collect the keys first so the dict is not modified while iterating it.
    for spec in [s for s, ver in cgversions.items() if ver == '02']:
        cgversions[spec] = '03'

    # bundlerepo uses "vfsmod.readonlyvfs(othervfs)", we need to make sure lfs
    # options and blob stores are passed from othervfs to the new readonlyvfs.
    wrap(vfsmod.readonlyvfs, '__init__', wrapper.vfsinit)

    # when writing a bundle via "hg bundle" command, upload related LFS blobs
    wrap(bundle2, 'writenewbundle', wrapper.writenewbundle)
Matt Harbison
|
@filesetpredicate('lfs()', callstatus=True)
def lfsfileset(mctx, x):
    """File that uses LFS storage."""
    # i18n: "lfs" is a keyword
    fileset.getargs(x, 0, 0, _("lfs takes no arguments"))

    # Keep every file of the subset for which a pointer (including one for
    # a removed file) can be found in the context.
    selected = []
    for f in mctx.subset:
        ptr = wrapper.pointerfromctx(mctx.ctx, f, removed=True)
        if ptr is not None:
            selected.append(f)
    return selected
Matt Harbison
|
r36008 | |||
Yuya Nishihara
|
@templatekeyword('lfs_files', requires={'ctx'})
def lfsfiles(context, mapping):
    """List of strings. All files modified, added, or removed by this
    changeset."""
    ctx = context.resource(mapping, 'ctx')

    # {path: pointer}
    pointers = wrapper.pointersfromctx(ctx, removed=True)
    files = sorted(pointers)

    def _versionfirst(item):
        # In the file spec, version is first and the other keys are sorted.
        return (item[0] != 'version', item)

    def pointer(v):
        # Ordered view of the pointer's key/value pairs for file ``v``.
        ordered = sorted(pointers[v].iteritems(), key=_versionfirst)
        return util.sortdict(ordered)

    def makemap(v):
        # Per-file mapping exposed to templates; a falsy pointer has no oid.
        return {
            'file': v,
            'lfsoid': pointers[v].oid() if pointers[v] else None,
            'lfspointer': templateutil.hybriddict(pointer(v)),
        }

    # TODO: make the separator ', '?
    f = templateutil._showcompatlist(context, mapping, 'lfs_file', files)
    return templateutil.hybrid(f, files, makemap, pycompat.identity)
Matt Harbison
|
r35097 | |||
@command('debuglfsupload',
         [('r', 'rev', [], _('upload large files introduced by REV'))])
def debuglfsupload(ui, repo, **opts):
    """upload lfs blobs added by the working copy parent or given revisions"""
    # Resolve the requested revisions, collect the pointers they carry,
    # then hand those pointers to the uploader.
    revspecs = opts.get(r'rev', [])
    revisions = scmutil.revrange(repo, revspecs)
    pointers = wrapper.extractpointers(repo, revisions)
    wrapper.uploadblobs(repo, pointers)