diff --git a/hgext/largefiles/CONTRIBUTORS b/hgext/largefiles/CONTRIBUTORS new file mode 100644 --- /dev/null +++ b/hgext/largefiles/CONTRIBUTORS @@ -0,0 +1,4 @@ +Greg Ward, author of the original bfiles extension +Na'Tosha Bard of Unity Technologies +Fog Creek Software +Special thanks to the University of Toronto and the UCOSP program diff --git a/hgext/largefiles/__init__.py b/hgext/largefiles/__init__.py new file mode 100644 --- /dev/null +++ b/hgext/largefiles/__init__.py @@ -0,0 +1,40 @@ +# Copyright 2009-2010 Gregory P. Ward +# Copyright 2009-2010 Intelerad Medical Systems Incorporated +# Copyright 2010-2011 Fog Creek Software +# Copyright 2010-2011 Unity Technologies +# +# This software may be used and distributed according to the terms of the +# GNU General Public License version 2 or any later version. + +'''track large binary files + +Large binary files tend to be not very compressible, not very "diffable", and +not at all mergeable. Such files are not handled well by Mercurial\'s storage +format (revlog), which is based on compressed binary deltas. largefiles solves +this problem by adding a centralized client-server layer on top of Mercurial: +largefiles live in a *central store* out on the network somewhere, and you only +fetch the ones that you need when you need them. + +largefiles works by maintaining a *standin* in .hglf/ for each largefile. The +standins are small (41 bytes: an SHA-1 hash plus newline) and are tracked by +Mercurial. Largefile revisions are identified by the SHA-1 hash of their +contents, which is written to the standin. largefiles uses that revision ID to +get/put largefile revisions from/to the central store. + +A complete tutorial for using lfiles is included in ``usage.txt`` in the lfiles +source distribution. 
See +https://developers.kilnhg.com/Repo/Kiln/largefiles/largefiles/File/usage.txt +''' + +from mercurial import commands + +import lfcommands +import reposetup +import uisetup + +reposetup = reposetup.reposetup +uisetup = uisetup.uisetup + +commands.norepo += " lfconvert" + +cmdtable = lfcommands.cmdtable diff --git a/hgext/largefiles/basestore.py b/hgext/largefiles/basestore.py new file mode 100644 --- /dev/null +++ b/hgext/largefiles/basestore.py @@ -0,0 +1,201 @@ +# Copyright 2009-2010 Gregory P. Ward +# Copyright 2009-2010 Intelerad Medical Systems Incorporated +# Copyright 2010-2011 Fog Creek Software +# Copyright 2010-2011 Unity Technologies +# +# This software may be used and distributed according to the terms of the +# GNU General Public License version 2 or any later version. + +'''Base class for store implementations and store-related utility code.''' + +import os +import tempfile +import binascii +import re + +from mercurial import util, node, hg +from mercurial.i18n import _ + +import lfutil + +class StoreError(Exception): + '''Raised when there is a problem getting files from or putting + files to a central store.''' + def __init__(self, filename, hash, url, detail): + self.filename = filename + self.hash = hash + self.url = url + self.detail = detail + + def longmessage(self): + if self.url: + return ('%s: %s\n' + '(failed URL: %s)\n' + % (self.filename, self.detail, self.url)) + else: + return ('%s: %s\n' + '(no default or default-push path set in hgrc)\n' + % (self.filename, self.detail)) + + def __str__(self): + return "%s: %s" % (self.url, self.detail) + +class basestore(object): + def __init__(self, ui, repo, url): + self.ui = ui + self.repo = repo + self.url = url + + def put(self, source, hash): + '''Put source file into the store under /.''' + raise NotImplementedError('abstract method') + + def exists(self, hash): + '''Check to see if the store contains the given hash.''' + raise NotImplementedError('abstract method') + + def get(self, 
files): + '''Get the specified largefiles from the store and write to local + files under repo.root. files is a list of (filename, hash) + tuples. Return (success, missing), lists of files successfuly + downloaded and those not found in the store. success is a list + of (filename, hash) tuples; missing is a list of filenames that + we could not get. (The detailed error message will already have + been presented to the user, so missing is just supplied as a + summary.)''' + success = [] + missing = [] + ui = self.ui + + at = 0 + for filename, hash in files: + ui.progress(_('getting largefiles'), at, unit='lfile', + total=len(files)) + at += 1 + ui.note(_('getting %s:%s\n') % (filename, hash)) + + cachefilename = lfutil.cachepath(self.repo, hash) + cachedir = os.path.dirname(cachefilename) + + # No need to pass mode='wb' to fdopen(), since mkstemp() already + # opened the file in binary mode. + (tmpfd, tmpfilename) = tempfile.mkstemp( + dir=cachedir, prefix=os.path.basename(filename)) + tmpfile = os.fdopen(tmpfd, 'w') + + try: + hhash = binascii.hexlify(self._getfile(tmpfile, filename, hash)) + except StoreError, err: + ui.warn(err.longmessage()) + hhash = "" + + if hhash != hash: + if hhash != "": + ui.warn(_('%s: data corruption (expected %s, got %s)\n') + % (filename, hash, hhash)) + tmpfile.close() # no-op if it's already closed + os.remove(tmpfilename) + missing.append(filename) + continue + + if os.path.exists(cachefilename): # Windows + os.remove(cachefilename) + os.rename(tmpfilename, cachefilename) + lfutil.linktosystemcache(self.repo, hash) + success.append((filename, hhash)) + + ui.progress(_('getting largefiles'), None) + return (success, missing) + + def verify(self, revs, contents=False): + '''Verify the existence (and, optionally, contents) of every big + file revision referenced by every changeset in revs. 
+ Return 0 if all is well, non-zero on any errors.''' + write = self.ui.write + failed = False + + write(_('searching %d changesets for largefiles\n') % len(revs)) + verified = set() # set of (filename, filenode) tuples + + for rev in revs: + cctx = self.repo[rev] + cset = "%d:%s" % (cctx.rev(), node.short(cctx.node())) + + failed = lfutil.any_(self._verifyfile( + cctx, cset, contents, standin, verified) for standin in cctx) + + num_revs = len(verified) + num_lfiles = len(set([fname for (fname, fnode) in verified])) + if contents: + write(_('verified contents of %d revisions of %d largefiles\n') + % (num_revs, num_lfiles)) + else: + write(_('verified existence of %d revisions of %d largefiles\n') + % (num_revs, num_lfiles)) + + return int(failed) + + def _getfile(self, tmpfile, filename, hash): + '''Fetch one revision of one file from the store and write it + to tmpfile. Compute the hash of the file on-the-fly as it + downloads and return the binary hash. Close tmpfile. Raise + StoreError if unable to download the file (e.g. it does not + exist in the store).''' + raise NotImplementedError('abstract method') + + def _verifyfile(self, cctx, cset, contents, standin, verified): + '''Perform the actual verification of a file in the store. + ''' + raise NotImplementedError('abstract method') + +import localstore, wirestore + +_storeprovider = { + 'file': [localstore.localstore], + 'http': [wirestore.wirestore], + 'https': [wirestore.wirestore], + 'ssh': [wirestore.wirestore], + } + +_scheme_re = re.compile(r'^([a-zA-Z0-9+-.]+)://') + +# During clone this function is passed the src's ui object +# but it needs the dest's ui object so it can read out of +# the config file. Use repo.ui instead. +def _openstore(repo, remote=None, put=False): + ui = repo.ui + + if not remote: + path = getattr(repo, 'lfpullsource', None) or \ + ui.expandpath('default-push', 'default') + # If 'default-push' and 'default' can't be expanded + # they are just returned. 
In that case use the empty string which + # use the filescheme. + if path == 'default-push' or path == 'default': + path = '' + remote = repo + else: + remote = hg.peer(repo, {}, path) + + # The path could be a scheme so use Mercurial's normal functionality + # to resolve the scheme to a repository and use its path + path = hasattr(remote, 'url') and remote.url() or remote.path + + match = _scheme_re.match(path) + if not match: # regular filesystem path + scheme = 'file' + else: + scheme = match.group(1) + + try: + storeproviders = _storeprovider[scheme] + except KeyError: + raise util.Abort(_('unsupported URL scheme %r') % scheme) + + for class_obj in storeproviders: + try: + return class_obj(ui, repo, remote) + except lfutil.storeprotonotcapable: + pass + + raise util.Abort(_('%s does not appear to be a lfile store'), path) diff --git a/hgext/largefiles/design.txt b/hgext/largefiles/design.txt new file mode 100644 --- /dev/null +++ b/hgext/largefiles/design.txt @@ -0,0 +1,49 @@ += largefiles - manage large binary files = +This extension is based off of Greg Ward's bfiles extension which can be found +at http://mercurial.selenic.com/wiki/BfilesExtension. + +== The largefile store == + +largefile stores are, in the typical use case, centralized servers that have +every past revision of a given binary file. Each largefile is identified by +its sha1 hash, and all interactions with the store take one of the following +forms. + +-Download a bfile with this hash +-Upload a bfile with this hash +-Check if the store has a bfile with this hash + +largefiles stores can take one of two forms: + +-Directories on a network file share +-Mercurial wireproto servers, either via ssh or http (hgweb) + +== The Local Repository == + +The local repository has a largefile cache in .hg/largefiles which holds a +subset of the largefiles needed. On a clone only the largefiles at tip are +downloaded. When largefiles are downloaded from the central store, a copy is +saved in this store. 
+ +== The Global Cache == + +largefiles in a local repository cache are hardlinked to files in the global +cache. Before a file is downloaded we check if it is in the global cache. + +== Implementation Details == + +Each largefile has a standin which is in .hglf. The standin is tracked by +Mercurial. The standin contains the SHA1 hash of the largefile. When a +largefile is added/removed/copied/renamed/etc the same operation is applied to +the standin. Thus the history of the standin is the history of the largefile. + +For performance reasons, the contents of a standin are only updated before a +commit. Standins are added/removed/copied/renamed from add/remove/copy/rename +Mercurial commands but their contents will not be updated. The contents of a +standin will always be the hash of the largefile as of the last commit. To +support some commands (revert) some standins are temporarily updated but will +be changed back after the command is finished. + +A Mercurial dirstate object tracks the state of the largefiles. The dirstate +uses the last modified time and current size to detect if a file has changed +(without reading the entire contents of the file). diff --git a/hgext/largefiles/lfcommands.py b/hgext/largefiles/lfcommands.py new file mode 100644 --- /dev/null +++ b/hgext/largefiles/lfcommands.py @@ -0,0 +1,483 @@ +# Copyright 2009-2010 Gregory P. Ward +# Copyright 2009-2010 Intelerad Medical Systems Incorporated +# Copyright 2010-2011 Fog Creek Software +# Copyright 2010-2011 Unity Technologies +# +# This software may be used and distributed according to the terms of the +# GNU General Public License version 2 or any later version. + +'''High-level command functions: lfadd() et. 
al, plus the cmdtable.''' + +import os +import shutil + +from mercurial import util, match as match_, hg, node, context, error +from mercurial.i18n import _ + +import lfutil +import basestore + +# -- Commands ---------------------------------------------------------- + +def lfconvert(ui, src, dest, *pats, **opts): + '''Convert a normal repository to a largefiles repository + + Convert source repository creating an identical repository, except that all + files that match the patterns given, or are over the given size will be + added as largefiles. The size used to determine whether or not to track a + file as a largefile is the size of the first version of the file. After + running this command you will need to make sure that largefiles is enabled + anywhere you intend to push the new repository.''' + + if opts['tonormal']: + tolfile = False + else: + tolfile = True + size = opts['size'] + if not size: + size = ui.config(lfutil.longname, 'size', default=None) + try: + size = int(size) + except ValueError: + raise util.Abort(_('largefiles.size must be integer, was %s\n') % \ + size) + except TypeError: + raise util.Abort(_('size must be specified')) + + try: + rsrc = hg.repository(ui, src) + if not rsrc.local(): + raise util.Abort(_('%s is not a local Mercurial repo') % src) + except error.RepoError, err: + ui.traceback() + raise util.Abort(err.args[0]) + if os.path.exists(dest): + if not os.path.isdir(dest): + raise util.Abort(_('destination %s already exists') % dest) + elif os.listdir(dest): + raise util.Abort(_('destination %s is not empty') % dest) + try: + ui.status(_('initializing destination %s\n') % dest) + rdst = hg.repository(ui, dest, create=True) + if not rdst.local(): + raise util.Abort(_('%s is not a local Mercurial repo') % dest) + except error.RepoError: + ui.traceback() + raise util.Abort(_('%s is not a repo') % dest) + + try: + # Lock destination to prevent modification while it is converted to. 
+ # Don't need to lock src because we are just reading from its history + # which can't change. + dst_lock = rdst.lock() + + # Get a list of all changesets in the source. The easy way to do this + # is to simply walk the changelog, using changelog.nodesbewteen(). + # Take a look at mercurial/revlog.py:639 for more details. + # Use a generator instead of a list to decrease memory usage + ctxs = (rsrc[ctx] for ctx in rsrc.changelog.nodesbetween(None, + rsrc.heads())[0]) + revmap = {node.nullid: node.nullid} + if tolfile: + lfiles = set() + normalfiles = set() + if not pats: + pats = ui.config(lfutil.longname, 'patterns', default=()) + if pats: + pats = pats.split(' ') + if pats: + matcher = match_.match(rsrc.root, '', list(pats)) + else: + matcher = None + + lfiletohash = {} + for ctx in ctxs: + ui.progress(_('converting revisions'), ctx.rev(), + unit=_('revision'), total=rsrc['tip'].rev()) + _lfconvert_addchangeset(rsrc, rdst, ctx, revmap, + lfiles, normalfiles, matcher, size, lfiletohash) + ui.progress(_('converting revisions'), None) + + if os.path.exists(rdst.wjoin(lfutil.shortname)): + shutil.rmtree(rdst.wjoin(lfutil.shortname)) + + for f in lfiletohash.keys(): + if os.path.isfile(rdst.wjoin(f)): + os.unlink(rdst.wjoin(f)) + try: + os.removedirs(os.path.dirname(rdst.wjoin(f))) + except: + pass + + else: + for ctx in ctxs: + ui.progress(_('converting revisions'), ctx.rev(), + unit=_('revision'), total=rsrc['tip'].rev()) + _addchangeset(ui, rsrc, rdst, ctx, revmap) + + ui.progress(_('converting revisions'), None) + except: + # we failed, remove the new directory + shutil.rmtree(rdst.root) + raise + finally: + dst_lock.release() + +def _addchangeset(ui, rsrc, rdst, ctx, revmap): + # Convert src parents to dst parents + parents = [] + for p in ctx.parents(): + parents.append(revmap[p.node()]) + while len(parents) < 2: + parents.append(node.nullid) + + # Generate list of changed files + files = set(ctx.files()) + if node.nullid not in parents: + mc = ctx.manifest() + 
def _lfconvert_addchangeset(rsrc, rdst, ctx, revmap, lfiles, normalfiles,
        matcher, size, lfiletohash):
    '''Commit to rdst the largefiles equivalent of source changeset ctx.

    revmap maps already-converted source nodes to destination nodes and
    gains an entry for ctx.  lfiles and normalfiles are sets remembering,
    across changesets, how each file name was classified; lfiletohash maps
    each largefile to the hash last written for it.  All three are updated
    in place.  matcher (may be None) and size are passed on to _islfile().
    Raises util.Abort when a largefile is renamed or copied to a symlink.'''
    # Convert src parents to dst parents
    parents = []
    for p in ctx.parents():
        parents.append(revmap[p.node()])
    while len(parents) < 2:
        parents.append(node.nullid)

    # Generate list of changed files
    files = set(ctx.files())
    if node.nullid not in parents:
        # Merge changeset: ctx.files() is extended with everything that
        # differs from either parent manifest.
        mc = ctx.manifest()
        mp1 = ctx.parents()[0].manifest()
        mp2 = ctx.parents()[1].manifest()
        files |= (set(mp1) | set(mp2)) - set(mc)
        for f in mc:
            if mc[f] != mp1.get(f, None) or mc[f] != mp2.get(f, None):
                files.add(f)

    dstfiles = []
    for f in files:
        if f not in lfiles and f not in normalfiles:
            # First time this name is seen: classify it once and for all.
            islfile = _islfile(f, ctx, matcher, size)
            # If this file was renamed or copied then copy
            # the lfileness of its predecessor
            if f in ctx.manifest():
                fctx = ctx.filectx(f)
                renamed = fctx.renamed()
                renamedlfile = renamed and renamed[0] in lfiles
                islfile |= renamedlfile
                if 'l' in fctx.flags():
                    if renamedlfile:
                        raise util.Abort(
                            _('Renamed/copied largefile %s becomes symlink') % f)
                    islfile = False
            if islfile:
                lfiles.add(f)
            else:
                normalfiles.add(f)

        if f in lfiles:
            dstfiles.append(lfutil.standin(f))
            # lfile in manifest if it has not been removed/renamed
            if f in ctx.manifest():
                fctx = ctx.filectx(f)
                if 'l' in fctx.flags():
                    # Look up the rename information for *this* file; the
                    # variable set in the classification branch above is
                    # unbound or belongs to another file at this point.
                    renamed = fctx.renamed()
                    if renamed and renamed[0] in lfiles:
                        raise util.Abort(_('largefile %s becomes symlink') % f)

                # lfile was modified, update standins
                fullpath = rdst.wjoin(f)
                lfutil.createdir(os.path.dirname(fullpath))
                # Read the (potentially huge) revision only once.
                data = ctx[f].data()
                m = util.sha1('')
                m.update(data)
                hash = m.hexdigest()
                if f not in lfiletohash or lfiletohash[f] != hash:
                    # Open outside the try block so that a failing open()
                    # is reported as such instead of as a NameError from
                    # the cleanup code.
                    fd = open(fullpath, 'wb')
                    try:
                        fd.write(data)
                    finally:
                        fd.close()
                    executable = 'x' in ctx[f].flags()
                    os.chmod(fullpath, lfutil.getmode(executable))
                    lfutil.writestandin(rdst, lfutil.standin(f), hash,
                        executable)
                    lfiletohash[f] = hash
        else:
            # normal file
            dstfiles.append(f)

    def getfilectx(repo, memctx, f):
        if lfutil.isstandin(f):
            # if the file isn't in the manifest then it was removed
            # or renamed, raise IOError to indicate this
            srcfname = lfutil.splitstandin(f)
            try:
                fctx = ctx.filectx(srcfname)
            except error.LookupError:
                raise IOError()
            renamed = fctx.renamed()
            if renamed:
                # standin is always a lfile because lfileness
                # doesn't change after rename or copy
                renamed = lfutil.standin(renamed[0])

            # The standin's content is the hash recorded above.
            return context.memfilectx(f, lfiletohash[srcfname], 'l' in
                fctx.flags(), 'x' in fctx.flags(), renamed)
        else:
            try:
                fctx = ctx.filectx(f)
            except error.LookupError:
                raise IOError()
            renamed = fctx.renamed()
            if renamed:
                renamed = renamed[0]

            data = fctx.data()
            if f == '.hgtags':
                # Tags refer to source nodes; rewrite them to the
                # corresponding destination nodes.
                newdata = []
                for line in data.splitlines():
                    tagnode, name = line.split(' ', 1)
                    newdata.append('%s %s\n' %
                        (node.hex(revmap[node.bin(tagnode)]), name))
                data = ''.join(newdata)
            return context.memfilectx(f, data, 'l' in fctx.flags(),
                                      'x' in fctx.flags(), renamed)

    # Commit
    mctx = context.memctx(rdst, parents, ctx.description(), dstfiles,
                          getfilectx, ctx.user(), ctx.date(), ctx.extra())
    ret = rdst.commitctx(mctx)
    rdst.dirstate.setparents(ret)
    revmap[ctx.node()] = rdst.changelog.tip()
def verifylfiles(ui, repo, all=False, contents=False):
    '''Ask the central store to verify the largefiles of the current
    changeset, or of every changeset in the repository when all is true.

    contents is forwarded to the store's verify() method, whose result
    is returned unchanged.'''
    # A real list is handed over rather than an iterator because we know
    # a list will work.
    revisions = ['.']
    if all:
        revisions = range(len(repo))

    return basestore._openstore(repo).verify(revisions, contents=contents)
def _updatelfile(repo, lfdirstate, lfile):
    '''updates a single largefile and copies the state of its standin from
    the repository's dirstate to its state in the lfdirstate.

    returns 1 if the file was modified, -1 if the file was removed, 0 if the
    file was unchanged, and None if the needed largefile was missing from the
    cache.'''
    ret = 0
    abslfile = repo.wjoin(lfile)
    absstandin = repo.wjoin(lfutil.standin(lfile))
    if os.path.exists(absstandin):
        # Mirror a backup of the standin with a backup of the largefile,
        # but only if there is a largefile to back up; copying a missing
        # file would raise IOError.
        if os.path.exists(absstandin+'.orig') and os.path.exists(abslfile):
            shutil.copyfile(abslfile, abslfile+'.orig')
        expecthash = lfutil.readstandin(repo, lfile)
        if expecthash != '' and \
                (not os.path.exists(abslfile) or \
                expecthash != lfutil.hashfile(abslfile)):
            if not lfutil.copyfromcache(repo, expecthash, lfile):
                return None # don't try to set the mode or update the dirstate
            ret = 1
        # Keep the largefile's permission bits in sync with its standin.
        mode = os.stat(absstandin).st_mode
        if mode != os.stat(abslfile).st_mode:
            os.chmod(abslfile, mode)
            ret = 1
    else:
        # No standin any more: the largefile has to go as well.
        if os.path.exists(abslfile):
            os.unlink(abslfile)
            ret = -1
    state = repo.dirstate[lfutil.standin(lfile)]
    if state == 'n':
        lfdirstate.normal(lfile)
    elif state == 'r':
        lfdirstate.remove(lfile)
    elif state == 'a':
        lfdirstate.add(lfile)
    elif state == '?':
        try:
            # Mercurial >= 1.9
            lfdirstate.drop(lfile)
        except AttributeError:
            # Mercurial <= 1.8
            lfdirstate.forget(lfile)
    return ret
Ward +# Copyright 2009-2010 Intelerad Medical Systems Incorporated +# Copyright 2010-2011 Fog Creek Software +# Copyright 2010-2011 Unity Technologies +# +# This software may be used and distributed according to the terms of the +# GNU General Public License version 2 or any later version. + +'''largefiles utility code: must not import other modules in this package.''' + +import os +import errno +import inspect +import shutil +import stat +import hashlib + +from mercurial import cmdutil, dirstate, httpconnection, match as match_, \ + url as url_, util +from mercurial.i18n import _ + +try: + from mercurial import scmutil +except ImportError: + pass + +shortname = '.hglf' +longname = 'largefiles' + + +# -- Portability wrappers ---------------------------------------------- + +if 'subrepos' in inspect.getargspec(dirstate.dirstate.status)[0]: + # for Mercurial >= 1.5 + def dirstate_walk(dirstate, matcher, unknown=False, ignored=False): + return dirstate.walk(matcher, [], unknown, ignored) +else: + # for Mercurial <= 1.4 + def dirstate_walk(dirstate, matcher, unknown=False, ignored=False): + return dirstate.walk(matcher, unknown, ignored) + +def repo_add(repo, list): + try: + # Mercurial <= 1.5 + add = repo.add + except AttributeError: + # Mercurial >= 1.6 + add = repo[None].add + return add(list) + +def repo_remove(repo, list, unlink=False): + try: + # Mercurial <= 1.5 + remove = repo.remove + except AttributeError: + # Mercurial >= 1.6 + try: + # Mercurial <= 1.8 + remove = repo[None].remove + except AttributeError: + # Mercurial >= 1.9 + def remove(list, unlink): + wlock = repo.wlock() + try: + if unlink: + for f in list: + try: + util.unlinkpath(repo.wjoin(f)) + except OSError, inst: + if inst.errno != errno.ENOENT: + raise + repo[None].forget(list) + finally: + wlock.release() + + return remove(list, unlink=unlink) + +def repo_forget(repo, list): + try: + # Mercurial <= 1.5 + forget = repo.forget + except AttributeError: + # Mercurial >= 1.6 + forget = 
repo[None].forget + return forget(list) + +def findoutgoing(repo, remote, force): + # First attempt is for Mercurial <= 1.5 second is for >= 1.6 + try: + return repo.findoutgoing(remote) + except AttributeError: + from mercurial import discovery + try: + # Mercurial <= 1.8 + return discovery.findoutgoing(repo, remote, force=force) + except AttributeError: + # Mercurial >= 1.9 + common, _anyinc, _heads = discovery.findcommonincoming(repo, + remote, force=force) + return repo.changelog.findmissing(common) + +# -- Private worker functions ------------------------------------------ + +if os.name == 'nt': + from mercurial import win32 + try: + linkfn = win32.oslink + except: + linkfn = win32.os_link +else: + linkfn = os.link + +def link(src, dest): + try: + linkfn(src, dest) + except OSError: + # If hardlinks fail fall back on copy + shutil.copyfile(src, dest) + os.chmod(dest, os.stat(src).st_mode) + +def systemcachepath(ui, hash): + path = ui.config(longname, 'systemcache', None) + if path: + path = os.path.join(path, hash) + else: + if os.name == 'nt': + path = os.path.join(os.getenv('LOCALAPPDATA') or \ + os.getenv('APPDATA'), longname, hash) + elif os.name == 'posix': + path = os.path.join(os.getenv('HOME'), '.' 
+ longname, hash) + else: + raise util.Abort(_('Unknown operating system: %s\n') % os.name) + return path + +def insystemcache(ui, hash): + return os.path.exists(systemcachepath(ui, hash)) + +def findfile(repo, hash): + if incache(repo, hash): + repo.ui.note(_('Found %s in cache\n') % hash) + return cachepath(repo, hash) + if insystemcache(repo.ui, hash): + repo.ui.note(_('Found %s in system cache\n') % hash) + return systemcachepath(repo.ui, hash) + return None + +class largefiles_dirstate(dirstate.dirstate): + def __getitem__(self, key): + return super(largefiles_dirstate, self).__getitem__(unixpath(key)) + def normal(self, f): + return super(largefiles_dirstate, self).normal(unixpath(f)) + def remove(self, f): + return super(largefiles_dirstate, self).remove(unixpath(f)) + def add(self, f): + return super(largefiles_dirstate, self).add(unixpath(f)) + def drop(self, f): + return super(largefiles_dirstate, self).drop(unixpath(f)) + def forget(self, f): + return super(largefiles_dirstate, self).forget(unixpath(f)) + +def openlfdirstate(ui, repo): + ''' + Return a dirstate object that tracks big files: i.e. its root is the + repo root, but it is saved in .hg/largefiles/dirstate. + ''' + admin = repo.join(longname) + try: + # Mercurial >= 1.9 + opener = scmutil.opener(admin) + except ImportError: + # Mercurial <= 1.8 + opener = util.opener(admin) + if hasattr(repo.dirstate, '_validate'): + lfdirstate = largefiles_dirstate(opener, ui, repo.root, + repo.dirstate._validate) + else: + lfdirstate = largefiles_dirstate(opener, ui, repo.root) + + # If the largefiles dirstate does not exist, populate and create it. This + # ensures that we create it on the first meaningful largefiles operation in + # a new clone. 
It also gives us an easy way to forcibly rebuild largefiles + # state: + # rm .hg/largefiles/dirstate && hg status + # Or even, if things are really messed up: + # rm -rf .hg/largefiles && hg status + if not os.path.exists(os.path.join(admin, 'dirstate')): + util.makedirs(admin) + matcher = getstandinmatcher(repo) + for standin in dirstate_walk(repo.dirstate, matcher): + lfile = splitstandin(standin) + hash = readstandin(repo, lfile) + lfdirstate.normallookup(lfile) + try: + if hash == hashfile(lfile): + lfdirstate.normal(lfile) + except IOError, err: + if err.errno != errno.ENOENT: + raise + + lfdirstate.write() + + return lfdirstate + +def lfdirstate_status(lfdirstate, repo, rev): + wlock = repo.wlock() + try: + match = match_.always(repo.root, repo.getcwd()) + s = lfdirstate.status(match, [], False, False, False) + unsure, modified, added, removed, missing, unknown, ignored, clean = s + for lfile in unsure: + if repo[rev][standin(lfile)].data().strip() != \ + hashfile(repo.wjoin(lfile)): + modified.append(lfile) + else: + clean.append(lfile) + lfdirstate.normal(lfile) + lfdirstate.write() + finally: + wlock.release() + return (modified, added, removed, missing, unknown, ignored, clean) + +def listlfiles(repo, rev=None, matcher=None): + '''list largefiles in the working copy or specified changeset''' + + if matcher is None: + matcher = getstandinmatcher(repo) + + # ignore unknown files in working directory + return [splitstandin(f) for f in repo[rev].walk(matcher) \ + if rev is not None or repo.dirstate[f] != '?'] + +def incache(repo, hash): + return os.path.exists(cachepath(repo, hash)) + +def createdir(dir): + if not os.path.exists(dir): + os.makedirs(dir) + +def cachepath(repo, hash): + return repo.join(os.path.join(longname, hash)) + +def copyfromcache(repo, hash, filename): + '''copyfromcache copies the specified largefile from the repo or system + cache to the specified location in the repository. 
def getstandinmatcher(repo, pats=[], opts={}):
    '''Build a matcher that applies pats inside the standin directory.

    Without patterns the whole standin directory is matched; if that
    directory does not exist the returned matcher matches nothing.  The
    matcher never reports bad (missing) files.'''
    standindir = repo.pathto(shortname)
    if not pats:
        if not os.path.isdir(standindir):
            # no patterns and no standin dir: return matcher that matches
            # nothing
            nothing = match_.match(repo.root, None, [], exact=True)
            nothing.matchfn = lambda f: False
            return nothing
        # no patterns: relative to repo root
        pats = [standindir]
    else:
        # patterns supplied: search standin directory relative to current dir
        cwd = repo.getcwd()
        if os.path.isabs(cwd):
            # cwd is an absolute path for hg -R
            # work relative to the repository root in this case
            cwd = ''
        pats = [os.path.join(standindir, cwd, pat) for pat in pats]
    return getmatcher(repo, pats, opts, showbad=False)
def standin(filename):
    '''Map a largefile name to the repo-relative path of its standin.

    The result is always slash-separated.'''
    # The path is deliberately left repo-relative; callers that need an
    # absolute path are expected to apply repo.wjoin() themselves.
    # Local separators are converted to '/' first because that is what
    # dirstate uses, even on Windows, and the name may come from an
    # external source such as the command line.
    slashed = filename.replace(os.sep, '/')
    return '/'.join((shortname, slashed))
def writestandin(repo, standin, hash, executable):
    '''Write hash to the standin file at repo.wjoin(standin).

    standin is the repo-relative standin path.  The work is delegated to
    writehash(), which stores the hash followed by a newline and sets the
    file mode according to executable.'''
    writehash(hash, repo.wjoin(standin), executable)
class limitreader(object):
    '''Wrap the file-like object f so that at most limit bytes can be read.

    Once the limit is used up, read() returns the empty string.'''
    def __init__(self, f, limit):
        self.f = f
        # number of bytes that may still be handed out
        self.limit = limit

    def read(self, length=-1):
        '''Read up to length bytes, never more than the remaining limit.

        A negative or None length means "everything still allowed".'''
        if self.limit <= 0:
            return ''
        # A negative length used to slip past the cap (and even raised the
        # limit); clamp every request to what is left.
        if length is None or length < 0 or length > self.limit:
            length = self.limit
        data = self.f.read(length)
        # Charge only what was actually delivered, so a short read from the
        # underlying stream does not eat into the remaining allowance.
        self.limit -= len(data)
        return data

    def close(self):
        # The wrapped file is owned by the caller; leave it open.
        pass
def unixpath(path):
    '''Return path normalized and rewritten with '/' as the separator.

    This is the canonical form in which paths are handed to the
    lfdirstate.'''
    normalized = os.path.normpath(path)
    return '/'.join(normalized.split(os.sep))
    def _verifyfile(self, cctx, cset, contents, standin, verified):
        '''Verify the largefile behind standin against the system cache.

        cctx is indexed by standin to obtain the standin's file context;
        cset is only used to label warnings.  verified is a set-like
        collection of (filename, filenode) keys that were already checked;
        the key for this standin is added to it.  When contents is true the
        cached file is also re-hashed and compared with the expected hash.

        Returns True when verification FAILED (file missing from the cache
        or contents differ) and False otherwise, including when standin is
        not a standin path or its key was already verified.'''
        filename = lfutil.splitstandin(standin)
        if not filename:
            return False
        fctx = cctx[standin]
        key = (filename, fctx.filenode())
        if key in verified:
            return False

        # the standin's data starts with the 40-character hex hash
        expecthash = fctx.data()[0:40]
        verified.add(key)
        if not lfutil.insystemcache(self.ui, expecthash):
            self.ui.warn(
                _('changeset %s: %s missing\n'
                  ' (looked for hash %s)\n')
                % (cset, filename, expecthash))
            return True # failed

        if contents:
            storepath = lfutil.systemcachepath(self.ui, expecthash)
            actualhash = lfutil.hashfile(storepath)
            if actualhash != expecthash:
                self.ui.warn(
                    _('changeset %s: %s: contents differ\n'
                      ' (%s:\n'
                      ' expected hash %s,\n'
                      ' but got %s)\n')
                    % (cset, filename, storepath, expecthash, actualhash))
                return True # failed
        return False
def installmatchfn(f):
    '''Monkeypatch f in as Mercurial's match function.

    The function being replaced is stored on f as the attribute
    ``oldmatch`` (restorematchfn() reads it to undo the patch) and is also
    returned.

    The patch is applied to scmutil when that module is available and
    provides ``match``; otherwise it is applied to cmdutil.'''
    try:
        # Mercurial >= 1.9
        mod = scmutil
        oldmatch = mod.match
    except (ImportError, NameError, AttributeError):
        # Mercurial <= 1.8: the guarded import at the top of this file left
        # the name scmutil unbound (NameError, not ImportError), or the
        # module exists but does not carry match yet (AttributeError).
        mod = cmdutil
        oldmatch = mod.match
    setattr(f, 'oldmatch', oldmatch)
    # Patch the same module the old function was taken from.
    mod.match = f
    return oldmatch
def override_add(orig, ui, repo, *pats, **opts):
    '''Wrapper for "hg add" that adds qualifying files as largefiles.

    A walked file becomes a largefile when --large was given, when its size
    reaches the configured/explicit lfsize (in megabytes), or when it
    matches the configured largefiles patterns.  For those files a standin
    is written and added; everything else is passed to the original add
    with a matcher that ignores largefiles.

    Returns 1 if the original command returned 1 or any standin could not
    be added, otherwise 0.  Raises util.Abort if lfsize is not an
    integer.'''
    large = opts.pop('large', None)

    lfsize = opts.pop('lfsize', None)
    if not lfsize and lfutil.islfilesrepo(repo):
        lfsize = ui.config(lfutil.longname, 'size', default='10')
    if lfsize:
        try:
            lfsize = int(lfsize)
        except ValueError:
            raise util.Abort(_('largefiles: size must be an integer, was %s\n') % lfsize)

    lfmatcher = None
    if os.path.exists(repo.wjoin(lfutil.shortname)):
        lfpats = ui.config(lfutil.longname, 'patterns', default=())
        if lfpats:
            lfpats = lfpats.split(' ')
            lfmatcher = match_.match(repo.root, '', list(lfpats))

    lfnames = []
    try:
        # Mercurial >= 1.9
        m = scmutil.match(repo[None], pats, opts)
    except ImportError:
        # Mercurial <= 1.8
        m = cmdutil.match(repo, pats, opts)
    m.bad = lambda x, y: None
    wctx = repo[None]
    for f in repo.walk(m):
        exact = m.exact(f)
        lfile = lfutil.standin(f) in wctx
        nfile = f in wctx
        exists = lfile or nfile

        # Don't warn the user when they attempt to add a normal tracked file.
        # The normal add code will do that for us.
        if exact and exists:
            if lfile:
                ui.warn(_('%s already a largefile\n') % f)
            continue

        if exact or not exists:
            if large or (lfsize and os.path.getsize(repo.wjoin(f)) >= \
                    lfsize * 1024 * 1024) or (lfmatcher and lfmatcher(f)):
                lfnames.append(f)
                if ui.verbose or not exact:
                    ui.status(_('adding %s as a largefile\n') % m.rel(f))

    bad = []
    standins = []

    # Need to lock otherwise there could be a race condition inbetween when
    # standins are created and added to the repo
    wlock = repo.wlock()
    try:
        if not opts.get('dry_run'):
            lfdirstate = lfutil.openlfdirstate(ui, repo)
            for f in lfnames:
                standinname = lfutil.standin(f)
                lfutil.writestandin(repo, standinname, hash='',
                    executable=lfutil.getexecutable(repo.wjoin(f)))
                standins.append(standinname)
                if lfdirstate[f] == 'r':
                    lfdirstate.normallookup(f)
                else:
                    lfdirstate.add(f)
            lfdirstate.write()
            bad += [lfutil.splitstandin(f) for f in lfutil.repo_add(repo,
                standins) if f in m.files()]
    finally:
        wlock.release()

    installnormalfilesmatchfn(repo[None].manifest())
    try:
        result = orig(ui, repo, *pats, **opts)
    finally:
        # Always undo the matcher monkeypatch, even when the original add
        # raises; otherwise every later command in this process would keep
        # ignoring largefiles.
        restorematchfn()

    return (result == 1 or bad) and 1 or 0
def override_log(orig, ui, repo, *pats, **opts):
    '''Run the original log command with repo.lfstatus switched on.

    repo.lfstatus is reset to False afterwards, even if the command
    raises.  Returns whatever the original command returns, so that its
    exit status is not lost.'''
    try:
        repo.lfstatus = True
        return orig(ui, repo, *pats, **opts)
    finally:
        repo.lfstatus = False
def override_update(orig, ui, repo, *pats, **opts):
    '''Wrapper for "hg update" that refreshes standins before updating.

    Reads opts['check'] and opts['clean'].  With check set, util.Abort is
    raised when any largefile has uncommitted changes.  Unless clean is
    set, the standins of unsure, modified and added largefiles are
    rewritten from the working copy before the original command runs.
    Returns the result of the original command.'''
    lfdirstate = lfutil.openlfdirstate(ui, repo)
    s = lfdirstate.status(match_.always(repo.root, repo.getcwd()), [], False,
        False, False)
    (unsure, modified, added, removed, missing, unknown, ignored, clean) = s

    # Need to lock between the standins getting updated and their lfiles
    # getting updated
    wlock = repo.wlock()
    try:
        if opts['check']:
            mod = len(modified) > 0
            for lfile in unsure:
                standin = lfutil.standin(lfile)
                # An "unsure" largefile is modified only if the hash stored
                # in the parent revision's standin differs from the hash of
                # the working copy file.
                if repo['.'][standin].data().strip() != \
                        lfutil.hashfile(repo.wjoin(lfile)):
                    mod = True
                else:
                    # unchanged after all: record it as clean
                    lfdirstate.normal(lfile)
            lfdirstate.write()
            if mod:
                raise util.Abort(_('uncommitted local changes'))
        # XXX handle removed differently
        if not opts['clean']:
            for lfile in unsure + modified + added:
                lfutil.updatestandin(repo, lfutil.standin(lfile))
    finally:
        wlock.release()
    return orig(ui, repo, *pats, **opts)
+ return None + + # backwards, use working dir parent as ancestor + if fcancestor == fcother: + fcancestor = fcdest.parents()[0] + + if orig != fcother.path(): + repo.ui.status(_('merging %s and %s to %s\n') + % (lfutil.splitstandin(orig), + lfutil.splitstandin(fcother.path()), + lfutil.splitstandin(fcdest.path()))) + else: + repo.ui.status(_('merging %s\n') + % lfutil.splitstandin(fcdest.path())) + + if fcancestor.path() != fcother.path() and fcother.data() == \ + fcancestor.data(): + return 0 + if fcancestor.path() != fcdest.path() and fcdest.data() == \ + fcancestor.data(): + repo.wwrite(fcdest.path(), fcother.data(), fcother.flags()) + return 0 + + if repo.ui.promptchoice(_('largefile %s has a merge conflict\n' + 'keep (l)ocal or take (o)ther?') % + lfutil.splitstandin(orig), + (_('&Local'), _('&Other')), 0) == 0: + return 0 + else: + repo.wwrite(fcdest.path(), fcother.data(), fcother.flags()) + return 0 + +# Copy first changes the matchers to match standins instead of lfiles. +# Then it overrides util.copyfile in that function it checks if the destination +# lfile already exists. It also keeps a list of copied files so that the lfiles +# can be copied and the dirstate updated. 
+def override_copy(orig, ui, repo, pats, opts, rename=False): + # doesn't remove lfile on rename + if len(pats) < 2: + # this isn't legal, let the original function deal with it + return orig(ui, repo, pats, opts, rename) + + def makestandin(relpath): + try: + # Mercurial >= 1.9 + path = scmutil.canonpath(repo.root, repo.getcwd(), relpath) + except ImportError: + # Mercurial <= 1.8 + path = util.canonpath(repo.root, repo.getcwd(), relpath) + return os.path.join(os.path.relpath('.', repo.getcwd()), + lfutil.standin(path)) + + try: + # Mercurial >= 1.9 + fullpats = scmutil.expandpats(pats) + except ImportError: + # Mercurial <= 1.8 + fullpats = cmdutil.expandpats(pats) + dest = fullpats[-1] + + if os.path.isdir(dest): + if not os.path.isdir(makestandin(dest)): + os.makedirs(makestandin(dest)) + # This could copy both lfiles and normal files in one command, but we don't + # want to do that first replace their matcher to only match normal files + # and run it then replace it to just match lfiles and run it again + nonormalfiles = False + nolfiles = False + try: + installnormalfilesmatchfn(repo[None].manifest()) + result = orig(ui, repo, pats, opts, rename) + except util.Abort, e: + if str(e) != 'no files to copy': + raise e + else: + nonormalfiles = True + result = 0 + finally: + restorematchfn() + + # The first rename can cause our current working directory to be removed. + # In that case there is nothing left to copy/rename so just quit. + try: + repo.getcwd() + except OSError: + return result + + try: + # When we call orig below it creates the standins but we don't add them + # to the dir state until later so lock during that time. 
+ wlock = repo.wlock() + + manifest = repo[None].manifest() + oldmatch = None # for the closure + def override_match(repo, pats=[], opts={}, globbed=False, + default='relpath'): + newpats = [] + # The patterns were previously mangled to add the standin + # directory; we need to remove that now + for pat in pats: + if match_.patkind(pat) is None and lfutil.shortname in pat: + newpats.append(pat.replace(lfutil.shortname, '')) + else: + newpats.append(pat) + match = oldmatch(repo, newpats, opts, globbed, default) + m = copy.copy(match) + lfile = lambda f: lfutil.standin(f) in manifest + m._files = [lfutil.standin(f) for f in m._files if lfile(f)] + m._fmap = set(m._files) + orig_matchfn = m.matchfn + m.matchfn = lambda f: lfutil.isstandin(f) and \ + lfile(lfutil.splitstandin(f)) and \ + orig_matchfn(lfutil.splitstandin(f)) or None + return m + oldmatch = installmatchfn(override_match) + listpats = [] + for pat in pats: + if match_.patkind(pat) is not None: + listpats.append(pat) + else: + listpats.append(makestandin(pat)) + + try: + origcopyfile = util.copyfile + copiedfiles = [] + def override_copyfile(src, dest): + if lfutil.shortname in src and lfutil.shortname in dest: + destlfile = dest.replace(lfutil.shortname, '') + if not opts['force'] and os.path.exists(destlfile): + raise IOError('', + _('destination largefile already exists')) + copiedfiles.append((src, dest)) + origcopyfile(src, dest) + + util.copyfile = override_copyfile + result += orig(ui, repo, listpats, opts, rename) + finally: + util.copyfile = origcopyfile + + lfdirstate = lfutil.openlfdirstate(ui, repo) + for (src, dest) in copiedfiles: + if lfutil.shortname in src and lfutil.shortname in dest: + srclfile = src.replace(lfutil.shortname, '') + destlfile = dest.replace(lfutil.shortname, '') + destlfiledir = os.path.dirname(destlfile) or '.' 
+ if not os.path.isdir(destlfiledir): + os.makedirs(destlfiledir) + if rename: + os.rename(srclfile, destlfile) + lfdirstate.remove(os.path.relpath(srclfile, + repo.root)) + else: + util.copyfile(srclfile, destlfile) + lfdirstate.add(os.path.relpath(destlfile, + repo.root)) + lfdirstate.write() + except util.Abort, e: + if str(e) != 'no files to copy': + raise e + else: + nolfiles = True + finally: + restorematchfn() + wlock.release() + + if nolfiles and nonormalfiles: + raise util.Abort(_('no files to copy')) + + return result + +# When the user calls revert, we have to be careful to not revert any changes +# to other lfiles accidentally. This means we have to keep track of the lfiles +# that are being reverted so we only pull down the necessary lfiles. +# +# Standins are only updated (to match the hash of lfiles) before commits. +# Update the standins then run the original revert (changing the matcher to hit +# standins instead of lfiles). Based on the resulting standins update the +# lfiles. Then return the standins to their proper state +def override_revert(orig, ui, repo, *pats, **opts): + # Because we put the standins in a bad state (by updating them) and then + # return them to a correct state we need to lock to prevent others from + # changing them in their incorrect state. 
+ wlock = repo.wlock() + try: + lfdirstate = lfutil.openlfdirstate(ui, repo) + (modified, added, removed, missing, unknown, ignored, clean) = \ + lfutil.lfdirstate_status(lfdirstate, repo, repo['.'].rev()) + for lfile in modified: + lfutil.updatestandin(repo, lfutil.standin(lfile)) + + try: + ctx = repo[opts.get('rev')] + oldmatch = None # for the closure + def override_match(ctxorrepo, pats=[], opts={}, globbed=False, + default='relpath'): + if hasattr(ctxorrepo, 'match'): + ctx0 = ctxorrepo + else: + ctx0 = ctxorrepo[None] + match = oldmatch(ctxorrepo, pats, opts, globbed, default) + m = copy.copy(match) + def tostandin(f): + if lfutil.standin(f) in ctx0 or lfutil.standin(f) in ctx: + return lfutil.standin(f) + elif lfutil.standin(f) in repo[None]: + return None + return f + m._files = [tostandin(f) for f in m._files] + m._files = [f for f in m._files if f is not None] + m._fmap = set(m._files) + orig_matchfn = m.matchfn + def matchfn(f): + if lfutil.isstandin(f): + # We need to keep track of what lfiles are being + # matched so we know which ones to update later + # (otherwise we revert changes to other lfiles + # accidentally). This is repo specific, so duckpunch + # the repo object to keep the list of lfiles for us + # later. 
def hg_update(orig, repo, node):
    '''Wrap the update function so largefiles follow the standins.

    Calls the original update, then lfcommands.updatelfiles(), and hands
    back the original's result.'''
    outcome = orig(repo, node)
    # XXX check if it worked first
    lfcommands.updatelfiles(repo.ui, repo)
    return outcome
def override_rebase(orig, ui, repo, **opts):
    '''Run the original rebase command with repo._isrebasing set.

    The flag is cleared again afterwards, even if the command raises.
    Returns whatever the original command returns, so that its exit
    status is not lost.'''
    repo._isrebasing = True
    try:
        return orig(ui, repo, **opts)
    finally:
        repo._isrebasing = False
matchfn(name): + return + data = getdata() + if decode: + data = repo.wwritedata(name, data) + archiver.addfile(name, mode, islink, data) + except TypeError: + if kind == 'files': + if prefix: + raise util.Abort( + _('cannot give prefix when archiving to files')) + else: + prefix = archival.tidyprefix(dest, kind, prefix) + + def write(name, mode, islink, getdata): + if matchfn and not matchfn(name): + return + data = getdata() + if decode: + data = repo.wwritedata(name, data) + archiver.addfile(prefix + name, mode, islink, data) + + archiver = archival.archivers[kind](dest, mtime or ctx.date()[0]) + + if repo.ui.configbool("ui", "archivemeta", True): + def metadata(): + base = 'repo: %s\nnode: %s\nbranch: %s\n' % ( + hex(repo.changelog.node(0)), hex(node), ctx.branch()) + + tags = ''.join('tag: %s\n' % t for t in ctx.tags() + if repo.tagtype(t) == 'global') + if not tags: + repo.ui.pushbuffer() + opts = {'template': '{latesttag}\n{latesttagdistance}', + 'style': '', 'patch': None, 'git': None} + cmdutil.show_changeset(repo.ui, repo, opts).show(ctx) + ltags, dist = repo.ui.popbuffer().split('\n') + tags = ''.join('latesttag: %s\n' % t for t in ltags.split(':')) + tags += 'latesttagdistance: %s\n' % dist + + return base + tags + + write('.hg_archival.txt', 0644, False, metadata) + + for f in ctx: + ff = ctx.flags(f) + getdata = ctx[f].data + if lfutil.isstandin(f): + path = lfutil.findfile(repo, getdata().strip()) + f = lfutil.splitstandin(f) + + def getdatafn(): + try: + fd = open(path, 'rb') + return fd.read() + finally: + fd.close() + + getdata = getdatafn + write(f, 'x' in ff and 0755 or 0644, 'l' in ff, getdata) + + if subrepos: + for subpath in ctx.substate: + sub = ctx.sub(subpath) + try: + sub.archive(repo.ui, archiver, prefix) + except TypeError: + sub.archive(archiver, prefix) + + archiver.done() + +# If a lfile is modified the change is not reflected in its standin until a +# commit. 
cmdutil.bailifchanged raises an exception if the repo has
+# uncommitted changes. Wrap it to also check if lfiles were changed. This is
+# used by bisect and backout.
+def override_bailifchanged(orig, repo):
+    '''Run the wrapped check, then also abort if status with lfstatus set
+    reports modified, added, removed or deleted files.'''
+    orig(repo)
+    try:
+        repo.lfstatus = True
+        modified, added, removed, deleted = repo.status()[:4]
+    finally:
+        # Always reset the flag, even if status() raises.
+        repo.lfstatus = False
+    if modified or added or removed or deleted:
+        raise util.Abort(_('outstanding uncommitted changes'))
+
+# Fetch doesn't use cmdutil.bail_if_changed so override it to add the check
+def override_fetch(orig, ui, repo, *pats, **opts):
+    '''Abort if status with lfstatus set reports changes; otherwise run the
+    wrapped fetch command and return its result.'''
+    try:
+        repo.lfstatus = True
+        modified, added, removed, deleted = repo.status()[:4]
+    finally:
+        # Always reset the flag, even if status() raises.
+        repo.lfstatus = False
+    if modified or added or removed or deleted:
+        raise util.Abort(_('outstanding uncommitted changes'))
+    return orig(ui, repo, *pats, **opts)
+
+def override_forget(orig, ui, repo, *pats, **opts):
+    '''Run the wrapped forget command, then forget the matching largefiles
+    by updating the lfdirstate and removing their standins.'''
+    installnormalfilesmatchfn(repo[None].manifest())
+    try:
+        orig(ui, repo, *pats, **opts)
+    finally:
+        # Undo the match function override even when the wrapped command
+        # fails.
+        restorematchfn()
+    try:
+        # Mercurial >= 1.9
+        m = scmutil.match(repo[None], pats, opts)
+    except ImportError:
+        # Mercurial <= 1.8
+        m = cmdutil.match(repo, pats, opts)
+
+    try:
+        repo.lfstatus = True
+        s = repo.status(match=m, clean=True)
+    finally:
+        repo.lfstatus = False
+    # Candidates: modified, added, deleted and clean files whose standin is
+    # in the working manifest.
+    forget = sorted(s[0] + s[1] + s[3] + s[6])
+    forget = [f for f in forget if lfutil.standin(f) in repo[None].manifest()]
+
+    for f in forget:
+        if lfutil.standin(f) not in repo.dirstate and not \
+                os.path.isdir(m.rel(lfutil.standin(f))):
+            ui.warn(_('not removing %s: file is already untracked\n')
+                    % m.rel(f))
+
+    for f in forget:
+        if ui.verbose or not m.exact(f):
+            ui.status(_('removing %s\n') % m.rel(f))
+
+    # Need to lock because standin files are deleted then removed from the
+    # repository and we could race in between.
+    wlock = repo.wlock()
+    try:
+        lfdirstate = lfutil.openlfdirstate(ui, repo)
+        for f in forget:
+            if lfdirstate[f] == 'a':
+                lfdirstate.drop(f)
+            else:
+                lfdirstate.remove(f)
+        lfdirstate.write()
+        lfutil.repo_remove(repo, [lfutil.standin(f) for f in forget],
+            unlink=True)
+    finally:
+        wlock.release()
+
+def getoutgoinglfiles(ui, repo, dest=None, **opts):
+    '''Return the set of standin names touched by outgoing changesets.
+
+    Returns None when the destination repository cannot be opened or when
+    lfutil.findoutgoing reports nothing outgoing.'''
+    dest = ui.expandpath(dest or 'default-push', dest or 'default')
+    dest, branches = hg.parseurl(dest, opts.get('branch'))
+    revs, checkout = hg.addbranchrevs(repo, repo, branches, opts.get('rev'))
+    if revs:
+        revs = [repo.lookup(rev) for rev in revs]
+
+    # Mercurial <= 1.5 had remoteui in cmdutil, then it moved to hg
+    try:
+        remoteui = cmdutil.remoteui
+    except AttributeError:
+        remoteui = hg.remoteui
+
+    try:
+        remote = hg.repository(remoteui(repo, opts), dest)
+    except error.RepoError:
+        return None
+    o = lfutil.findoutgoing(repo, remote, False)
+    if not o:
+        return None
+    o = repo.changelog.nodesbetween(o, revs)[0]
+    if opts.get('newest_first'):
+        o.reverse()
+
+    toupload = set()
+    for n in o:
+        parents = [p for p in repo.changelog.parents(n) if p != node.nullid]
+        ctx = repo[n]
+        files = set(ctx.files())
+        if len(parents) == 2:
+            # For a merge, also consider every file that differs from either
+            # parent manifest, not only what ctx.files() lists.
+            mc = ctx.manifest()
+            mp1 = ctx.parents()[0].manifest()
+            mp2 = ctx.parents()[1].manifest()
+            for f in mp1:
+                if f not in mc:
+                    files.add(f)
+            for f in mp2:
+                if f not in mc:
+                    files.add(f)
+            for f in mc:
+                if mc[f] != mp1.get(f, None) or mc[f] != mp2.get(f, None):
+                    files.add(f)
+        toupload = toupload.union(set([f for f in files if lfutil.isstandin(f)\
+            and f in ctx]))
+    return toupload
+
+def override_outgoing(orig, ui, repo, dest=None, **opts):
+    '''Run the wrapped outgoing command and, with --large, list the
+    largefiles that would be uploaded.
+
+    Returns the wrapped command's result so its exit status is preserved.'''
+    result = orig(ui, repo, dest, **opts)
+
+    if opts.pop('large', None):
+        toupload = getoutgoinglfiles(ui, repo, dest, **opts)
+        if toupload is None:
+            ui.status(_('largefiles: No remote repo\n'))
+        else:
+            ui.status(_('largefiles to upload:\n'))
+            for file in toupload:
+                ui.status(lfutil.splitstandin(file) + '\n')
+            ui.status('\n')
+    return result
+
+def override_summary(orig, ui, repo, *pats, **opts):
+    '''Run the wrapped summary command and, with --large, report how many
+    largefiles would be uploaded to the default destination.'''
+    orig(ui, repo, *pats, **opts)
+
+    if opts.pop('large', None):
+        toupload = getoutgoinglfiles(ui, repo, None, **opts)
+        if toupload is None:
+            ui.status(_('largefiles: No remote repo\n'))
+        else:
+            ui.status(_('largefiles: %d to upload\n') % len(toupload))
+
+def override_addremove(orig, ui, repo, *pats, **opts):
+    '''Refuse addremove when tip or the working copy contains largefiles;
+    otherwise run the wrapped command and return its result.'''
+    # Check if the parent or child has lfiles if they do don't allow it. If
+    # there is a symlink in the manifest then getting the manifest throws an
+    # exception catch it and let addremove deal with it. This happens in
+    # Mercurial's test test-addremove-symlink
+    try:
+        manifesttip = set(repo['tip'].manifest())
+    except util.Abort:
+        manifesttip = set()
+    try:
+        manifestworking = set(repo[None].manifest())
+    except util.Abort:
+        manifestworking = set()
+
+    # Manifests are only iterable so turn them into sets then union
+    for file in manifesttip.union(manifestworking):
+        if file.startswith(lfutil.shortname):
+            raise util.Abort(
+                _('addremove cannot be run on a repo with largefiles'))
+
+    return orig(ui, repo, *pats, **opts)
+
+# Calling purge with --all will cause the lfiles to be deleted.
+# Override repo.status to prevent this from happening.
+def override_purge(orig, ui, repo, *dirs, **opts):
+    '''Run the wrapped purge command with repo.status temporarily replaced
+    by a version that hides files known to the lfdirstate from the unknown
+    and ignored lists.'''
+    oldstatus = repo.status
+    def override_status(node1='.', node2=None, match=None, ignored=False,
+            clean=False, unknown=False, listsubrepos=False):
+        r = oldstatus(node1, node2, match, ignored, clean, unknown,
+                listsubrepos)
+        lfdirstate = lfutil.openlfdirstate(ui, repo)
+        modified, added, removed, deleted, unknown, ignored, clean = r
+        # Keep only the files the lfdirstate does not know about ('?').
+        unknown = [f for f in unknown if lfdirstate[f] == '?']
+        ignored = [f for f in ignored if lfdirstate[f] == '?']
+        return modified, added, removed, deleted, unknown, ignored, clean
+    repo.status = override_status
+    try:
+        orig(ui, repo, *dirs, **opts)
+    finally:
+        # Put the real status back even if purge fails, so the repo object
+        # is not left patched.
+        repo.status = oldstatus
+
+def override_rollback(orig, ui, repo, **opts):
+    '''Run the wrapped rollback command, then force-update the standins and
+    bring the lfdirstate in line with the resulting working copy.'''
+    result = orig(ui, repo, **opts)
+    merge.update(repo, node=None, branchmerge=False, force=True,
+        partial=lfutil.isstandin)
+    lfdirstate = lfutil.openlfdirstate(ui, repo)
+    lfiles = lfutil.listlfiles(repo)
+    oldlfiles = lfutil.listlfiles(repo, repo[None].parents()[0].rev())
+    for file in lfiles:
+        if file in oldlfiles:
+            lfdirstate.normallookup(file)
+        else:
+            lfdirstate.add(file)
+    lfdirstate.write()
+    return result
diff --git a/hgext/largefiles/proto.py b/hgext/largefiles/proto.py
new file mode 100644
--- /dev/null
+++ b/hgext/largefiles/proto.py
@@ -0,0 +1,161 @@
+# Copyright 2011 Fog Creek Software
+#
+# This software may be used and distributed according to the terms of the
+# GNU General Public License version 2 or any later version.
+
+import os
+import tempfile
+import urllib2
+
+from mercurial import error, httprepo, util, wireproto
+from mercurial.i18n import _
+
+import lfutil
+
+LARGEFILES_REQUIRED_MSG = '\nThis repository uses the largefiles extension.' \
+                          '\n\nPlease enable it in your Mercurial config ' \
+                          'file.\n'
+
+def putlfile(repo, proto, sha):
+    """putlfile puts a largefile into a repository's local cache and into the
+    system cache.
+
+    Returns wireproto.pushres(0) on success and wireproto.pushres(1) when
+    the received data does not hash to sha or an IOError occurs."""
+    f = None
+    proto.redirect()
+    try:
+        try:
+            f = tempfile.NamedTemporaryFile(mode='wb+', prefix='hg-putlfile-')
+            proto.getfile(f)
+            f.seek(0)
+            if sha != lfutil.hexsha1(f):
+                return wireproto.pushres(1)
+            lfutil.copytocacheabsolute(repo, f.name, sha)
+        except IOError:
+            repo.ui.warn(
+                _('error: could not put received data into largefile store'))
+            return wireproto.pushres(1)
+    finally:
+        if f:
+            f.close()
+
+    return wireproto.pushres(0)
+
+def getlfile(repo, proto, sha):
+    """getlfile retrieves a largefile from the repository-local cache or system
+    cache.
+
+    Raises util.Abort when lfutil.findfile cannot locate the largefile."""
+    filename = lfutil.findfile(repo, sha)
+    if not filename:
+        raise util.Abort(_('requested largefile %s not present in cache') % sha)
+    f = open(filename, 'rb')
+    length = os.fstat(f.fileno())[6]
+    # since we can't set an HTTP content-length header here, and mercurial core
+    # provides no way to give the length of a streamres (and reading the entire
+    # file into RAM would be ill-advised), we just send the length on the first
+    # line of the response, like the ssh proto does for string responses.
+    def generator():
+        yield '%d\n' % length
+        for chunk in f:
+            yield chunk
+    return wireproto.streamres(generator())
+
+def statlfile(repo, proto, sha):
+    """statlfile sends '2\n' if the largefile is missing, '1\n' if it has a
+    mismatched checksum, or '0\n' if it is in good condition"""
+    filename = lfutil.findfile(repo, sha)
+    if not filename:
+        return '2\n'
+    fd = None
+    try:
+        fd = open(filename, 'rb')
+        return lfutil.hexsha1(fd) == sha and '0\n' or '1\n'
+    finally:
+        if fd:
+            fd.close()
+
+def wirereposetup(ui, repo):
+    """Swap repo's class for a subclass that adds the putlfile, getlfile and
+    statlfile client-side wire commands."""
+    class lfileswirerepository(repo.__class__):
+        def putlfile(self, sha, fd):
+            # unfortunately, httprepository._callpush tries to convert its
+            # input file-like into a bundle before sending it, so we can't use
+            # it ...
+            if issubclass(self.__class__, httprepo.httprepository):
+                try:
+                    return int(self._call('putlfile', data=fd, sha=sha,
+                        headers={'content-type':'application/mercurial-0.1'}))
+                except (ValueError, urllib2.HTTPError):
+                    return 1
+            # ... but we can't use sshrepository._call because the data=
+            # argument won't get sent, and _callpush does exactly what we want
+            # in this case: send the data straight through
+            else:
+                try:
+                    ret, output = self._callpush("putlfile", fd, sha=sha)
+                    if ret == "":
+                        raise error.ResponseError(_('putlfile failed:'),
+                                output)
+                    return int(ret)
+                except IOError:
+                    return 1
+                except ValueError:
+                    raise error.ResponseError(
+                        _('putlfile failed (unexpected response):'), ret)
+
+        def getlfile(self, sha):
+            # The first line of the response carries the length; the rest of
+            # the stream is the largefile itself.
+            stream = self._callstream("getlfile", sha=sha)
+            length = stream.readline()
+            try:
+                length = int(length)
+            except ValueError:
+                self._abort(error.ResponseError(_("unexpected response:"), length))
+            return (length, stream)
+
+        def statlfile(self, sha):
+            try:
+                return int(self._call("statlfile", sha=sha))
+            except (ValueError, urllib2.HTTPError):
+                # if the server returns anything but an integer followed by a
+                # newline, it's not speaking our language; if we get
+                # an HTTP error, we can't be sure the largefile is present;
+                # either way, consider it missing
+                return 2
+
+    repo.__class__ = lfileswirerepository
+
+# advertise the largefiles=serve capability
+def capabilities(repo, proto):
+    return capabilities_orig(repo, proto) + ' largefiles=serve'
+
+# duplicate what Mercurial's new out-of-band errors mechanism does, because
+# clients old and new alike both handle it well
+def webproto_refuseclient(self, message):
+    self.req.header([('Content-Type', 'application/hg-error')])
+    return message
+
+def sshproto_refuseclient(self, message):
+    self.ui.write_err('%s\n-\n' % message)
+    self.fout.write('\n')
+    self.fout.flush()
+
+    return ''
+
+def heads(repo, proto):
+    """Serve the 'heads' wire command, refusing it with
+    LARGEFILES_REQUIRED_MSG when repo is a largefiles repository."""
+    if lfutil.islfilesrepo(repo):
+        try:
+            # Mercurial >= f4522df38c65
+            return wireproto.ooberror(LARGEFILES_REQUIRED_MSG)
+        except AttributeError:
+            return proto.refuseclient(LARGEFILES_REQUIRED_MSG)
+    return wireproto.heads(repo, proto)
+
+def sshrepo_callstream(self, cmd, **args):
+    # When the peer advertises largefiles, ask for 'lheads' instead of
+    # 'heads', both directly and inside batched commands.
+    if cmd == 'heads' and self.capable('largefiles'):
+        cmd = 'lheads'
+    if cmd == 'batch' and self.capable('largefiles'):
+        args['cmds'] = args['cmds'].replace('heads ', 'lheads ')
+    return ssh_oldcallstream(self, cmd, **args)
+
+def httprepo_callstream(self, cmd, **args):
+    # Same substitution as sshrepo_callstream, for HTTP peers.
+    if cmd == 'heads' and self.capable('largefiles'):
+        cmd = 'lheads'
+    if cmd == 'batch' and self.capable('largefiles'):
+        args['cmds'] = args['cmds'].replace('heads ', 'lheads ')
+    return http_oldcallstream(self, cmd, **args)
diff --git a/hgext/largefiles/remotestore.py b/hgext/largefiles/remotestore.py
new file mode 100644
--- /dev/null
+++ b/hgext/largefiles/remotestore.py
@@ -0,0 +1,106 @@
+# Copyright 2010-2011 Fog Creek Software
+# Copyright 2010-2011 Unity Technologies
+#
+# This software may be used and distributed according to the terms of the
+# GNU General Public License version 2 or any later version.
+
+'''Remote largefile store; the base class for servestore'''
+
+import urllib2
+# HTTPError is a class in urllib2, not a module of its own.
+from urllib2 import HTTPError
+
+from mercurial import util
+from mercurial.i18n import _
+
+import lfutil
+import basestore
+
+class remotestore(basestore.basestore):
+    """A largefile store accessed over a network
+
+    Subclasses provide _put(hash, fd), _get(hash) and _stat(hash); _stat is
+    treated here as returning 0 for a good largefile, 1 for one whose
+    contents differ and 2 for a missing one."""
+    def __init__(self, ui, repo, url):
+        super(remotestore, self).__init__(ui, repo, url)
+
+    def put(self, source, hash):
+        '''Upload source under hash unless the store already has it.
+        Raises util.Abort when the upload reports failure.'''
+        if self._verify(hash):
+            return
+        if self.sendfile(source, hash):
+            raise util.Abort(
+                _('remotestore: could not put %s to remote store %s')
+                % (source, self.url))
+        self.ui.debug(
+            _('remotestore: put %s to remote store %s') % (source, self.url))
+
+    def exists(self, hash):
+        return self._verify(hash)
+
+    def sendfile(self, filename, hash):
+        '''Open filename and hand it to _put; return what _put returns.
+        Raises util.Abort when the file cannot be opened.'''
+        self.ui.debug('remotestore: sendfile(%s, %s)\n' % (filename, hash))
+        fd = None
+        try:
+            try:
+                fd = lfutil.httpsendfile(self.ui, filename)
+            except IOError, e:
+                raise util.Abort(
+                    _('remotestore: could not open file %s: %s')
+                    % (filename, str(e)))
+            return self._put(hash, fd)
+        finally:
+            if fd:
+                fd.close()
+
+    def _getfile(self, tmpfile, filename, hash):
+        # quit if the largefile isn't there
+        stat = self._stat(hash)
+        if stat:
+            raise util.Abort(_('remotestore: largefile %s is %s') %
+                             (hash, stat == 1 and 'invalid' or 'missing'))
+
+        try:
+            length, infile = self._get(hash)
+        except HTTPError, e:
+            # 401s get converted to util.Aborts; everything else is fine being
+            # turned into a StoreError
+            raise basestore.StoreError(filename, hash, self.url, str(e))
+        except urllib2.URLError, e:
+            # This usually indicates a connection problem, so don't
+            # keep trying with the other files... they will probably
+            # all fail too.
+            raise util.Abort('%s: %s' % (self.url, str(e.reason)))
+        except IOError, e:
+            raise basestore.StoreError(filename, hash, self.url, str(e))
+
+        # Mercurial does not close its SSH connections after writing a stream
+        if length is not None:
+            infile = lfutil.limitreader(infile, length)
+        return lfutil.copyandhash(lfutil.blockstream(infile), tmpfile)
+
+    def _verify(self, hash):
+        return not self._stat(hash)
+
+    def _verifyfile(self, cctx, cset, contents, standin, verified):
+        '''Check the largefile named by standin in changeset context cctx.
+
+        Returns True when verification failed, False otherwise (including
+        when standin is not a standin or was already verified).'''
+        filename = lfutil.splitstandin(standin)
+        if not filename:
+            return False
+        fctx = cctx[standin]
+        key = (filename, fctx.filenode())
+        if key in verified:
+            return False
+
+        verified.add(key)
+
+        # The standin's content is the hash of the largefile it stands for;
+        # that is what must be checked on the remote side.
+        expecthash = fctx.data().strip()
+        stat = self._stat(expecthash)
+        if not stat:
+            return False
+        elif stat == 1:
+            self.ui.warn(
+                _('changeset %s: %s: contents differ\n')
+                % (cset, filename))
+            return True # failed
+        elif stat == 2:
+            self.ui.warn(
+                _('changeset %s: %s missing\n')
+                % (cset, filename))
+            return True # failed
+        else:
+            raise util.Abort(_('check failed, unexpected response '
+                               'statlfile: %d') % stat)
diff --git a/hgext/largefiles/reposetup.py b/hgext/largefiles/reposetup.py
new file mode 100644
--- /dev/null
+++ b/hgext/largefiles/reposetup.py
@@ -0,0 +1,411 @@
+# Copyright 2009-2010 Gregory P. Ward
+# Copyright 2009-2010 Intelerad Medical Systems Incorporated
+# Copyright 2010-2011 Fog Creek Software
+# Copyright 2010-2011 Unity Technologies
+#
+# This software may be used and distributed according to the terms of the
+# GNU General Public License version 2 or any later version.
+
+'''setup for largefiles repositories: reposetup'''
+import copy
+import types
+import os
+import re
+
+from mercurial import context, error, manifest, match as match_, \
+        node, util
+from mercurial.i18n import _
+
+import lfcommands
+import proto
+import lfutil
+
+def reposetup(ui, repo):
+    # Per-repository setup.  Non-local repositories only get the wire
+    # protocol additions; local ones have their class replaced by
+    # lfiles_repo (overriding __getitem__, status, commitctx, commit and
+    # push) and get the 'largefiles' requirement check installed as hooks.
+
+    # wire repositories should be given new wireproto functions but not the
+    # other largefiles modifications
+    if not repo.local():
+        return proto.wirereposetup(ui, repo)
+
+    for name in ('status', 'commitctx', 'commit', 'push'):
+        method = getattr(repo, name)
+        #if not (isinstance(method, types.MethodType) and
+        #        method.im_func is repo.__class__.commitctx.im_func):
+        if isinstance(method, types.FunctionType) and method.func_name == \
+            'wrap':
+            ui.warn(_('largefiles: repo method %r appears to have already been'
+                    ' wrapped by another extension: '
+                    'largefiles may behave incorrectly\n')
+                    % name)
+
+    class lfiles_repo(repo.__class__):
+        # Callers set lfstatus to True around calls that should see
+        # largefiles instead of their standins.
+        lfstatus = False
+        def status_nolfiles(self, *args, **kwargs):
+            return super(lfiles_repo, self).status(*args, **kwargs)
+
+        # When lfstatus is set, return a context that gives the names of lfiles
+        # instead of their corresponding standins and identifies the lfiles as
+        # always binary, regardless of their actual contents.
+        def __getitem__(self, changeid):
+            ctx = super(lfiles_repo, self).__getitem__(changeid)
+            if self.lfstatus:
+                class lfiles_manifestdict(manifest.manifestdict):
+                    def __contains__(self, filename):
+                        if super(lfiles_manifestdict,
+                                self).__contains__(filename):
+                            return True
+                        return super(lfiles_manifestdict,
+                            self).__contains__(lfutil.shortname+'/' + filename)
+                class lfiles_ctx(ctx.__class__):
+                    def files(self):
+                        filenames = super(lfiles_ctx, self).files()
+                        return [re.sub('^\\'+lfutil.shortname+'/', '', filename) for filename
+                            in filenames]
+                    def manifest(self):
+                        man1 = super(lfiles_ctx, self).manifest()
+                        man1.__class__ = lfiles_manifestdict
+                        return man1
+                    def filectx(self, path, fileid=None, filelog=None):
+                        try:
+                            result = super(lfiles_ctx, self).filectx(path,
+                                fileid, filelog)
+                        except error.LookupError:
+                            # Adding a null character will cause Mercurial to
+                            # identify this as a binary file.
+                            result = super(lfiles_ctx, self).filectx(
+                                lfutil.shortname + '/' + path, fileid,
+                                filelog)
+                            olddata = result.data
+                            result.data = lambda: olddata() + '\0'
+                        return result
+                ctx.__class__ = lfiles_ctx
+            return ctx
+
+        # Figure out the status of big files and insert them into the
+        # appropriate list in the result. Also removes standin files from
+        # the listing. This function reverts to the original status if
+        # self.lfstatus is False
+        def status(self, node1='.', node2=None, match=None, ignored=False,
+                clean=False, unknown=False, listsubrepos=False):
+            listignored, listclean, listunknown = ignored, clean, unknown
+            if not self.lfstatus:
+                # Fall back to the call without listsubrepos when the base
+                # implementation rejects the extra argument.
+                try:
+                    return super(lfiles_repo, self).status(node1, node2, match,
+                        listignored, listclean, listunknown, listsubrepos)
+                except TypeError:
+                    return super(lfiles_repo, self).status(node1, node2, match,
+                        listignored, listclean, listunknown)
+            else:
+                # some calls in this function rely on the old version of status
+                self.lfstatus = False
+                if isinstance(node1, context.changectx):
+                    ctx1 = node1
+                else:
+                    ctx1 = repo[node1]
+                if isinstance(node2, context.changectx):
+                    ctx2 = node2
+                else:
+                    ctx2 = repo[node2]
+                working = ctx2.rev() is None
+                parentworking = working and ctx1 == self['.']
+
+                def inctx(file, ctx):
+                    # True if file is present in ctx; any lookup failure
+                    # counts as absent.
+                    try:
+                        if ctx.rev() is None:
+                            return file in ctx.manifest()
+                        ctx[file]
+                        return True
+                    except:
+                        return False
+
+                # create a copy of match that matches standins instead of
+                # lfiles if matcher not set then it is the always matcher so
+                # overwrite that
+                if match is None:
+                    match = match_.always(self.root, self.getcwd())
+
+                def tostandin(file):
+                    if inctx(lfutil.standin(file), ctx2):
+                        return lfutil.standin(file)
+                    return file
+
+                m = copy.copy(match)
+                m._files = [tostandin(f) for f in m._files]
+
+                # get ignored clean and unknown but remove them later if they
+                # were not asked for
+                try:
+                    result = super(lfiles_repo, self).status(node1, node2, m,
+                        True, True, True, listsubrepos)
+                except TypeError:
+                    result = super(lfiles_repo, self).status(node1, node2, m,
+                        True, True, True)
+                if working:
+                    # Hold the wlock while we read lfiles and update the
+                    # lfdirstate
+                    wlock = repo.wlock()
+                    try:
+                        # Any non lfiles that were explicitly listed must be
+                        # taken out or lfdirstate.status will report an error.
+                        # The status of these files was already computed using
+                        # super's status.
+                        lfdirstate = lfutil.openlfdirstate(ui, self)
+                        # NOTE(review): this rewrites _files on the matcher
+                        # object itself, not on the copy m made above.
+                        match._files = [f for f in match._files if f in
+                            lfdirstate]
+                        s = lfdirstate.status(match, [], listignored,
+                                listclean, listunknown)
+                        (unsure, modified, added, removed, missing, unknown,
+                                ignored, clean) = s
+                        if parentworking:
+                            # Settle 'unsure' files by comparing the hash in
+                            # the parent's standin with the file on disk.
+                            for lfile in unsure:
+                                if ctx1[lfutil.standin(lfile)].data().strip() \
+                                        != lfutil.hashfile(self.wjoin(lfile)):
+                                    modified.append(lfile)
+                                else:
+                                    clean.append(lfile)
+                                    lfdirstate.normal(lfile)
+                            # NOTE(review): the nesting of this write() was
+                            # reconstructed from whitespace-collapsed text;
+                            # confirm it belongs after the loop.
+                            lfdirstate.write()
+                        else:
+                            tocheck = unsure + modified + added + clean
+                            modified, added, clean = [], [], []
+
+                            for lfile in tocheck:
+                                standin = lfutil.standin(lfile)
+                                if inctx(standin, ctx1):
+                                    if ctx1[standin].data().strip() != \
+                                            lfutil.hashfile(self.wjoin(lfile)):
+                                        modified.append(lfile)
+                                    else:
+                                        clean.append(lfile)
+                                else:
+                                    added.append(lfile)
+                    finally:
+                        wlock.release()
+
+                    # A standin in ctx1 whose largefile is not in the
+                    # lfdirstate is reported as removed.
+                    for standin in ctx1.manifest():
+                        if not lfutil.isstandin(standin):
+                            continue
+                        lfile = lfutil.splitstandin(standin)
+                        if not match(lfile):
+                            continue
+                        if lfile not in lfdirstate:
+                            removed.append(lfile)
+                    # Handle unknown and ignored differently
+                    lfiles = (modified, added, removed, missing, [], [], clean)
+                    result = list(result)
+                    # Unknown files
+                    result[4] = [f for f in unknown if repo.dirstate[f] == '?'\
+                        and not lfutil.isstandin(f)]
+                    # Ignored files must be ignored by both the dirstate and
+                    # lfdirstate
+                    result[5] = set(ignored).intersection(set(result[5]))
+                    # combine normal files and lfiles
+                    normals = [[fn for fn in filelist if not \
+                        lfutil.isstandin(fn)] for filelist in result]
+                    result = [sorted(list1 + list2) for (list1, list2) in \
+                        zip(normals, lfiles)]
+                else:
+                    # Not comparing against the working copy: just report
+                    # standins under their largefile names.
+                    def toname(f):
+                        if lfutil.isstandin(f):
+                            return lfutil.splitstandin(f)
+                        return f
+                    result = [[toname(f) for f in items] for items in result]
+
+                if not listunknown:
+                    result[4] = []
+                if not listignored:
+                    result[5] = []
+                if not listclean:
+                    result[6] = []
+                self.lfstatus = True
+                return result
+
+        # This call happens after a commit has occurred. Copy all of the lfiles
+        # into the cache
+        def commitctx(self, *args, **kwargs):
+            node = super(lfiles_repo, self).commitctx(*args, **kwargs)
+            ctx = self[node]
+            for filename in ctx.files():
+                if lfutil.isstandin(filename) and filename in ctx.manifest():
+                    realfile = lfutil.splitstandin(filename)
+                    lfutil.copytocache(self, ctx.node(), realfile)
+
+            return node
+
+        # This call happens before a commit has occurred. The lfile standins
+        # have not had their contents updated (to reflect the hash of their
+        # lfile). Do that here.
+        def commit(self, text="", user=None, date=None, match=None,
+                force=False, editor=False, extra={}):
+            orig = super(lfiles_repo, self).commit
+
+            wlock = repo.wlock()
+            try:
+                if getattr(repo, "_isrebasing", False):
+                    # We have to take the time to pull down the new lfiles now.
+                    # Otherwise if we are rebasing, any lfiles that were
+                    # modified in the changesets we are rebasing on top of get
+                    # overwritten either by the rebase or in the first commit
+                    # after the rebase.
+                    lfcommands.updatelfiles(repo.ui, repo)
+                # Case 1: user calls commit with no specific files or
+                # include/exclude patterns: refresh and commit everything.
+                if (match is None) or (not match.anypats() and not \
+                        match.files()):
+                    lfiles = lfutil.listlfiles(self)
+                    lfdirstate = lfutil.openlfdirstate(ui, self)
+                    # this only loops through lfiles that exist (not
+                    # removed/renamed)
+                    for lfile in lfiles:
+                        if os.path.exists(self.wjoin(lfutil.standin(lfile))):
+                            # this handles the case where a rebase is being
+                            # performed and the working copy is not updated
+                            # yet.
+                            if os.path.exists(self.wjoin(lfile)):
+                                lfutil.updatestandin(self,
+                                    lfutil.standin(lfile))
+                                lfdirstate.normal(lfile)
+                    # Drop lfdirstate entries whose standin no longer exists
+                    # on disk.
+                    for lfile in lfdirstate:
+                        if not os.path.exists(
+                                repo.wjoin(lfutil.standin(lfile))):
+                            try:
+                                # Mercurial >= 1.9
+                                lfdirstate.drop(lfile)
+                            except AttributeError:
+                                # Mercurial <= 1.8
+                                lfdirstate.forget(lfile)
+                    lfdirstate.write()
+
+                    return orig(text=text, user=user, date=date, match=match,
+                                    force=force, editor=editor, extra=extra)
+
+                for file in match.files():
+                    if lfutil.isstandin(file):
+                        raise util.Abort(
+                            "Don't commit largefile standin. Commit largefile.")
+
+                # Case 2: user calls commit with specified patterns: refresh
+                # any matching big files.
+                smatcher = lfutil.composestandinmatcher(self, match)
+                standins = lfutil.dirstate_walk(self.dirstate, smatcher)
+
+                # No matching big files: get out of the way and pass control to
+                # the usual commit() method.
+                if not standins:
+                    return orig(text=text, user=user, date=date, match=match,
+                                    force=force, editor=editor, extra=extra)
+
+                # Refresh all matching big files. It's possible that the
+                # commit will end up failing, in which case the big files will
+                # stay refreshed. No harm done: the user modified them and
+                # asked to commit them, so sooner or later we're going to
+                # refresh the standins. Might as well leave them refreshed.
+                lfdirstate = lfutil.openlfdirstate(ui, self)
+                for standin in standins:
+                    lfile = lfutil.splitstandin(standin)
+                    if lfdirstate[lfile] <> 'r':
+                        lfutil.updatestandin(self, standin)
+                        lfdirstate.normal(lfile)
+                    else:
+                        try:
+                            # Mercurial >= 1.9
+                            lfdirstate.drop(lfile)
+                        except AttributeError:
+                            # Mercurial <= 1.8
+                            lfdirstate.forget(lfile)
+                lfdirstate.write()
+
+                # Cook up a new matcher that only matches regular files or
+                # standins corresponding to the big files requested by the
+                # user. Have to modify _files to prevent commit() from
+                # complaining "not tracked" for big files.
+                lfiles = lfutil.listlfiles(repo)
+                match = copy.copy(match)
+                orig_matchfn = match.matchfn
+
+                # Check both the list of lfiles and the list of standins
+                # because if a lfile was removed, it won't be in the list of
+                # lfiles at this point
+                match._files += sorted(standins)
+
+                actualfiles = []
+                for f in match._files:
+                    fstandin = lfutil.standin(f)
+
+                    # Ignore known lfiles and standins
+                    if f in lfiles or fstandin in standins:
+                        continue
+
+                    # Append directory separator to avoid collisions
+                    if not fstandin.endswith(os.sep):
+                        fstandin += os.sep
+
+                    # Prevalidate matching standin directories
+                    if lfutil.any_(st for st in match._files if \
+                            st.startswith(fstandin)):
+                        continue
+                    actualfiles.append(f)
+                match._files = actualfiles
+
+                def matchfn(f):
+                    if orig_matchfn(f):
+                        return f not in lfiles
+                    else:
+                        return f in standins
+
+                match.matchfn = matchfn
+                return orig(text=text, user=user, date=date, match=match,
+                                force=force, editor=editor, extra=extra)
+            finally:
+                wlock.release()
+
+        def push(self, remote, force=False, revs=None, newbranch=False):
+            # Upload the largefiles referenced by outgoing changesets, then
+            # delegate to the base class push.
+            o = lfutil.findoutgoing(repo, remote, force)
+            if o:
+                toupload = set()
+                o = repo.changelog.nodesbetween(o, revs)[0]
+                for n in o:
+                    parents = [p for p in repo.changelog.parents(n) if p != \
+                        node.nullid]
+                    ctx = repo[n]
+                    files = set(ctx.files())
+                    if len(parents) == 2:
+                        mc = ctx.manifest()
+                        mp1 = ctx.parents()[0].manifest()
+                        mp2 = ctx.parents()[1].manifest()
+                        for f in mp1:
+                            if f not in mc:
+                                files.add(f)
+                        for f in mp2:
+                            if f not in mc:
+                                files.add(f)
+                        for f in mc:
+                            if mc[f] != mp1.get(f, None) or mc[f] != mp2.get(f,
+                                    None):
+                                files.add(f)
+
+                    # Collect the hashes stored in the standins, not the
+                    # standin names.
+                    toupload = toupload.union(set([ctx[f].data().strip() for f\
+                        in files if lfutil.isstandin(f) and f in ctx]))
+                lfcommands.uploadlfiles(ui, self, remote, toupload)
+            # Mercurial >= 1.6 takes the newbranch argument, try that first.
+            try:
+                return super(lfiles_repo, self).push(remote, force, revs,
+                    newbranch)
+            except TypeError:
+                return super(lfiles_repo, self).push(remote, force, revs)
+
+    repo.__class__ = lfiles_repo
+
+    def checkrequireslfiles(ui, repo, **kwargs):
+        # Add the 'largefiles' requirement once the store contains a file
+        # whose name includes the standin directory.
+        if 'largefiles' not in repo.requirements and lfutil.any_(
+                lfutil.shortname+'/' in f[0] for f in repo.store.datafiles()):
+            # work around bug in mercurial 1.9 whereby requirements is a list
+            # on newly-cloned repos
+            repo.requirements = set(repo.requirements)
+
+            repo.requirements |= set(['largefiles'])
+            repo._writerequirements()
+
+    checkrequireslfiles(ui, repo)
+
+    ui.setconfig('hooks', 'changegroup.lfiles', checkrequireslfiles)
+    ui.setconfig('hooks', 'commit.lfiles', checkrequireslfiles)
diff --git a/hgext/largefiles/uisetup.py b/hgext/largefiles/uisetup.py
new file mode 100644
--- /dev/null
+++ b/hgext/largefiles/uisetup.py
@@ -0,0 +1,125 @@
+# Copyright 2009-2010 Gregory P. Ward
+# Copyright 2009-2010 Intelerad Medical Systems Incorporated
+# Copyright 2010-2011 Fog Creek Software
+# Copyright 2010-2011 Unity Technologies
+#
+# This software may be used and distributed according to the terms of the
+# GNU General Public License version 2 or any later version.
+
+'''setup for largefiles extension: uisetup'''
+
+from mercurial import archival, cmdutil, commands, extensions, filemerge, hg, \
+    httprepo, localrepo, sshrepo, sshserver, wireproto
+from mercurial.i18n import _
+from mercurial.hgweb import hgweb_mod, protocol
+
+import overrides
+import proto
+
+def uisetup(ui):
+    '''Install the largefiles wrappers.
+
+    Wraps core commands and functions with their counterparts in overrides,
+    adds the extra command-line options, registers the largefiles wire
+    protocol commands, and wraps the fetch, purge and rebase extensions when
+    they are loaded.'''
+    # Disable auto-status for some commands which assume that all
+    # files in the result are under Mercurial's control
+
+    entry = extensions.wrapcommand(commands.table, 'add', overrides.override_add)
+    # The two help fragments are concatenated, so the first one needs its
+    # trailing space.
+    addopt = [('', 'large', None, _('add as largefile')),
+            ('', 'lfsize', '', _('add all files above this size (in megabytes) '
+                                 'as largefiles (default: 10)'))]
+    entry[1].extend(addopt)
+
+    entry = extensions.wrapcommand(commands.table, 'addremove',
+            overrides.override_addremove)
+    entry = extensions.wrapcommand(commands.table, 'remove', overrides.override_remove)
+    entry = extensions.wrapcommand(commands.table, 'forget', overrides.override_forget)
+    entry = extensions.wrapcommand(commands.table, 'status', overrides.override_status)
+    entry = extensions.wrapcommand(commands.table, 'log', overrides.override_log)
+    entry = extensions.wrapcommand(commands.table, 'rollback',
+            overrides.override_rollback)
+
+    entry = extensions.wrapcommand(commands.table, 'verify', overrides.override_verify)
+    verifyopt = [('', 'large', None, _('verify largefiles')),
+                 ('', 'lfa', None,
+                     _('verify all revisions of largefiles not just current')),
+                 ('', 'lfc', None,
+                     _('verify largefile contents not just existence'))]
+    entry[1].extend(verifyopt)
+
+    entry = extensions.wrapcommand(commands.table, 'outgoing',
+        overrides.override_outgoing)
+    outgoingopt = [('', 'large', None, _('display outgoing largefiles'))]
+    entry[1].extend(outgoingopt)
+    entry = extensions.wrapcommand(commands.table, 'summary', overrides.override_summary)
+    summaryopt = [('', 'large', None, _('display outgoing largefiles'))]
+    entry[1].extend(summaryopt)
+
+    entry = extensions.wrapcommand(commands.table, 'update', overrides.override_update)
+    entry = extensions.wrapcommand(commands.table, 'pull', overrides.override_pull)
+    entry = extensions.wrapfunction(filemerge, 'filemerge', overrides.override_filemerge)
+    entry = extensions.wrapfunction(cmdutil, 'copy', overrides.override_copy)
+
+    # Backout calls revert so we need to override both the command and the
+    # function
+    entry = extensions.wrapcommand(commands.table, 'revert', overrides.override_revert)
+    entry = extensions.wrapfunction(commands, 'revert', overrides.override_revert)
+
+    # clone uses hg._update instead of hg.update even though they are the
+    # same function... so wrap both of them)
+    extensions.wrapfunction(hg, 'update', overrides.hg_update)
+    extensions.wrapfunction(hg, '_update', overrides.hg_update)
+    extensions.wrapfunction(hg, 'clean', overrides.hg_clean)
+    extensions.wrapfunction(hg, 'merge', overrides.hg_merge)
+
+    extensions.wrapfunction(archival, 'archive', overrides.override_archive)
+    # The function was renamed between Mercurial versions; wrap whichever
+    # name this cmdutil provides.
+    if hasattr(cmdutil, 'bailifchanged'):
+        extensions.wrapfunction(cmdutil, 'bailifchanged',
+            overrides.override_bailifchanged)
+    else:
+        extensions.wrapfunction(cmdutil, 'bail_if_changed',
+            overrides.override_bailifchanged)
+
+    # create the new wireproto commands ...
+    wireproto.commands['putlfile'] = (proto.putlfile, 'sha')
+    wireproto.commands['getlfile'] = (proto.getlfile, 'sha')
+    wireproto.commands['statlfile'] = (proto.statlfile, 'sha')
+
+    # ... and wrap some existing ones
+    wireproto.commands['capabilities'] = (proto.capabilities, '')
+    wireproto.commands['heads'] = (proto.heads, '')
+    wireproto.commands['lheads'] = (wireproto.heads, '')
+
+    # make putlfile behave the same as push and {get,stat}lfile behave the same
+    # as pull w.r.t. permissions checks
+    hgweb_mod.perms['putlfile'] = 'push'
+    hgweb_mod.perms['getlfile'] = 'pull'
+    hgweb_mod.perms['statlfile'] = 'pull'
+
+    # the hello wireproto command uses wireproto.capabilities, so it won't see
+    # our largefiles capability unless we replace the actual function as well.
+    proto.capabilities_orig = wireproto.capabilities
+    wireproto.capabilities = proto.capabilities
+
+    # these let us reject non-lfiles clients and make them display our error
+    # messages
+    protocol.webproto.refuseclient = proto.webproto_refuseclient
+    sshserver.sshserver.refuseclient = proto.sshproto_refuseclient
+
+    # can't do this in reposetup because it needs to have happened before
+    # wirerepo.__init__ is called
+    proto.ssh_oldcallstream = sshrepo.sshrepository._callstream
+    proto.http_oldcallstream = httprepo.httprepository._callstream
+    sshrepo.sshrepository._callstream = proto.sshrepo_callstream
+    httprepo.httprepository._callstream = proto.httprepo_callstream
+
+    # don't die on seeing a repo with the largefiles requirement
+    localrepo.localrepository.supported |= set(['largefiles'])
+
+    # override some extensions' stuff as well
+    for name, module in extensions.extensions():
+        if name == 'fetch':
+            extensions.wrapcommand(getattr(module, 'cmdtable'), 'fetch',
+                overrides.override_fetch)
+        if name == 'purge':
+            extensions.wrapcommand(getattr(module, 'cmdtable'), 'purge',
+                overrides.override_purge)
+        if name == 'rebase':
+            extensions.wrapcommand(getattr(module, 'cmdtable'), 'rebase',
+                overrides.override_rebase)
diff --git a/hgext/largefiles/usage.txt b/hgext/largefiles/usage.txt
new file mode 100644
--- /dev/null
+++ b/hgext/largefiles/usage.txt
@@ -0,0 +1,51 @@
+Largefiles allows for tracking large, incompressible binary files in Mercurial
+without requiring excessive bandwidth for clones and pulls. Files added as
+largefiles are not tracked directly by Mercurial; rather, their revisions are
+identified by a checksum, and Mercurial tracks these checksums.
This way, when +you clone a repository or pull in changesets, the large files in older +revisions of the repository are not needed, and only the ones needed to update +to the current version are downloaded. This saves both disk space and +bandwidth. + +If you are starting a new repository or adding new large binary files, using +largefiles for them is as easy as adding '--large' to your hg add command. For +example: + +$ dd if=/dev/urandom of=thisfileislarge count=2000 +$ hg add --large thisfileislarge +$ hg commit -m 'add thisfileislarge, which is large, as a largefile' + +When you push a changeset that affects largefiles to a remote repository, its +largefile revisions will be uploaded along with it. Note that the remote +Mercurial must also have the largefiles extension enabled for this to work. + +When you pull a changeset that affects largefiles from a remote repository, +nothing different from Mercurial's normal behavior happens. However, when you +update to such a revision, any largefiles needed by that revision are +downloaded and cached if they have never been downloaded before. This means +that network access is required to update to a revision you have not yet updated +to. + +If you already have large files tracked by Mercurial without the largefiles +extension, you will need to convert your repository in order to benefit from +largefiles. This is done with the 'hg lfconvert' command: + +$ hg lfconvert --size 10 oldrepo newrepo + +By default, in repositories that already have largefiles in them, any new file +over 10MB will automatically be added as a largefile.
To change this +threshold, set [largefiles].size in your Mercurial config file to the minimum +size in megabytes to track as a largefile, or use the --lfsize option to the +add command (also in megabytes): + +[largefiles] +size = 2 + +$ hg add --lfsize 2 + +The [largefiles].patterns config option allows you to specify specific +space-separated filename patterns (in shell glob syntax) that should always be +tracked as largefiles: + +[largefiles] +patterns = *.jpg *.{png,bmp} library.zip content/audio/* diff --git a/hgext/largefiles/wirestore.py b/hgext/largefiles/wirestore.py new file mode 100644 --- /dev/null +++ b/hgext/largefiles/wirestore.py @@ -0,0 +1,29 @@ +# Copyright 2010-2011 Fog Creek Software +# +# This software may be used and distributed according to the terms of the +# GNU General Public License version 2 or any later version. + +'''largefile store working over mercurial's wire protocol''' + +import lfutil +import remotestore + +class wirestore(remotestore.remotestore): + def __init__(self, ui, repo, remote): + cap = remote.capable('largefiles') + if not cap: + raise lfutil.storeprotonotcapable([]) + storetypes = cap.split(',') + if not 'serve' in storetypes: + raise lfutil.storeprotonotcapable(storetypes) + self.remote = remote + super(wirestore, self).__init__(ui, repo, remote.url()) + + def _put(self, hash, fd): + return self.remote.putlfile(hash, fd) + + def _get(self, hash): + return self.remote.getlfile(hash) + + def _stat(self, hash): + return self.remote.statlfile(hash)