# Copyright 2009-2010 Gregory P. Ward # Copyright 2009-2010 Intelerad Medical Systems Incorporated # Copyright 2010-2011 Fog Creek Software # Copyright 2010-2011 Unity Technologies # # This software may be used and distributed according to the terms of the # GNU General Public License version 2 or any later version. '''High-level command functions: lfadd() et. al, plus the cmdtable.''' import os import shutil from mercurial import util, match as match_, hg, node, context, error from mercurial.i18n import _ import lfutil import basestore # -- Commands ---------------------------------------------------------- def lfconvert(ui, src, dest, *pats, **opts): '''Convert a normal repository to a largefiles repository Convert source repository creating an identical repository, except that all files that match the patterns given, or are over the given size will be added as largefiles. The size used to determine whether or not to track a file as a largefile is the size of the first version of the file. After running this command you will need to make sure that largefiles is enabled anywhere you intend to push the new repository.''' if opts['tonormal']: tolfile = False else: tolfile = True size = opts['size'] if not size: size = ui.config(lfutil.longname, 'size', default=None) try: size = int(size) except ValueError: raise util.Abort(_('largefiles.size must be integer, was %s\n') % \ size) except TypeError: raise util.Abort(_('size must be specified')) try: rsrc = hg.repository(ui, src) if not rsrc.local(): raise util.Abort(_('%s is not a local Mercurial repo') % src) except error.RepoError, err: ui.traceback() raise util.Abort(err.args[0]) if os.path.exists(dest): if not os.path.isdir(dest): raise util.Abort(_('destination %s already exists') % dest) elif os.listdir(dest): raise util.Abort(_('destination %s is not empty') % dest) try: ui.status(_('initializing destination %s\n') % dest) rdst = hg.repository(ui, dest, create=True) if not rdst.local(): raise util.Abort(_('%s is not a local Mercurial repo') % dest) except error.RepoError: ui.traceback() raise util.Abort(_('%s is not a repo') % dest) try: # Lock destination to prevent modification while it is converted to. # Don't need to lock src because we are just reading from its history # which can't change. dst_lock = rdst.lock() # Get a list of all changesets in the source. The easy way to do this # is to simply walk the changelog, using changelog.nodesbewteen(). # Take a look at mercurial/revlog.py:639 for more details. # Use a generator instead of a list to decrease memory usage ctxs = (rsrc[ctx] for ctx in rsrc.changelog.nodesbetween(None, rsrc.heads())[0]) revmap = {node.nullid: node.nullid} if tolfile: lfiles = set() normalfiles = set() if not pats: pats = ui.config(lfutil.longname, 'patterns', default=()) if pats: pats = pats.split(' ') if pats: matcher = match_.match(rsrc.root, '', list(pats)) else: matcher = None lfiletohash = {} for ctx in ctxs: ui.progress(_('converting revisions'), ctx.rev(), unit=_('revision'), total=rsrc['tip'].rev()) _lfconvert_addchangeset(rsrc, rdst, ctx, revmap, lfiles, normalfiles, matcher, size, lfiletohash) ui.progress(_('converting revisions'), None) if os.path.exists(rdst.wjoin(lfutil.shortname)): shutil.rmtree(rdst.wjoin(lfutil.shortname)) for f in lfiletohash.keys(): if os.path.isfile(rdst.wjoin(f)): os.unlink(rdst.wjoin(f)) try: os.removedirs(os.path.dirname(rdst.wjoin(f))) except: pass else: for ctx in ctxs: ui.progress(_('converting revisions'), ctx.rev(), unit=_('revision'), total=rsrc['tip'].rev()) _addchangeset(ui, rsrc, rdst, ctx, revmap) ui.progress(_('converting revisions'), None) except: # we failed, remove the new directory shutil.rmtree(rdst.root) raise finally: dst_lock.release() def _addchangeset(ui, rsrc, rdst, ctx, revmap): # Convert src parents to dst parents parents = [] for p in ctx.parents(): parents.append(revmap[p.node()]) while len(parents) < 2: parents.append(node.nullid) # Generate list of changed files files = set(ctx.files()) if node.nullid not in parents: mc = ctx.manifest() mp1 = ctx.parents()[0].manifest() mp2 = ctx.parents()[1].manifest() files |= (set(mp1) | set(mp2)) - set(mc) for f in mc: if mc[f] != mp1.get(f, None) or mc[f] != mp2.get(f, None): files.add(f) def getfilectx(repo, memctx, f): if lfutil.standin(f) in files: # if the file isn't in the manifest then it was removed # or renamed, raise IOError to indicate this try: fctx = ctx.filectx(lfutil.standin(f)) except error.LookupError: raise IOError() renamed = fctx.renamed() if renamed: renamed = lfutil.splitstandin(renamed[0]) hash = fctx.data().strip() path = lfutil.findfile(rsrc, hash) ### TODO: What if the file is not cached? data = '' fd = None try: fd = open(path, 'rb') data = fd.read() finally: if fd: fd.close() return context.memfilectx(f, data, 'l' in fctx.flags(), 'x' in fctx.flags(), renamed) else: try: fctx = ctx.filectx(f) except error.LookupError: raise IOError() renamed = fctx.renamed() if renamed: renamed = renamed[0] data = fctx.data() if f == '.hgtags': newdata = [] for line in data.splitlines(): id, name = line.split(' ', 1) newdata.append('%s %s\n' % (node.hex(revmap[node.bin(id)]), name)) data = ''.join(newdata) return context.memfilectx(f, data, 'l' in fctx.flags(), 'x' in fctx.flags(), renamed) dstfiles = [] for file in files: if lfutil.isstandin(file): dstfiles.append(lfutil.splitstandin(file)) else: dstfiles.append(file) # Commit mctx = context.memctx(rdst, parents, ctx.description(), dstfiles, getfilectx, ctx.user(), ctx.date(), ctx.extra()) ret = rdst.commitctx(mctx) rdst.dirstate.setparents(ret) revmap[ctx.node()] = rdst.changelog.tip() def _lfconvert_addchangeset(rsrc, rdst, ctx, revmap, lfiles, normalfiles, matcher, size, lfiletohash): # Convert src parents to dst parents parents = [] for p in ctx.parents(): parents.append(revmap[p.node()]) while len(parents) < 2: parents.append(node.nullid) # Generate list of changed files files = set(ctx.files()) if node.nullid not in parents: mc = ctx.manifest() mp1 = ctx.parents()[0].manifest() mp2 = ctx.parents()[1].manifest() files |= (set(mp1) | set(mp2)) - set(mc) for f in mc: if mc[f] != mp1.get(f, None) or mc[f] != mp2.get(f, None): files.add(f) dstfiles = [] for f in files: if f not in lfiles and f not in normalfiles: islfile = _islfile(f, ctx, matcher, size) # If this file was renamed or copied then copy # the lfileness of its predecessor if f in ctx.manifest(): fctx = ctx.filectx(f) renamed = fctx.renamed() renamedlfile = renamed and renamed[0] in lfiles islfile |= renamedlfile if 'l' in fctx.flags(): if renamedlfile: raise util.Abort( _('Renamed/copied largefile %s becomes symlink') % f) islfile = False if islfile: lfiles.add(f) else: normalfiles.add(f) if f in lfiles: dstfiles.append(lfutil.standin(f)) # lfile in manifest if it has not been removed/renamed if f in ctx.manifest(): if 'l' in ctx.filectx(f).flags(): if renamed and renamed[0] in lfiles: raise util.Abort(_('largefile %s becomes symlink') % f) # lfile was modified, update standins fullpath = rdst.wjoin(f) lfutil.createdir(os.path.dirname(fullpath)) m = util.sha1('') m.update(ctx[f].data()) hash = m.hexdigest() if f not in lfiletohash or lfiletohash[f] != hash: try: fd = open(fullpath, 'wb') fd.write(ctx[f].data()) finally: if fd: fd.close() executable = 'x' in ctx[f].flags() os.chmod(fullpath, lfutil.getmode(executable)) lfutil.writestandin(rdst, lfutil.standin(f), hash, executable) lfiletohash[f] = hash else: # normal file dstfiles.append(f) def getfilectx(repo, memctx, f): if lfutil.isstandin(f): # if the file isn't in the manifest then it was removed # or renamed, raise IOError to indicate this srcfname = lfutil.splitstandin(f) try: fctx = ctx.filectx(srcfname) except error.LookupError: raise IOError() renamed = fctx.renamed() if renamed: # standin is always a lfile because lfileness # doesn't change after rename or copy renamed = lfutil.standin(renamed[0]) return context.memfilectx(f, lfiletohash[srcfname], 'l' in fctx.flags(), 'x' in fctx.flags(), renamed) else: try: fctx = ctx.filectx(f) except error.LookupError: raise IOError() renamed = fctx.renamed() if renamed: renamed = renamed[0] data = fctx.data() if f == '.hgtags': newdata = [] for line in data.splitlines(): id, name = line.split(' ', 1) newdata.append('%s %s\n' % (node.hex(revmap[node.bin(id)]), name)) data = ''.join(newdata) return context.memfilectx(f, data, 'l' in fctx.flags(), 'x' in fctx.flags(), renamed) # Commit mctx = context.memctx(rdst, parents, ctx.description(), dstfiles, getfilectx, ctx.user(), ctx.date(), ctx.extra()) ret = rdst.commitctx(mctx) rdst.dirstate.setparents(ret) revmap[ctx.node()] = rdst.changelog.tip() def _islfile(file, ctx, matcher, size): ''' A file is a lfile if it matches a pattern or is over the given size. ''' # Never store hgtags or hgignore as lfiles if file == '.hgtags' or file == '.hgignore' or file == '.hgsigs': return False if matcher and matcher(file): return True try: return ctx.filectx(file).size() >= size * 1024 * 1024 except error.LookupError: return False def uploadlfiles(ui, rsrc, rdst, files): '''upload largefiles to the central store''' # Don't upload locally. All largefiles are in the system wide cache # so the other repo can just get them from there. if not files or rdst.local(): return store = basestore._openstore(rsrc, rdst, put=True) at = 0 files = filter(lambda h: not store.exists(h), files) for hash in files: ui.progress(_('uploading largefiles'), at, unit='largefile', total=len(files)) source = lfutil.findfile(rsrc, hash) if not source: raise util.Abort(_('Missing largefile %s needs to be uploaded') % hash) # XXX check for errors here store.put(source, hash) at += 1 ui.progress('uploading largefiles', None) def verifylfiles(ui, repo, all=False, contents=False): '''Verify that every big file revision in the current changeset exists in the central store. With --contents, also verify that the contents of each big file revision are correct (SHA-1 hash matches the revision ID). With --all, check every changeset in this repository.''' if all: # Pass a list to the function rather than an iterator because we know a # list will work. revs = range(len(repo)) else: revs = ['.'] store = basestore._openstore(repo) return store.verify(revs, contents=contents) def cachelfiles(ui, repo, node): '''cachelfiles ensures that all largefiles needed by the specified revision are present in the repository's largefile cache. returns a tuple (cached, missing). cached is the list of files downloaded by this operation; missing is the list of files that were needed but could not be found.''' lfiles = lfutil.listlfiles(repo, node) toget = [] for lfile in lfiles: expectedhash = repo[node][lfutil.standin(lfile)].data().strip() # if it exists and its hash matches, it might have been locally # modified before updating and the user chose 'local'. in this case, # it will not be in any store, so don't look for it. if (not os.path.exists(repo.wjoin(lfile)) \ or expectedhash != lfutil.hashfile(repo.wjoin(lfile))) and \ not lfutil.findfile(repo, expectedhash): toget.append((lfile, expectedhash)) if toget: store = basestore._openstore(repo) ret = store.get(toget) return ret return ([], []) def updatelfiles(ui, repo, filelist=None, printmessage=True): wlock = repo.wlock() try: lfdirstate = lfutil.openlfdirstate(ui, repo) lfiles = set(lfutil.listlfiles(repo)) | set(lfdirstate) if filelist is not None: lfiles = [f for f in lfiles if f in filelist] printed = False if printmessage and lfiles: ui.status(_('getting changed largefiles\n')) printed = True cachelfiles(ui, repo, '.') updated, removed = 0, 0 for i in map(lambda f: _updatelfile(repo, lfdirstate, f), lfiles): # increment the appropriate counter according to _updatelfile's # return value updated += i > 0 and i or 0 removed -= i < 0 and i or 0 if printmessage and (removed or updated) and not printed: ui.status(_('getting changed largefiles\n')) printed = True lfdirstate.write() if printed and printmessage: ui.status(_('%d largefiles updated, %d removed\n') % (updated, removed)) finally: wlock.release() def _updatelfile(repo, lfdirstate, lfile): '''updates a single largefile and copies the state of its standin from the repository's dirstate to its state in the lfdirstate. returns 1 if the file was modified, -1 if the file was removed, 0 if the file was unchanged, and None if the needed largefile was missing from the cache.''' ret = 0 abslfile = repo.wjoin(lfile) absstandin = repo.wjoin(lfutil.standin(lfile)) if os.path.exists(absstandin): if os.path.exists(absstandin+'.orig'): shutil.copyfile(abslfile, abslfile+'.orig') expecthash = lfutil.readstandin(repo, lfile) if expecthash != '' and \ (not os.path.exists(abslfile) or \ expecthash != lfutil.hashfile(abslfile)): if not lfutil.copyfromcache(repo, expecthash, lfile): return None # don't try to set the mode or update the dirstate ret = 1 mode = os.stat(absstandin).st_mode if mode != os.stat(abslfile).st_mode: os.chmod(abslfile, mode) ret = 1 else: if os.path.exists(abslfile): os.unlink(abslfile) ret = -1 state = repo.dirstate[lfutil.standin(lfile)] if state == 'n': lfdirstate.normal(lfile) elif state == 'r': lfdirstate.remove(lfile) elif state == 'a': lfdirstate.add(lfile) elif state == '?': try: # Mercurial >= 1.9 lfdirstate.drop(lfile) except AttributeError: # Mercurial <= 1.8 lfdirstate.forget(lfile) return ret # -- hg commands declarations ------------------------------------------------ cmdtable = { 'lfconvert': (lfconvert, [('s', 'size', 0, 'All files over this size (in megabytes) ' 'will be considered largefiles. This can also be specified in ' 'your hgrc as [largefiles].size.'), ('','tonormal',False, 'Convert from a largefiles repo to a normal repo')], _('hg lfconvert SOURCE DEST [FILE ...]')), }