# HG changeset patch
# User Jesse Glick
# Date 2009-11-05 22:38:03
# Node ID aa9ccab5af3753a0ebe7a8a240ba8f1acb78df4a
# Parent acb1c59b45145937b19ecff9bef88b4905a67e64
Issue919: add a standard extension to recreate hardlinks between repositories.
Having to run a standalone Python script from the contrib dir is a nuisance.
Also makes various improvements to locking, file discovery, etc.
Should also update: http://www.selenic.com/mercurial/wiki/index.cgi/RecreateHardlinksBetweenRepositories

diff --git a/contrib/hg-relink b/hgext/relink.py
old mode 100755
new mode 100644
rename from contrib/hg-relink
rename to hgext/relink.py
--- a/contrib/hg-relink
+++ b/hgext/relink.py
@@ -1,52 +1,74 @@
-#!/usr/bin/env python
+# Mercurial extension to provide 'hg relink' command
 #
 # Copyright (C) 2007 Brendan Cully
 #
 # This software may be used and distributed according to the terms of the
 # GNU General Public License version 2, incorporated herein by reference.
 
-import os, sys
+"""recreates hardlinks between repository clones"""
 
-class ConfigError(Exception): pass
+from mercurial import cmdutil, hg, util
+from mercurial.i18n import _
+import os, stat
+
+def relink(ui, repo, origin=None, **opts):
+    """recreate hardlinks between two repositories
 
-def usage():
-    print """relink
-    Recreate hard links between source and destination repositories"""
+    When repositories are cloned locally, their data files will be hardlinked
+    so that they only use the space of a single repository.
+
+    Unfortunately, subsequent pulls into either repository will break hardlinks
+    for any files touched by the new changesets, even if both repositories end
+    up pulling the same changes.
+
+    Similarly, passing --rev to "hg clone" will fail to use
+    any hardlinks, falling back to a complete copy of the source repository.
+
+    This command lets you recreate those hardlinks and reclaim that wasted
+    space.
 
-class Config:
-    def __init__(self, args):
-        if len(args) != 3:
-            raise ConfigError("wrong number of arguments")
-        self.src = os.path.abspath(args[1])
-        self.dst = os.path.abspath(args[2])
-        for d in (self.src, self.dst):
-            if not os.path.exists(os.path.join(d, '.hg')):
-                raise ConfigError("%s: not a mercurial repository" % d)
+    This repository will be relinked to share space with ORIGIN, which must be
+    on the same local disk. If ORIGIN is omitted, looks for "default-relink",
+    then "default", in [paths].
 
-def collect(src):
+    Do not attempt any read operations on this repository while the command is
+    running. (Both repositories will be locked against writes.)
+    """
+    src = hg.repository(
+        cmdutil.remoteui(repo, opts),
+        ui.expandpath(origin or 'default-relink', origin or 'default'))
+    if not src.local():
+        raise util.Abort('must specify local origin repository')
+    ui.status(_('relinking %s to %s\n') % (src.store.path, repo.store.path))
+    locallock = repo.lock()
+    try:
+        remotelock = src.lock()
+        try:
+            candidates = collect(src.store.path, ui)
+            targets = prune(candidates, repo.store.path, ui)
+            do_relink(src.store.path, repo.store.path, targets, ui)
+        finally:
+            remotelock.release()
+    finally:
+        locallock.release()
+
+def collect(src, ui):
     seplen = len(os.path.sep)
     candidates = []
     for dirpath, dirnames, filenames in os.walk(src):
         relpath = dirpath[len(src) + seplen:]
         for filename in filenames:
-            if not filename.endswith('.i'):
+            if not filename[-2:] in ('.d', '.i'):
                 continue
             st = os.stat(os.path.join(dirpath, filename))
+            if not stat.S_ISREG(st.st_mode):
+                continue
             candidates.append((os.path.join(relpath, filename), st))
 
+    ui.status(_('collected %d candidate storage files\n') % len(candidates))
     return candidates
 
-def prune(candidates, dst):
-    def getdatafile(path):
-        if not path.endswith('.i'):
-            return None, None
-        df = path[:-1] + 'd'
-        try:
-            st = os.stat(df)
-        except OSError:
-            return None, None
-        return df, st
-
+def prune(candidates, dst, ui):
     def linkfilter(dst, st):
         try:
             ts = os.stat(dst)
@@ -57,9 +79,9 @@ def prune(candidates, dst):
             return False
         if st.st_dev != ts.st_dev:
             # No point in continuing
-            raise Exception('Source and destination are on different devices')
+            raise util.Abort(
+                _('source and destination are on different devices'))
         if st.st_size != ts.st_size:
-            # TODO: compare revlog heads
             return False
         return st
 
@@ -68,15 +90,14 @@ def prune(candidates, dst):
         tgt = os.path.join(dst, fn)
         ts = linkfilter(tgt, st)
         if not ts:
+            ui.debug(_('not linkable: %s\n') % fn)
             continue
         targets.append((fn, ts.st_size))
-        df, ts = getdatafile(tgt)
-        if df:
-            targets.append((fn[:-1] + 'd', ts.st_size))
 
+    ui.status(_('pruned down to %d probably relinkable files\n') % len(targets))
     return targets
 
-def relink(src, dst, files):
+def do_relink(src, dst, files, ui):
     def relinkfile(src, dst):
         bak = dst + '.bak'
         os.rename(dst, bak)
@@ -91,7 +112,10 @@ def relink(src, dst, files):
     relinked = 0
     savedbytes = 0
 
+    pos = 0
+    total = len(files)
     for f, sz in files:
+        pos += 1
         source = os.path.join(src, f)
         tgt = os.path.join(dst, f)
         sfp = file(source)
@@ -103,26 +127,23 @@ def relink(src, dst, files):
                 break
             sin = sfp.read(CHUNKLEN)
         if sin:
+            ui.debug(_('not linkable: %s\n') % f)
             continue
         try:
             relinkfile(source, tgt)
-            print 'Relinked %s' % f
+            ui.progress(_('relink'), pos, f, _(' files'), total)
             relinked += 1
             savedbytes += sz
         except OSError, inst:
-            print '%s: %s' % (tgt, str(inst))
+            ui.warn(_('%s: %s\n') % (tgt, str(inst)))
 
-    print 'Relinked %d files (%d bytes reclaimed)' % (relinked, savedbytes)
+    ui.status(_('relinked %d files (%d bytes reclaimed)\n') %
+              (relinked, savedbytes))
 
-try:
-    cfg = Config(sys.argv)
-except ConfigError, inst:
-    print str(inst)
-    usage()
-    sys.exit(1)
-
-src = os.path.join(cfg.src, '.hg')
-dst = os.path.join(cfg.dst, '.hg')
-candidates = collect(src)
-targets = prune(candidates, dst)
-relink(src, dst, targets)
+cmdtable = {
+    'relink': (
+        relink,
+        [],
+        _('[ORIGIN]')
+    )
+}