##// END OF EJS Templates
Issue919: add a standard extension to recreate hardlinks between repositories....
Jesse Glick -
r9729:aa9ccab5 default
parent child Browse files
Show More
@@ -1,52 +1,74 b''
1 #!/usr/bin/env python
1 # Mercurial extension to provide 'hg relink' command
2 2 #
3 3 # Copyright (C) 2007 Brendan Cully <brendan@kublai.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2, incorporated herein by reference.
7 7
8 import os, sys
8 """recreates hardlinks between repository clones"""
9 9
10 class ConfigError(Exception): pass
10 from mercurial import cmdutil, hg, util
11 from mercurial.i18n import _
12 import os, stat
13
14 def relink(ui, repo, origin=None, **opts):
15 """recreate hardlinks between two repositories
11 16
12 def usage():
13 print """relink <source> <destination>
14 Recreate hard links between source and destination repositories"""
17 When repositories are cloned locally, their data files will be hardlinked
18 so that they only use the space of a single repository.
19
20 Unfortunately, subsequent pulls into either repository will break hardlinks
21 for any files touched by the new changesets, even if both repositories end
22 up pulling the same changes.
23
24 Similarly, passing --rev to "hg clone" will fail to use
25 any hardlinks, falling back to a complete copy of the source repository.
26
27 This command lets you recreate those hardlinks and reclaim that wasted
28 space.
15 29
16 class Config:
17 def __init__(self, args):
18 if len(args) != 3:
19 raise ConfigError("wrong number of arguments")
20 self.src = os.path.abspath(args[1])
21 self.dst = os.path.abspath(args[2])
22 for d in (self.src, self.dst):
23 if not os.path.exists(os.path.join(d, '.hg')):
24 raise ConfigError("%s: not a mercurial repository" % d)
30 This repository will be relinked to share space with ORIGIN, which must be
31 on the same local disk. If ORIGIN is omitted, looks for "default-relink",
32 then "default", in [paths].
25 33
26 def collect(src):
34 Do not attempt any read operations on this repository while the command is
35 running. (Both repositories will be locked against writes.)
36 """
37 src = hg.repository(
38 cmdutil.remoteui(repo, opts),
39 ui.expandpath(origin or 'default-relink', origin or 'default'))
40 if not src.local():
41 raise util.Abort('must specify local origin repository')
42 ui.status(_('relinking %s to %s\n') % (src.store.path, repo.store.path))
43 locallock = repo.lock()
44 try:
45 remotelock = src.lock()
46 try:
47 candidates = collect(src.store.path, ui)
48 targets = prune(candidates, repo.store.path, ui)
49 do_relink(src.store.path, repo.store.path, targets, ui)
50 finally:
51 remotelock.release()
52 finally:
53 locallock.release()
54
55 def collect(src, ui):
27 56 seplen = len(os.path.sep)
28 57 candidates = []
29 58 for dirpath, dirnames, filenames in os.walk(src):
30 59 relpath = dirpath[len(src) + seplen:]
31 60 for filename in filenames:
32 if not filename.endswith('.i'):
61 if not filename[-2:] in ('.d', '.i'):
33 62 continue
34 63 st = os.stat(os.path.join(dirpath, filename))
64 if not stat.S_ISREG(st.st_mode):
65 continue
35 66 candidates.append((os.path.join(relpath, filename), st))
36 67
68 ui.status(_('collected %d candidate storage files\n') % len(candidates))
37 69 return candidates
38 70
39 def prune(candidates, dst):
40 def getdatafile(path):
41 if not path.endswith('.i'):
42 return None, None
43 df = path[:-1] + 'd'
44 try:
45 st = os.stat(df)
46 except OSError:
47 return None, None
48 return df, st
49
71 def prune(candidates, dst, ui):
50 72 def linkfilter(dst, st):
51 73 try:
52 74 ts = os.stat(dst)
@@ -57,9 +79,9 b' def prune(candidates, dst):'
57 79 return False
58 80 if st.st_dev != ts.st_dev:
59 81 # No point in continuing
60 raise Exception('Source and destination are on different devices')
82 raise util.Abort(
83 _('source and destination are on different devices'))
61 84 if st.st_size != ts.st_size:
62 # TODO: compare revlog heads
63 85 return False
64 86 return st
65 87
@@ -68,15 +90,14 b' def prune(candidates, dst):'
68 90 tgt = os.path.join(dst, fn)
69 91 ts = linkfilter(tgt, st)
70 92 if not ts:
93 ui.debug(_('not linkable: %s\n') % fn)
71 94 continue
72 95 targets.append((fn, ts.st_size))
73 df, ts = getdatafile(tgt)
74 if df:
75 targets.append((fn[:-1] + 'd', ts.st_size))
76 96
97 ui.status(_('pruned down to %d probably relinkable files\n') % len(targets))
77 98 return targets
78 99
79 def relink(src, dst, files):
100 def do_relink(src, dst, files, ui):
80 101 def relinkfile(src, dst):
81 102 bak = dst + '.bak'
82 103 os.rename(dst, bak)
@@ -91,7 +112,10 b' def relink(src, dst, files):'
91 112 relinked = 0
92 113 savedbytes = 0
93 114
115 pos = 0
116 total = len(files)
94 117 for f, sz in files:
118 pos += 1
95 119 source = os.path.join(src, f)
96 120 tgt = os.path.join(dst, f)
97 121 sfp = file(source)
@@ -103,26 +127,23 b' def relink(src, dst, files):'
103 127 break
104 128 sin = sfp.read(CHUNKLEN)
105 129 if sin:
130 ui.debug(_('not linkable: %s\n') % f)
106 131 continue
107 132 try:
108 133 relinkfile(source, tgt)
109 print 'Relinked %s' % f
134 ui.progress(_('relink'), pos, f, _(' files'), total)
110 135 relinked += 1
111 136 savedbytes += sz
112 137 except OSError, inst:
113 print '%s: %s' % (tgt, str(inst))
138 ui.warn(_('%s: %s\n') % (tgt, str(inst)))
114 139
115 print 'Relinked %d files (%d bytes reclaimed)' % (relinked, savedbytes)
140 ui.status(_('relinked %d files (%d bytes reclaimed)\n') %
141 (relinked, savedbytes))
116 142
117 try:
118 cfg = Config(sys.argv)
119 except ConfigError, inst:
120 print str(inst)
121 usage()
122 sys.exit(1)
123
124 src = os.path.join(cfg.src, '.hg')
125 dst = os.path.join(cfg.dst, '.hg')
126 candidates = collect(src)
127 targets = prune(candidates, dst)
128 relink(src, dst, targets)
143 cmdtable = {
144 'relink': (
145 relink,
146 [],
147 _('[ORIGIN]')
148 )
149 }
General Comments 0
You need to be logged in to leave comments. Login now