##// END OF EJS Templates
Issue919: add a standard extension to recreate hardlinks between repositories....
Jesse Glick -
r9729:aa9ccab5 default
parent child Browse files
Show More
@@ -1,128 +1,149
1 #!/usr/bin/env python
1 # Mercurial extension to provide 'hg relink' command
2 #
2 #
3 # Copyright (C) 2007 Brendan Cully <brendan@kublai.com>
3 # Copyright (C) 2007 Brendan Cully <brendan@kublai.com>
4 #
4 #
5 # This software may be used and distributed according to the terms of the
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2, incorporated herein by reference.
6 # GNU General Public License version 2, incorporated herein by reference.
7
7
8 import os, sys
8 """recreates hardlinks between repository clones"""
9
9
10 class ConfigError(Exception): pass
10 from mercurial import cmdutil, hg, util
11 from mercurial.i18n import _
12 import os, stat
13
14 def relink(ui, repo, origin=None, **opts):
15 """recreate hardlinks between two repositories
11
16
12 def usage():
17 When repositories are cloned locally, their data files will be hardlinked
13 print """relink <source> <destination>
18 so that they only use the space of a single repository.
14 Recreate hard links between source and destination repositories"""
19
20 Unfortunately, subsequent pulls into either repository will break hardlinks
21 for any files touched by the new changesets, even if both repositories end
22 up pulling the same changes.
23
24 Similarly, passing --rev to "hg clone" will fail to use
25 any hardlinks, falling back to a complete copy of the source repository.
26
27 This command lets you recreate those hardlinks and reclaim that wasted
28 space.
15
29
16 class Config:
30 This repository will be relinked to share space with ORIGIN, which must be
17 def __init__(self, args):
31 on the same local disk. If ORIGIN is omitted, looks for "default-relink",
18 if len(args) != 3:
32 then "default", in [paths].
19 raise ConfigError("wrong number of arguments")
20 self.src = os.path.abspath(args[1])
21 self.dst = os.path.abspath(args[2])
22 for d in (self.src, self.dst):
23 if not os.path.exists(os.path.join(d, '.hg')):
24 raise ConfigError("%s: not a mercurial repository" % d)
25
33
26 def collect(src):
34 Do not attempt any read operations on this repository while the command is
35 running. (Both repositories will be locked against writes.)
36 """
37 src = hg.repository(
38 cmdutil.remoteui(repo, opts),
39 ui.expandpath(origin or 'default-relink', origin or 'default'))
40 if not src.local():
41 raise util.Abort('must specify local origin repository')
42 ui.status(_('relinking %s to %s\n') % (src.store.path, repo.store.path))
43 locallock = repo.lock()
44 try:
45 remotelock = src.lock()
46 try:
47 candidates = collect(src.store.path, ui)
48 targets = prune(candidates, repo.store.path, ui)
49 do_relink(src.store.path, repo.store.path, targets, ui)
50 finally:
51 remotelock.release()
52 finally:
53 locallock.release()
54
55 def collect(src, ui):
27 seplen = len(os.path.sep)
56 seplen = len(os.path.sep)
28 candidates = []
57 candidates = []
29 for dirpath, dirnames, filenames in os.walk(src):
58 for dirpath, dirnames, filenames in os.walk(src):
30 relpath = dirpath[len(src) + seplen:]
59 relpath = dirpath[len(src) + seplen:]
31 for filename in filenames:
60 for filename in filenames:
32 if not filename.endswith('.i'):
61 if not filename[-2:] in ('.d', '.i'):
33 continue
62 continue
34 st = os.stat(os.path.join(dirpath, filename))
63 st = os.stat(os.path.join(dirpath, filename))
64 if not stat.S_ISREG(st.st_mode):
65 continue
35 candidates.append((os.path.join(relpath, filename), st))
66 candidates.append((os.path.join(relpath, filename), st))
36
67
68 ui.status(_('collected %d candidate storage files\n') % len(candidates))
37 return candidates
69 return candidates
38
70
39 def prune(candidates, dst):
71 def prune(candidates, dst, ui):
40 def getdatafile(path):
41 if not path.endswith('.i'):
42 return None, None
43 df = path[:-1] + 'd'
44 try:
45 st = os.stat(df)
46 except OSError:
47 return None, None
48 return df, st
49
50 def linkfilter(dst, st):
72 def linkfilter(dst, st):
51 try:
73 try:
52 ts = os.stat(dst)
74 ts = os.stat(dst)
53 except OSError:
75 except OSError:
54 # Destination doesn't have this file?
76 # Destination doesn't have this file?
55 return False
77 return False
56 if st.st_ino == ts.st_ino:
78 if st.st_ino == ts.st_ino:
57 return False
79 return False
58 if st.st_dev != ts.st_dev:
80 if st.st_dev != ts.st_dev:
59 # No point in continuing
81 # No point in continuing
60 raise Exception('Source and destination are on different devices')
82 raise util.Abort(
83 _('source and destination are on different devices'))
61 if st.st_size != ts.st_size:
84 if st.st_size != ts.st_size:
62 # TODO: compare revlog heads
63 return False
85 return False
64 return st
86 return st
65
87
66 targets = []
88 targets = []
67 for fn, st in candidates:
89 for fn, st in candidates:
68 tgt = os.path.join(dst, fn)
90 tgt = os.path.join(dst, fn)
69 ts = linkfilter(tgt, st)
91 ts = linkfilter(tgt, st)
70 if not ts:
92 if not ts:
93 ui.debug(_('not linkable: %s\n') % fn)
71 continue
94 continue
72 targets.append((fn, ts.st_size))
95 targets.append((fn, ts.st_size))
73 df, ts = getdatafile(tgt)
74 if df:
75 targets.append((fn[:-1] + 'd', ts.st_size))
76
96
97 ui.status(_('pruned down to %d probably relinkable files\n') % len(targets))
77 return targets
98 return targets
78
99
79 def relink(src, dst, files):
100 def do_relink(src, dst, files, ui):
80 def relinkfile(src, dst):
101 def relinkfile(src, dst):
81 bak = dst + '.bak'
102 bak = dst + '.bak'
82 os.rename(dst, bak)
103 os.rename(dst, bak)
83 try:
104 try:
84 os.link(src, dst)
105 os.link(src, dst)
85 except OSError:
106 except OSError:
86 os.rename(bak, dst)
107 os.rename(bak, dst)
87 raise
108 raise
88 os.remove(bak)
109 os.remove(bak)
89
110
90 CHUNKLEN = 65536
111 CHUNKLEN = 65536
91 relinked = 0
112 relinked = 0
92 savedbytes = 0
113 savedbytes = 0
93
114
115 pos = 0
116 total = len(files)
94 for f, sz in files:
117 for f, sz in files:
118 pos += 1
95 source = os.path.join(src, f)
119 source = os.path.join(src, f)
96 tgt = os.path.join(dst, f)
120 tgt = os.path.join(dst, f)
97 sfp = file(source)
121 sfp = file(source)
98 dfp = file(tgt)
122 dfp = file(tgt)
99 sin = sfp.read(CHUNKLEN)
123 sin = sfp.read(CHUNKLEN)
100 while sin:
124 while sin:
101 din = dfp.read(CHUNKLEN)
125 din = dfp.read(CHUNKLEN)
102 if sin != din:
126 if sin != din:
103 break
127 break
104 sin = sfp.read(CHUNKLEN)
128 sin = sfp.read(CHUNKLEN)
105 if sin:
129 if sin:
130 ui.debug(_('not linkable: %s\n') % f)
106 continue
131 continue
107 try:
132 try:
108 relinkfile(source, tgt)
133 relinkfile(source, tgt)
109 print 'Relinked %s' % f
134 ui.progress(_('relink'), pos, f, _(' files'), total)
110 relinked += 1
135 relinked += 1
111 savedbytes += sz
136 savedbytes += sz
112 except OSError, inst:
137 except OSError, inst:
113 print '%s: %s' % (tgt, str(inst))
138 ui.warn(_('%s: %s\n') % (tgt, str(inst)))
114
139
115 print 'Relinked %d files (%d bytes reclaimed)' % (relinked, savedbytes)
140 ui.status(_('relinked %d files (%d bytes reclaimed)\n') %
141 (relinked, savedbytes))
116
142
117 try:
143 cmdtable = {
118 cfg = Config(sys.argv)
144 'relink': (
119 except ConfigError, inst:
145 relink,
120 print str(inst)
146 [],
121 usage()
147 _('[ORIGIN]')
122 sys.exit(1)
148 )
123
149 }
124 src = os.path.join(cfg.src, '.hg')
125 dst = os.path.join(cfg.dst, '.hg')
126 candidates = collect(src)
127 targets = prune(candidates, dst)
128 relink(src, dst, targets)
General Comments 0
You need to be logged in to leave comments. Login now