Show More
@@ -1,128 +1,149 | |||||
1 | #!/usr/bin/env python |
|
1 | # Mercurial extension to provide 'hg relink' command | |
2 | # |
|
2 | # | |
3 | # Copyright (C) 2007 Brendan Cully <brendan@kublai.com> |
|
3 | # Copyright (C) 2007 Brendan Cully <brendan@kublai.com> | |
4 | # |
|
4 | # | |
5 | # This software may be used and distributed according to the terms of the |
|
5 | # This software may be used and distributed according to the terms of the | |
6 | # GNU General Public License version 2, incorporated herein by reference. |
|
6 | # GNU General Public License version 2, incorporated herein by reference. | |
7 |
|
7 | |||
8 | import os, sys |
|
8 | """recreates hardlinks between repository clones""" | |
9 |
|
9 | |||
10 | class ConfigError(Exception): pass |
|
10 | from mercurial import cmdutil, hg, util | |
|
11 | from mercurial.i18n import _ | |||
|
12 | import os, stat | |||
|
13 | ||||
|
14 | def relink(ui, repo, origin=None, **opts): | |||
|
15 | """recreate hardlinks between two repositories | |||
11 |
|
16 | |||
12 | def usage(): |
|
17 | When repositories are cloned locally, their data files will be hardlinked | |
13 | print """relink <source> <destination> |
|
18 | so that they only use the space of a single repository. | |
14 | Recreate hard links between source and destination repositories""" |
|
19 | ||
|
20 | Unfortunately, subsequent pulls into either repository will break hardlinks | |||
|
21 | for any files touched by the new changesets, even if both repositories end | |||
|
22 | up pulling the same changes. | |||
|
23 | ||||
|
24 | Similarly, passing --rev to "hg clone" will fail to use | |||
|
25 | any hardlinks, falling back to a complete copy of the source repository. | |||
|
26 | ||||
|
27 | This command lets you recreate those hardlinks and reclaim that wasted | |||
|
28 | space. | |||
15 |
|
29 | |||
16 | class Config: |
|
30 | This repository will be relinked to share space with ORIGIN, which must be | |
17 | def __init__(self, args): |
|
31 | on the same local disk. If ORIGIN is omitted, looks for "default-relink", | |
18 | if len(args) != 3: |
|
32 | then "default", in [paths]. | |
19 | raise ConfigError("wrong number of arguments") |
|
|||
20 | self.src = os.path.abspath(args[1]) |
|
|||
21 | self.dst = os.path.abspath(args[2]) |
|
|||
22 | for d in (self.src, self.dst): |
|
|||
23 | if not os.path.exists(os.path.join(d, '.hg')): |
|
|||
24 | raise ConfigError("%s: not a mercurial repository" % d) |
|
|||
25 |
|
33 | |||
26 | def collect(src): |
|
34 | Do not attempt any read operations on this repository while the command is | |
|
35 | running. (Both repositories will be locked against writes.) | |||
|
36 | """ | |||
|
37 | src = hg.repository( | |||
|
38 | cmdutil.remoteui(repo, opts), | |||
|
39 | ui.expandpath(origin or 'default-relink', origin or 'default')) | |||
|
40 | if not src.local(): | |||
|
41 | raise util.Abort('must specify local origin repository') | |||
|
42 | ui.status(_('relinking %s to %s\n') % (src.store.path, repo.store.path)) | |||
|
43 | locallock = repo.lock() | |||
|
44 | try: | |||
|
45 | remotelock = src.lock() | |||
|
46 | try: | |||
|
47 | candidates = collect(src.store.path, ui) | |||
|
48 | targets = prune(candidates, repo.store.path, ui) | |||
|
49 | do_relink(src.store.path, repo.store.path, targets, ui) | |||
|
50 | finally: | |||
|
51 | remotelock.release() | |||
|
52 | finally: | |||
|
53 | locallock.release() | |||
|
54 | ||||
|
55 | def collect(src, ui): | |||
27 | seplen = len(os.path.sep) |
|
56 | seplen = len(os.path.sep) | |
28 | candidates = [] |
|
57 | candidates = [] | |
29 | for dirpath, dirnames, filenames in os.walk(src): |
|
58 | for dirpath, dirnames, filenames in os.walk(src): | |
30 | relpath = dirpath[len(src) + seplen:] |
|
59 | relpath = dirpath[len(src) + seplen:] | |
31 | for filename in filenames: |
|
60 | for filename in filenames: | |
32 |
if not filename |
|
61 | if not filename[-2:] in ('.d', '.i'): | |
33 | continue |
|
62 | continue | |
34 | st = os.stat(os.path.join(dirpath, filename)) |
|
63 | st = os.stat(os.path.join(dirpath, filename)) | |
|
64 | if not stat.S_ISREG(st.st_mode): | |||
|
65 | continue | |||
35 | candidates.append((os.path.join(relpath, filename), st)) |
|
66 | candidates.append((os.path.join(relpath, filename), st)) | |
36 |
|
67 | |||
|
68 | ui.status(_('collected %d candidate storage files\n') % len(candidates)) | |||
37 | return candidates |
|
69 | return candidates | |
38 |
|
70 | |||
39 | def prune(candidates, dst): |
|
71 | def prune(candidates, dst, ui): | |
40 | def getdatafile(path): |
|
|||
41 | if not path.endswith('.i'): |
|
|||
42 | return None, None |
|
|||
43 | df = path[:-1] + 'd' |
|
|||
44 | try: |
|
|||
45 | st = os.stat(df) |
|
|||
46 | except OSError: |
|
|||
47 | return None, None |
|
|||
48 | return df, st |
|
|||
49 |
|
||||
50 | def linkfilter(dst, st): |
|
72 | def linkfilter(dst, st): | |
51 | try: |
|
73 | try: | |
52 | ts = os.stat(dst) |
|
74 | ts = os.stat(dst) | |
53 | except OSError: |
|
75 | except OSError: | |
54 | # Destination doesn't have this file? |
|
76 | # Destination doesn't have this file? | |
55 | return False |
|
77 | return False | |
56 | if st.st_ino == ts.st_ino: |
|
78 | if st.st_ino == ts.st_ino: | |
57 | return False |
|
79 | return False | |
58 | if st.st_dev != ts.st_dev: |
|
80 | if st.st_dev != ts.st_dev: | |
59 | # No point in continuing |
|
81 | # No point in continuing | |
60 | raise Exception('Source and destination are on different devices') |
|
82 | raise util.Abort( | |
|
83 | _('source and destination are on different devices')) | |||
61 | if st.st_size != ts.st_size: |
|
84 | if st.st_size != ts.st_size: | |
62 | # TODO: compare revlog heads |
|
|||
63 | return False |
|
85 | return False | |
64 | return st |
|
86 | return st | |
65 |
|
87 | |||
66 | targets = [] |
|
88 | targets = [] | |
67 | for fn, st in candidates: |
|
89 | for fn, st in candidates: | |
68 | tgt = os.path.join(dst, fn) |
|
90 | tgt = os.path.join(dst, fn) | |
69 | ts = linkfilter(tgt, st) |
|
91 | ts = linkfilter(tgt, st) | |
70 | if not ts: |
|
92 | if not ts: | |
|
93 | ui.debug(_('not linkable: %s\n') % fn) | |||
71 | continue |
|
94 | continue | |
72 | targets.append((fn, ts.st_size)) |
|
95 | targets.append((fn, ts.st_size)) | |
73 | df, ts = getdatafile(tgt) |
|
|||
74 | if df: |
|
|||
75 | targets.append((fn[:-1] + 'd', ts.st_size)) |
|
|||
76 |
|
96 | |||
|
97 | ui.status(_('pruned down to %d probably relinkable files\n') % len(targets)) | |||
77 | return targets |
|
98 | return targets | |
78 |
|
99 | |||
79 | def relink(src, dst, files): |
|
100 | def do_relink(src, dst, files, ui): | |
80 | def relinkfile(src, dst): |
|
101 | def relinkfile(src, dst): | |
81 | bak = dst + '.bak' |
|
102 | bak = dst + '.bak' | |
82 | os.rename(dst, bak) |
|
103 | os.rename(dst, bak) | |
83 | try: |
|
104 | try: | |
84 | os.link(src, dst) |
|
105 | os.link(src, dst) | |
85 | except OSError: |
|
106 | except OSError: | |
86 | os.rename(bak, dst) |
|
107 | os.rename(bak, dst) | |
87 | raise |
|
108 | raise | |
88 | os.remove(bak) |
|
109 | os.remove(bak) | |
89 |
|
110 | |||
90 | CHUNKLEN = 65536 |
|
111 | CHUNKLEN = 65536 | |
91 | relinked = 0 |
|
112 | relinked = 0 | |
92 | savedbytes = 0 |
|
113 | savedbytes = 0 | |
93 |
|
114 | |||
|
115 | pos = 0 | |||
|
116 | total = len(files) | |||
94 | for f, sz in files: |
|
117 | for f, sz in files: | |
|
118 | pos += 1 | |||
95 | source = os.path.join(src, f) |
|
119 | source = os.path.join(src, f) | |
96 | tgt = os.path.join(dst, f) |
|
120 | tgt = os.path.join(dst, f) | |
97 | sfp = file(source) |
|
121 | sfp = file(source) | |
98 | dfp = file(tgt) |
|
122 | dfp = file(tgt) | |
99 | sin = sfp.read(CHUNKLEN) |
|
123 | sin = sfp.read(CHUNKLEN) | |
100 | while sin: |
|
124 | while sin: | |
101 | din = dfp.read(CHUNKLEN) |
|
125 | din = dfp.read(CHUNKLEN) | |
102 | if sin != din: |
|
126 | if sin != din: | |
103 | break |
|
127 | break | |
104 | sin = sfp.read(CHUNKLEN) |
|
128 | sin = sfp.read(CHUNKLEN) | |
105 | if sin: |
|
129 | if sin: | |
|
130 | ui.debug(_('not linkable: %s\n') % f) | |||
106 | continue |
|
131 | continue | |
107 | try: |
|
132 | try: | |
108 | relinkfile(source, tgt) |
|
133 | relinkfile(source, tgt) | |
109 | print 'Relinked %s' % f |
|
134 | ui.progress(_('relink'), pos, f, _(' files'), total) | |
110 | relinked += 1 |
|
135 | relinked += 1 | |
111 | savedbytes += sz |
|
136 | savedbytes += sz | |
112 | except OSError, inst: |
|
137 | except OSError, inst: | |
113 |
|
|
138 | ui.warn(_('%s: %s\n') % (tgt, str(inst))) | |
114 |
|
139 | |||
115 |
|
|
140 | ui.status(_('relinked %d files (%d bytes reclaimed)\n') % | |
|
141 | (relinked, savedbytes)) | |||
116 |
|
142 | |||
117 | try: |
|
143 | cmdtable = { | |
118 | cfg = Config(sys.argv) |
|
144 | 'relink': ( | |
119 | except ConfigError, inst: |
|
145 | relink, | |
120 | print str(inst) |
|
146 | [], | |
121 | usage() |
|
147 | _('[ORIGIN]') | |
122 | sys.exit(1) |
|
148 | ) | |
123 |
|
149 | } | ||
124 | src = os.path.join(cfg.src, '.hg') |
|
|||
125 | dst = os.path.join(cfg.dst, '.hg') |
|
|||
126 | candidates = collect(src) |
|
|||
127 | targets = prune(candidates, dst) |
|
|||
128 | relink(src, dst, targets) |
|
General Comments 0
You need to be logged in to leave comments.
Login now