##// END OF EJS Templates
relink/progress: Adding progress for collecting stage
timeless -
r11355:9011036b default
parent child Browse files
Show More
@@ -1,164 +1,179 b''
1 1 # Mercurial extension to provide 'hg relink' command
2 2 #
3 3 # Copyright (C) 2007 Brendan Cully <brendan@kublai.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 """recreates hardlinks between repository clones"""
9 9
10 10 from mercurial import cmdutil, hg, util
11 11 from mercurial.i18n import _
12 12 import os, stat
13 13
def relink(ui, repo, origin=None, **opts):
    """recreate hardlinks between two repositories

    When repositories are cloned locally, their data files will be
    hardlinked so that they only use the space of a single repository.

    Unfortunately, subsequent pulls into either repository will break
    hardlinks for any files touched by the new changesets, even if
    both repositories end up pulling the same changes.

    Similarly, passing --rev to "hg clone" will fail to use any
    hardlinks, falling back to a complete copy of the source
    repository.

    This command lets you recreate those hardlinks and reclaim that
    wasted space.

    This repository will be relinked to share space with ORIGIN, which
    must be on the same local disk. If ORIGIN is omitted, looks for
    "default-relink", then "default", in [paths].

    Do not attempt any read operations on this repository while the
    command is running. (Both repositories will be locked against
    writes.)
    """
    # prune() relies on both helpers, so bail out early when the platform's
    # util module does not provide them.
    if not hasattr(util, 'samefile') or not hasattr(util, 'samedevice'):
        raise util.Abort(_('hardlinks are not supported on this system'))

    # Resolve ORIGIN through [paths]: an explicit argument is used as given,
    # otherwise 'default-relink' is tried with 'default' as the fallback.
    src = hg.repository(
        hg.remoteui(repo, opts),
        ui.expandpath(origin or 'default-relink', origin or 'default'))
    if not src.local():
        # Wrapped in _() like every other user-visible message in this
        # module, so that it can be translated.
        raise util.Abort(_('must specify local origin repository'))
    ui.status(_('relinking %s to %s\n') % (src.store.path, repo.store.path))

    # Lock this repository first, then the origin; release in reverse order
    # even when one of the stages raises.
    locallock = repo.lock()
    try:
        remotelock = src.lock()
        try:
            # Three stages: gather candidate store files from the origin,
            # drop those that cannot be linked, then compare and link.
            candidates = sorted(collect(src, ui))
            targets = prune(candidates, src.store.path, repo.store.path, ui)
            do_relink(src.store.path, repo.store.path, targets, ui)
        finally:
            remotelock.release()
    finally:
        locallock.release()
58 58
def collect(src, ui):
    """Gather the candidate storage files found in the store of ``src``.

    ``src`` is a repository object; its ``store.path`` directory is walked
    recursively. Every regular file whose name ends in '.d' or '.i' yields
    one ``(path relative to the store, os.stat() result)`` pair. The pairs
    are returned as a list in os.walk() order.

    Progress is reported through ``ui`` under the 'collecting' topic.
    """
    # The real number of files is unknown until the walk is done, so the
    # progress total is estimated from the tip manifest. Repositories also
    # store files that were deleted before tip; assume 3 tracked files for
    # every 2 files live at tip (mozilla-central as of 2010-06-10 had a
    # ratio of just over 7:5). Because this is only an estimate, the
    # reported position may end up larger than the total.
    tipfiles = len(src['tip'].manifest())
    estimate = tipfiles * 3 // 2
    storepath = src.store.path
    ui.status(_("tip has %d files, estimated total number of files: %s\n")
              % (tipfiles, estimate))

    # Number of leading characters to strip from each directory path to
    # make it relative to the store (store path plus one separator).
    skip = len(storepath) + len(os.path.sep)
    found = []
    for dirpath, dirnames, filenames in os.walk(storepath):
        reldir = dirpath[skip:]
        for name in filenames:
            if name[-2:] not in ('.d', '.i'):
                continue
            st = os.stat(os.path.join(dirpath, name))
            if stat.S_ISREG(st.st_mode):
                found.append((os.path.join(reldir, name), st))
                # len(found) is the running count of accepted files.
                ui.progress(_('collecting'), len(found), name, _('files'),
                            estimate)

    ui.progress(_('collecting'), None)
    ui.status(_('collected %d candidate storage files\n') % len(found))
    return found
74 89
def prune(candidates, src, dst, ui):
    """Drop the candidates that cannot possibly be relinked.

    ``candidates`` is a sequence of ``(relative name, source stat result)``
    pairs; ``src`` and ``dst`` are the two store directories the names are
    joined onto. A candidate is kept when the destination file exists, is
    not already the same file as the source, and has the same size.

    Returns a list of ``(relative name, size)`` pairs. Raises util.Abort
    as soon as a source/destination pair is found on different devices.
    """
    def linkable(srcfile, dstfile, srcstat):
        # The checks run in a fixed order: existence, identity, device,
        # size. Only the device check is fatal.
        try:
            dststat = os.stat(dstfile)
        except OSError:
            # Destination doesn't have this file?
            return False
        if util.samefile(srcfile, dstfile):
            return False
        if not util.samedevice(srcfile, dstfile):
            # No point in continuing
            raise util.Abort(
                _('source and destination are on different devices'))
        return srcstat.st_size == dststat.st_size

    kept = []
    count = len(candidates)
    seen = 0
    for name, srcstat in candidates:
        seen += 1
        if linkable(os.path.join(src, name), os.path.join(dst, name),
                    srcstat):
            kept.append((name, srcstat.st_size))
            # Progress is only reported for files that are kept.
            ui.progress(_('pruning'), seen, name, _(' files'), count)
        else:
            ui.debug(_('not linkable: %s\n') % name)

    ui.progress(_('pruning'), None)
    ui.status(_('pruned down to %d probably relinkable files\n') % len(kept))
    return kept
109 124
110 125 def do_relink(src, dst, files, ui):
111 126 def relinkfile(src, dst):
112 127 bak = dst + '.bak'
113 128 os.rename(dst, bak)
114 129 try:
115 130 util.os_link(src, dst)
116 131 except OSError:
117 132 os.rename(bak, dst)
118 133 raise
119 134 os.remove(bak)
120 135
121 136 CHUNKLEN = 65536
122 137 relinked = 0
123 138 savedbytes = 0
124 139
125 140 pos = 0
126 141 total = len(files)
127 142 for f, sz in files:
128 143 pos += 1
129 144 source = os.path.join(src, f)
130 145 tgt = os.path.join(dst, f)
131 146 # Binary mode, so that read() works correctly, especially on Windows
132 147 sfp = file(source, 'rb')
133 148 dfp = file(tgt, 'rb')
134 149 sin = sfp.read(CHUNKLEN)
135 150 while sin:
136 151 din = dfp.read(CHUNKLEN)
137 152 if sin != din:
138 153 break
139 154 sin = sfp.read(CHUNKLEN)
140 155 sfp.close()
141 156 dfp.close()
142 157 if sin:
143 158 ui.debug(_('not linkable: %s\n') % f)
144 159 continue
145 160 try:
146 161 relinkfile(source, tgt)
147 162 ui.progress(_('relinking'), pos, f, _(' files'), total)
148 163 relinked += 1
149 164 savedbytes += sz
150 165 except OSError, inst:
151 166 ui.warn('%s: %s\n' % (tgt, str(inst)))
152 167
153 168 ui.progress(_('relinking'), None)
154 169
155 170 ui.status(_('relinked %d files (%d bytes reclaimed)\n') %
156 171 (relinked, savedbytes))
157 172
# Table with a single 'relink' entry: the relink function, an empty list and
# the translatable '[ORIGIN]' string.
# NOTE(review): presumably (function, options, synopsis) as read by
# Mercurial's extension loader -- confirm.
cmdtable = {
    'relink': (relink, [], _('[ORIGIN]')),
}
@@ -1,27 +1,33 b''
1 1 % create source repository
2 2 adding a
3 3 adding b
4 4 % clone and pull to break links
5 5 requesting all changes
6 6 adding changesets
7 7 adding manifests
8 8 adding file changes
9 9 added 1 changesets with 2 changes to 2 files
10 10 updating to branch default
11 11 2 files updated, 0 files merged, 0 files removed, 0 files unresolved
12 12 created new head
13 13 % relink
14 14 relinking .hg/store
15 tip has 2 files, estimated total number of files: 3
16 collecting: 00changelog.i 1/3 files (33.33%)
17 collecting: 00manifest.i 2/3 files (66.67%)
18 collecting: dummy.i 3/3 files (100.00%)
19 collecting: b.i 4/3 files (133.33%)
20 collecting: a.i 5/3 files (166.67%)
15 21 collected 5 candidate storage files
16 22 not linkable: 00changelog.i
17 23 not linkable: 00manifest.i
18 24 pruning: data/a.i 3/5 files (60.00%)
19 25 not linkable: data/b.i
20 26 pruning: data/dummy.i 5/5 files (100.00%)
21 27 pruned down to 2 probably relinkable files
22 28 relinking: data/a.i 1/2 files (50.00%)
23 29 not linkable: data/dummy.i
24 30 relinked 1 files (136 bytes reclaimed)
25 31 % check hardlinks
26 32 repo/.hg/store/data/a.i == clone/.hg/store/data/a.i
27 33 repo/.hg/store/data/b.i != clone/.hg/store/data/b.i
General Comments 0
You need to be logged in to leave comments. Login now