##// END OF EJS Templates
convert: add progress support
Patrick Mezard -
r11135:73a4ed3b default
parent child Browse files
Show More
@@ -1,404 +1,409
1 1 # convcmd - convert extension commands definition
2 2 #
3 3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from common import NoRepo, MissingTool, SKIPREV, mapfile
9 9 from cvs import convert_cvs
10 10 from darcs import darcs_source
11 11 from git import convert_git
12 12 from hg import mercurial_source, mercurial_sink
13 13 from subversion import svn_source, svn_sink
14 14 from monotone import monotone_source
15 15 from gnuarch import gnuarch_source
16 16 from bzr import bzr_source
17 17 from p4 import p4_source
18 18 import filemap
19 19
20 20 import os, shutil
21 21 from mercurial import hg, util, encoding
22 22 from mercurial.i18n import _
23 23
24 24 orig_encoding = 'ascii'
25 25
def recode(s):
    """Return s as a byte string in the original local encoding.

    Byte strings are decoded as UTF-8 first; unicode objects are used
    as-is. Characters that cannot be represented in orig_encoding are
    substituted (codec error handler 'replace') instead of raising.
    """
    if not isinstance(s, unicode):
        s = s.decode('utf-8')
    return s.encode(orig_encoding, 'replace')
31 31
# Known source repository types, tried in this order by convertsource()
# when no explicit type is requested. Each entry is
# (type name, source class/factory, default sort mode).
source_converters = [
    ('cvs', convert_cvs, 'branchsort'),
    ('git', convert_git, 'branchsort'),
    ('svn', svn_source, 'branchsort'),
    ('hg', mercurial_source, 'sourcesort'),
    ('darcs', darcs_source, 'branchsort'),
    ('mtn', monotone_source, 'branchsort'),
    ('gnuarch', gnuarch_source, 'branchsort'),
    ('bzr', bzr_source, 'branchsort'),
    ('p4', p4_source, 'branchsort'),
    ]
43 43
# Known destination repository types, tried in this order by convertsink()
# when no explicit type is requested. Each entry is
# (type name, sink class/factory).
sink_converters = [
    ('hg', mercurial_sink),
    ('svn', svn_sink),
    ]
48 48
49 49 def convertsource(ui, path, type, rev):
50 50 exceptions = []
51 51 if type and type not in [s[0] for s in source_converters]:
52 52 raise util.Abort(_('%s: invalid source repository type') % type)
53 53 for name, source, sortmode in source_converters:
54 54 try:
55 55 if not type or name == type:
56 56 return source(ui, path, rev), sortmode
57 57 except (NoRepo, MissingTool), inst:
58 58 exceptions.append(inst)
59 59 if not ui.quiet:
60 60 for inst in exceptions:
61 61 ui.write("%s\n" % inst)
62 62 raise util.Abort(_('%s: missing or unsupported repository') % path)
63 63
64 64 def convertsink(ui, path, type):
65 65 if type and type not in [s[0] for s in sink_converters]:
66 66 raise util.Abort(_('%s: invalid destination repository type') % type)
67 67 for name, sink in sink_converters:
68 68 try:
69 69 if not type or name == type:
70 70 return sink(ui, path)
71 71 except NoRepo, inst:
72 72 ui.note(_("convert: %s\n") % inst)
73 73 raise util.Abort(_('%s: unknown repository type') % path)
74 74
class converter(object):
    """Drive the conversion of revisions from a source to a sink.

    The object keeps a persistent source-revision -> destination-revision
    map, an author map, and optional splice and branch maps, and uses them
    while copying every not-yet-converted revision in topological order.
    """

    def __init__(self, ui, source, dest, revmapfile, opts):
        """Set up conversion state.

        ui is used for all user output, source and dest are the converter
        source and sink objects, revmapfile is the path of the persistent
        revision map, and opts is the option dictionary; the keys read
        here are 'authors', 'splicemap' and 'branchmap'.
        """
        self.source = source
        self.dest = dest
        self.ui = ui
        self.opts = opts
        # Source revision ID -> commit object, filled by cachecommit().
        self.commitcache = {}
        # Source author -> destination author.
        self.authors = {}
        # Where writeauthormap() saves the author map; None means the map
        # is not written back.
        self.authorfile = None

        # Record converted revisions persistently: maps source revision
        # ID to target revision ID (both strings).  (This is how
        # incremental conversions work.)
        self.map = mapfile(ui, revmapfile)

        # Read first the dst author map if any
        authorfile = self.dest.authorfile()
        if authorfile and os.path.exists(authorfile):
            self.readauthormap(authorfile)
        # Extend/Override with new author map if necessary
        if opts.get('authors'):
            self.readauthormap(opts.get('authors'))
            self.authorfile = self.dest.authorfile()

        self.splicemap = mapfile(ui, opts.get('splicemap'))
        self.branchmap = mapfile(ui, opts.get('branchmap'))

    def walktree(self, heads):
        """Return a mapping that identifies the parents of every
        uncommitted changeset.

        Starting from heads, parent links are followed through every
        revision that is not yet in self.map. Each visited revision is
        loaded with cachecommit() and mapped to the list of all its
        parents as reported by the commit. Progress is reported under the
        'scanning' topic and cleared at the end.
        """
        # Work on a copy so that the caller's list is not consumed.
        visit = list(heads)
        known = set()
        parents = {}
        while visit:
            n = visit.pop(0)
            if n in known or n in self.map:
                continue
            known.add(n)
            self.ui.progress(_('scanning'), len(known), unit=_('revisions'))
            commit = self.cachecommit(n)
            parents[n] = []
            for p in commit.parents:
                parents[n].append(p)
                visit.append(p)
        self.ui.progress(_('scanning'), None)

        return parents

    def toposort(self, parents, sortmode):
        """Return an ordering such that every uncommitted changeset is
        preceded by all its uncommitted ancestors.

        parents is the mapping built by walktree(). sortmode selects how
        to choose among revisions that are eligible at the same time and
        must be 'branchsort', 'datesort' or 'sourcesort'.

        Raises util.Abort for an unknown sort mode, when a cycle is
        detected, or when some revisions could not be sorted.
        """

        def mapchildren(parents):
            """Return a (children, roots) tuple where 'children' maps parent
            revision identifiers to children ones, and 'roots' is the list of
            revisions without parents. 'parents' must be a mapping of revision
            identifier to its parents ones.
            """
            visit = parents.keys()
            seen = set()
            children = {}
            roots = []

            while visit:
                n = visit.pop(0)
                if n in seen:
                    continue
                seen.add(n)
                # Ensure that nodes without parents are present in the
                # 'children' mapping.
                children.setdefault(n, [])
                hasparent = False
                for p in parents[n]:
                    if p not in self.map:
                        # Only parents still to be converted count when
                        # deciding whether n is a root.
                        visit.append(p)
                        hasparent = True
                    children.setdefault(p, []).append(n)
                if not hasparent:
                    roots.append(n)

            return children, roots

        # Sort functions are supposed to take a list of revisions which
        # can be converted immediately and pick one

        def makebranchsorter():
            """If the previously converted revision has a child in the
            eligible revisions list, pick it. Return the list head
            otherwise. Branch sort attempts to minimize branch
            switching, which is harmful for Mercurial backend
            compression.
            """
            # One-element list so the closure can rebind the value.
            prev = [None]
            def picknext(nodes):
                chosen = nodes[0]
                for n in nodes:
                    if prev[0] in parents[n]:
                        chosen = n
                        break
                prev[0] = chosen
                return chosen
            return picknext

        def makesourcesorter():
            """Source specific sort."""
            keyfn = lambda n: self.commitcache[n].sortkey
            def picknext(nodes):
                return sorted(nodes, key=keyfn)[0]
            return picknext

        def makedatesorter():
            """Sort revisions by date."""
            # Parsed dates are memoized per revision.
            dates = {}
            def getdate(n):
                if n not in dates:
                    dates[n] = util.parsedate(self.commitcache[n].date)
                return dates[n]

            def picknext(nodes):
                return min([(getdate(n), n) for n in nodes])[1]

            return picknext

        if sortmode == 'branchsort':
            picknext = makebranchsorter()
        elif sortmode == 'datesort':
            picknext = makedatesorter()
        elif sortmode == 'sourcesort':
            picknext = makesourcesorter()
        else:
            raise util.Abort(_('unknown sort mode: %s') % sortmode)

        children, actives = mapchildren(parents)

        s = []
        # Child revision -> parents still to be emitted, or None once the
        # child has been made eligible.
        pendings = {}
        while actives:
            n = picknext(actives)
            actives.remove(n)
            s.append(n)

            # Update dependents list
            for c in children.get(n, []):
                if c not in pendings:
                    pendings[c] = [p for p in parents[c] if p not in self.map]
                try:
                    pendings[c].remove(n)
                except ValueError:
                    raise util.Abort(_('cycle detected between %s and %s')
                                       % (recode(c), recode(n)))
                if not pendings[c]:
                    # Parents are converted, node is eligible
                    actives.insert(0, c)
                    pendings[c] = None

        if len(s) != len(parents):
            raise util.Abort(_("not all revisions were sorted"))

        return s

    def writeauthormap(self):
        """Write the author map to self.authorfile, one 'src=dst' entry
        per line. Does nothing when self.authorfile is not set."""
        authorfile = self.authorfile
        if authorfile:
            self.ui.status(_('Writing author map file %s\n') % authorfile)
            ofile = open(authorfile, 'w+')
            try:
                for author in self.authors:
                    ofile.write("%s=%s\n" % (author, self.authors[author]))
            finally:
                # Do not leak the handle if a write fails.
                ofile.close()

    def readauthormap(self, authorfile):
        """Merge the 'src=dst' entries of authorfile into self.authors.

        Blank lines and lines starting with '#' are skipped. Lines without
        an '=' are reported with ui.warn() and ignored. An entry whose
        source author is already mapped to a different destination
        replaces the previous mapping and is reported with ui.status().
        """
        afile = open(authorfile, 'r')
        try:
            for line in afile:

                line = line.strip()
                if not line or line.startswith('#'):
                    continue

                try:
                    srcauthor, dstauthor = line.split('=', 1)
                except ValueError:
                    msg = _('Ignoring bad line in author map file %s: %s\n')
                    self.ui.warn(msg % (authorfile, line.rstrip()))
                    continue

                srcauthor = srcauthor.strip()
                dstauthor = dstauthor.strip()
                if self.authors.get(srcauthor) in (None, dstauthor):
                    msg = _('mapping author %s to %s\n')
                    self.ui.debug(msg % (srcauthor, dstauthor))
                    self.authors[srcauthor] = dstauthor
                    continue

                m = _('overriding mapping for author %s, was %s, will be %s\n')
                self.ui.status(m % (srcauthor, self.authors[srcauthor],
                                    dstauthor))
                # Actually apply the override announced above; without this
                # assignment the old mapping silently stayed in effect.
                self.authors[srcauthor] = dstauthor
        finally:
            # Close the file even if reading or reporting fails.
            afile.close()

    def cachecommit(self, rev):
        """Fetch the commit for rev from the source, apply the author and
        branch maps to it, store it in self.commitcache and return it."""
        commit = self.source.getcommit(rev)
        commit.author = self.authors.get(commit.author, commit.author)
        commit.branch = self.branchmap.get(commit.branch, commit.branch)
        self.commitcache[rev] = commit
        return commit

    def copy(self, rev):
        """Convert the single source revision rev and record the result
        in self.map.

        rev must already be in self.commitcache. When the source returns a
        string instead of a (files, copies) pair, no commit is created:
        rev is mapped to SKIPREV or to whatever the named revision is
        mapped to.
        """
        commit = self.commitcache[rev]

        changes = self.source.getchanges(rev)
        if isinstance(changes, basestring):
            if changes == SKIPREV:
                dest = SKIPREV
            else:
                dest = self.map[changes]
            self.map[rev] = dest
            return
        files, copies = changes
        # (converted parent ID, parent branch) for each parent of rev.
        pbranches = []
        if commit.parents:
            for prev in commit.parents:
                if prev not in self.commitcache:
                    self.cachecommit(prev)
                pbranches.append((self.map[prev],
                                  self.commitcache[prev].branch))
        self.dest.setbranch(commit.branch, pbranches)
        try:
            # A splicemap entry replaces the natural parents; its value is
            # a comma and/or whitespace separated list of revisions.
            parents = self.splicemap[rev].replace(',', ' ').split()
            self.ui.status(_('spliced in %s as parents of %s\n') %
                           (parents, rev))
            parents = [self.map.get(p, p) for p in parents]
        except KeyError:
            parents = [b[0] for b in pbranches]
        newnode = self.dest.putcommit(files, copies, parents, commit,
                                      self.source, self.map)
        self.source.converted(rev, newnode)
        self.map[rev] = newnode

    def convert(self, sortmode):
        """Run the whole conversion.

        Scans the source from its heads, sorts the unconverted revisions
        according to sortmode, copies them one by one while reporting
        progress under the 'converting' topic, converts tags and writes
        the author map. cleanup() is always called, even on error.
        """
        try:
            self.source.before()
            self.dest.before()
            self.source.setrevmap(self.map)
            self.ui.status(_("scanning source...\n"))
            heads = self.source.getheads()
            parents = self.walktree(heads)
            self.ui.status(_("sorting...\n"))
            t = self.toposort(parents, sortmode)
            num = len(t)
            # Stays None when there was nothing to convert.
            c = None

            self.ui.status(_("converting...\n"))
            for i, c in enumerate(t):
                num -= 1
                desc = self.commitcache[c].desc
                if "\n" in desc:
                    desc = desc.splitlines()[0]
                # convert log message to local encoding without using
                # tolocal() because encoding.encoding convert() use it as
                # 'utf-8'
                self.ui.status("%d %s\n" % (num, recode(desc)))
                self.ui.note(_("source: %s\n") % recode(c))
                self.ui.progress(_('converting'), i, unit=_('revisions'),
                                 total=len(t))
                self.copy(c)
            self.ui.progress(_('converting'), None)

            tags = self.source.gettags()
            # Keep only the tags whose target revision was converted.
            ctags = {}
            for k in tags:
                v = tags[k]
                if self.map.get(v, SKIPREV) != SKIPREV:
                    ctags[k] = self.map[v]

            if c and ctags:
                nrev, tagsparent = self.dest.puttags(ctags)
                if nrev and tagsparent:
                    # write another hash correspondence to override the previous
                    # one so we don't end up with extra tag heads
                    tagsparents = [e for e in self.map.iteritems()
                                   if e[1] == tagsparent]
                    if tagsparents:
                        self.map[tagsparents[0][0]] = nrev

            self.writeauthormap()
        finally:
            self.cleanup()

    def cleanup(self):
        """Run the sink and source 'after' hooks and close the revision
        map. Each step runs even if an earlier one raised."""
        try:
            try:
                self.dest.after()
            finally:
                self.source.after()
        finally:
            # Close the map even when one of the hooks failed.
            self.map.close()
363 368
def convert(ui, src, dest=None, revmapfile=None, **opts):
    """Convert the repository at src into the repository at dest.

    ui          - user interface object used for all output
    src         - path or URL of the source repository
    dest        - destination path; defaults to hg.defaultdest(src) + "-hg"
    revmapfile  - path of the revision map; defaults to the one provided
                  by the destination sink
    opts        - command options; the keys read here are 'dest_type',
                  'source_type', 'rev', 'branchsort', 'datesort',
                  'sourcesort' and 'filemap' (the whole dictionary is
                  also handed to the converter object)

    Side effects: the module-level orig_encoding is set to the current
    encoding.encoding, which is then switched to 'UTF-8'.

    Raises util.Abort when more than one sort mode is requested or when
    --sourcesort is requested for a source without native ordering.
    """
    global orig_encoding
    orig_encoding = encoding.encoding
    encoding.encoding = 'UTF-8'

    if not dest:
        dest = hg.defaultdest(src) + "-hg"
        ui.status(_("assuming destination %s\n") % dest)

    destc = convertsink(ui, dest, opts.get('dest_type'))

    try:
        srcc, defaultsort = convertsource(ui, src, opts.get('source_type'),
                                          opts.get('rev'))
    except Exception:
        # The source could not be opened: remove whatever the sink created
        # so that no half-initialized destination is left behind.
        for path in destc.created:
            shutil.rmtree(path, True)
        raise

    sortmodes = ('branchsort', 'datesort', 'sourcesort')
    requested = [m for m in sortmodes if opts.get(m)]
    if len(requested) > 1:
        raise util.Abort(_('more than one sort mode specified'))
    if requested:
        sortmode = requested[0]
    else:
        sortmode = defaultsort
    if sortmode == 'sourcesort' and not srcc.hasnativeorder():
        raise util.Abort(_('--sourcesort is not supported by this data source'))

    fmap = opts.get('filemap')
    if fmap:
        srcc = filemap.filemap_source(ui, srcc, fmap)
        destc.setfilemapmode(True)

    if not revmapfile:
        try:
            revmapfile = destc.revmapfile()
        except Exception:
            # Best-effort fallback for sinks that cannot provide a map
            # location. The path must be built from the destination path;
            # the previous code joined the sink object itself, which can
            # never succeed.
            # NOTE(review): assumes dest is a local directory -- confirm.
            revmapfile = os.path.join(dest, "map")

    c = converter(ui, srcc, destc, revmapfile, opts)
    c.convert(sortmode)
404 409
@@ -1,53 +1,70
#!/bin/sh
# Test script for converting Subversion repositories with the convert
# extension: moves/copies across trunk and branches, files replaced by
# directories, and the progress bar output. Its output is compared with
# a recorded expected-output file, so the echoed text must not change.

# Stop with status 80 when the svn tools or bindings are not available.
"$TESTDIR/hghave" svn svn-bindings || exit 80

# Filter turning backslashes into forward slashes.
fix_path()
{
    tr '\\' /
}

# Enable the extensions used below.
echo "[extensions]" >> $HGRCPATH
echo "convert = " >> $HGRCPATH
echo "hgext.graphlog =" >> $HGRCPATH

# Build the source repository from a dump file.
svnadmin create svn-repo
cat "$TESTDIR/svn/move.svndump" | svnadmin load svn-repo > /dev/null

svnpath=`pwd | fix_path`
# SVN wants all paths to start with a slash. Unfortunately,
# Windows ones don't. Handle that.
expr "$svnpath" : "\/" > /dev/null
if [ $? -ne 0 ]; then
    svnpath="/$svnpath"
fi
svnurl="file://$svnpath/svn-repo"

echo % convert trunk and branches
hg convert --datesort "$svnurl"/subproject A-hg

cd A-hg
hg glog --template '{rev} {desc|firstline} files: {files}\n'
echo '% check move copy records'
hg st --rev 12:13 --copies
echo '% check branches'
# Cut everything after the first colon of each line.
hg branches | sed 's/:.*/:/'
cd ..

# Second scenario: a separate repository loaded from replace.svndump.
mkdir test-replace
cd test-replace
svnadmin create svn-repo
cat "$TESTDIR/svn/replace.svndump" | svnadmin load svn-repo > /dev/null

echo '% convert files being replaced by directories'
hg convert svn-repo hg-repo
cd hg-repo
echo '% manifest before'
hg -v manifest -r 1
echo '% manifest after clobber1'
hg -v manifest -r 2
echo '% manifest after clobber2'
hg -v manifest -r 3
echo '% try updating'
hg up -qC default
cd ..

echo '% test convert progress bar'

# Enable the progress extension (these lines are appended while the
# [extensions] section is still the last one in $HGRCPATH) and then
# configure it.
echo "progress=" >> $HGRCPATH
echo "[progress]" >> $HGRCPATH
echo "assume-tty=1" >> $HGRCPATH
echo "delay=0" >> $HGRCPATH
echo "refresh=0" >> $HGRCPATH

# Helper rewriting each run of carriage returns that is followed by a
# character other than a newline into a newline plus that character, so
# that every progress bar update ends up on its own line.
cat > filtercr.py <<EOF
import sys, re
for line in sys.stdin:
    line = re.sub(r'\r+[^\n]', lambda m: '\n' + m.group()[-1:], line)
    sys.stdout.write(line)
EOF

# stderr is merged into the pipe so it goes through the filter as well.
hg convert svn-repo hg-progress 2>&1 | python filtercr.py
@@ -1,82 +1,105
1 1 % convert trunk and branches
2 2 initializing destination A-hg repository
3 3 scanning source...
4 4 sorting...
5 5 converting...
6 6 13 createtrunk
7 7 12 moved1
8 8 11 moved1
9 9 10 moved2
10 10 9 changeb and rm d2
11 11 8 changeb and rm d2
12 12 7 moved1again
13 13 6 moved1again
14 14 5 copyfilefrompast
15 15 4 copydirfrompast
16 16 3 add d3
17 17 2 copy dir and remove subdir
18 18 1 add d4old
19 19 0 rename d4old into d4new
20 20 o 13 rename d4old into d4new files: d4new/g d4old/g
21 21 |
22 22 o 12 add d4old files: d4old/g
23 23 |
24 24 o 11 copy dir and remove subdir files: d3/d31/e d4/d31/e d4/f
25 25 |
26 26 o 10 add d3 files: d3/d31/e d3/f
27 27 |
28 28 o 9 copydirfrompast files: d2/d
29 29 |
30 30 o 8 copyfilefrompast files: d
31 31 |
32 32 o 7 moved1again files: d1/b d1/c
33 33 |
34 34 | o 6 moved1again files:
35 35 | |
36 36 o | 5 changeb and rm d2 files: d1/b d2/d
37 37 | |
38 38 | o 4 changeb and rm d2 files: b
39 39 | |
40 40 o | 3 moved2 files: d2/d
41 41 | |
42 42 o | 2 moved1 files: d1/b d1/c
43 43 | |
44 44 | o 1 moved1 files: b c
45 45 |
46 46 o 0 createtrunk files:
47 47
48 48 % check move copy records
49 49 A d4new/g
50 50 d4old/g
51 51 R d4old/g
52 52 % check branches
53 53 default 13:
54 54 d1 6:
55 55 % convert files being replaced by directories
56 56 initializing destination hg-repo repository
57 57 scanning source...
58 58 sorting...
59 59 converting...
60 60 3 initial
61 61 2 clobber symlink
62 62 1 clobber1
63 63 0 clobber2
64 64 % manifest before
65 65 644 a
66 66 644 d/b
67 67 644 @ dlink
68 68 644 @ dlink2
69 69 644 dlink3
70 70 % manifest after clobber1
71 71 644 a/b
72 72 644 d/b
73 73 644 dlink/b
74 74 644 @ dlink2
75 75 644 dlink3
76 76 % manifest after clobber2
77 77 644 a/b
78 78 644 d/b
79 79 644 dlink/b
80 80 644 @ dlink2
81 81 644 @ dlink3
82 82 % try updating
83 % test convert progress bar
84
85 scanning [ <=> ] 1
86 scanning [ <=> ] 2
87 scanning [ <=> ] 3
88 scanning [ <=> ] 4
89
90 converting [ ] 0/4
91
92 converting [==============> ] 1/4
93
94 converting [==============================> ] 2/4
95
96 converting [=============================================> ] 3/4
97
98 initializing destination hg-progress repository
99 scanning source...
100 sorting...
101 converting...
102 3 initial
103 2 clobber symlink
104 1 clobber1
105 0 clobber2
General Comments 0
You need to be logged in to leave comments. Login now