##// END OF EJS Templates
convert: process splicemap in sorted order
Mads Kiilerich -
r18372:5965997b default
parent child Browse files
Show More
@@ -1,470 +1,470
1 1 # convcmd - convert extension commands definition
2 2 #
3 3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from common import NoRepo, MissingTool, SKIPREV, mapfile
9 9 from cvs import convert_cvs
10 10 from darcs import darcs_source
11 11 from git import convert_git
12 12 from hg import mercurial_source, mercurial_sink
13 13 from subversion import svn_source, svn_sink
14 14 from monotone import monotone_source
15 15 from gnuarch import gnuarch_source
16 16 from bzr import bzr_source
17 17 from p4 import p4_source
18 18 import filemap, common
19 19
20 20 import os, shutil
21 21 from mercurial import hg, util, encoding
22 22 from mercurial.i18n import _
23 23
# Encoding used when echoing source text to the user; convert() below
# replaces it with the encoding that was active before it switched to UTF-8.
orig_encoding = 'ascii'

def recode(s):
    """Return 's' encoded in orig_encoding, replacing unencodable
    characters.

    Byte strings are decoded as UTF-8 first; unicode objects are encoded
    directly.
    """
    if not isinstance(s, unicode):
        s = s.decode('utf-8')
    return s.encode(orig_encoding, 'replace')
31 31
# Known source backends as (type name, factory, default sort mode).
# convertsource() tries them in this order when no type is requested.
source_converters = [
    ('cvs', convert_cvs, 'branchsort'),
    ('git', convert_git, 'branchsort'),
    ('svn', svn_source, 'branchsort'),
    ('hg', mercurial_source, 'sourcesort'),
    ('darcs', darcs_source, 'branchsort'),
    ('mtn', monotone_source, 'branchsort'),
    ('gnuarch', gnuarch_source, 'branchsort'),
    ('bzr', bzr_source, 'branchsort'),
    ('p4', p4_source, 'branchsort'),
    ]

# Known destination backends as (type name, factory).
# convertsink() tries them in this order when no type is requested.
sink_converters = [
    ('hg', mercurial_sink),
    ('svn', svn_sink),
    ]
48 48
49 49 def convertsource(ui, path, type, rev):
50 50 exceptions = []
51 51 if type and type not in [s[0] for s in source_converters]:
52 52 raise util.Abort(_('%s: invalid source repository type') % type)
53 53 for name, source, sortmode in source_converters:
54 54 try:
55 55 if not type or name == type:
56 56 return source(ui, path, rev), sortmode
57 57 except (NoRepo, MissingTool), inst:
58 58 exceptions.append(inst)
59 59 if not ui.quiet:
60 60 for inst in exceptions:
61 61 ui.write("%s\n" % inst)
62 62 raise util.Abort(_('%s: missing or unsupported repository') % path)
63 63
64 64 def convertsink(ui, path, type):
65 65 if type and type not in [s[0] for s in sink_converters]:
66 66 raise util.Abort(_('%s: invalid destination repository type') % type)
67 67 for name, sink in sink_converters:
68 68 try:
69 69 if not type or name == type:
70 70 return sink(ui, path)
71 71 except NoRepo, inst:
72 72 ui.note(_("convert: %s\n") % inst)
73 73 except MissingTool, inst:
74 74 raise util.Abort('%s\n' % inst)
75 75 raise util.Abort(_('%s: unknown repository type') % path)
76 76
class progresssource(object):
    """Wrapper around a conversion source that reports 'getting files'
    progress to the ui each time a file is fetched."""

    def __init__(self, ui, source, filecount):
        self.ui, self.source = ui, source
        # total passed to ui.progress() for every fetched file
        self.filecount = filecount
        # number of getfile() calls made so far
        self.retrieved = 0

    def getfile(self, file, rev):
        """Count the request, report progress, then delegate to the
        wrapped source's getfile()."""
        self.retrieved += 1
        topic = _('getting files')
        self.ui.progress(topic, self.retrieved, item=file,
                         total=self.filecount)
        return self.source.getfile(file, rev)

    def lookuprev(self, rev):
        """Delegate to the wrapped source's lookuprev()."""
        return self.source.lookuprev(rev)

    def close(self):
        """Clear the 'getting files' progress topic."""
        topic = _('getting files')
        self.ui.progress(topic, None)
95 95
class converter(object):
    """Drive one conversion run from a source object to a sink object.

    The instance keeps the persistent revision map (self.map), the
    author, splice and branch maps, and a cache of the commits fetched
    from the source (self.commitcache).
    """

    def __init__(self, ui, source, dest, revmapfile, opts):
        """Set up the maps used by the conversion.

        'opts' is read for the 'authormap', 'splicemap' and 'branchmap'
        keys; 'revmapfile' is the file backing the revision map.
        """
        self.source = source
        self.dest = dest
        self.ui = ui
        self.opts = opts
        self.commitcache = {}
        self.authors = {}
        self.authorfile = None

        # Record converted revisions persistently: maps source revision
        # ID to target revision ID (both strings).  (This is how
        # incremental conversions work.)
        self.map = mapfile(ui, revmapfile)

        # Read first the dst author map if any
        authorfile = self.dest.authorfile()
        if authorfile and os.path.exists(authorfile):
            self.readauthormap(authorfile)
        # Extend/Override with new author map if necessary
        if opts.get('authormap'):
            self.readauthormap(opts.get('authormap'))
            # only when an author map was supplied does writeauthormap()
            # get a file to write to
            self.authorfile = self.dest.authorfile()

        self.splicemap = common.parsesplicemap(opts.get('splicemap'))
        self.branchmap = mapfile(ui, opts.get('branchmap'))

    def walktree(self, heads):
        '''Return a mapping that identifies the uncommitted parents of every
        uncommitted changeset.

        'heads' is used as the work list and is consumed in place. Every
        visited revision is fetched through cachecommit(), so it is also
        available in self.commitcache afterwards.'''
        visit = heads
        known = set()
        parents = {}
        while visit:
            n = visit.pop(0)
            # skip revisions already seen in this walk or already present
            # in the revision map
            if n in known or n in self.map:
                continue
            known.add(n)
            self.ui.progress(_('scanning'), len(known), unit=_('revisions'))
            commit = self.cachecommit(n)
            parents[n] = []
            for p in commit.parents:
                parents[n].append(p)
                visit.append(p)
        self.ui.progress(_('scanning'), None)

        return parents

    def mergesplicemap(self, parents, splicemap):
        """A splicemap redefines child/parent relationships. Check the
        map contains valid revision identifiers and merge the new
        links in the source graph.

        'parents' is updated in place. Entries are processed in sorted
        key order so warnings and errors are reported in a stable order.
        Raises util.Abort when a spliced parent is neither in the
        destination nor part of this run.
        """
        for c in sorted(splicemap):
            if c not in parents:
                if not self.dest.hascommit(self.map.get(c, c)):
                    # Could be in source but not converted during this run
                    self.ui.warn(_('splice map revision %s is not being '
                                   'converted, ignoring\n') % c)
                continue
            pc = []
            for p in splicemap[c]:
                # We do not have to wait for nodes already in dest.
                if self.dest.hascommit(self.map.get(p, p)):
                    continue
                # Parent is not in dest and not being converted, not good
                if p not in parents:
                    raise util.Abort(_('unknown splice map parent: %s') % p)
                pc.append(p)
            parents[c] = pc

    def toposort(self, parents, sortmode):
        '''Return an ordering such that every uncommitted changeset is
        preceded by all its uncommitted ancestors.

        'sortmode' is one of 'branchsort', 'datesort' or 'sourcesort';
        any other value raises util.Abort. util.Abort is also raised
        when a cycle is detected or when some revisions could not be
        ordered.'''

        def mapchildren(parents):
            """Return a (children, roots) tuple where 'children' maps parent
            revision identifiers to children ones, and 'roots' is the list of
            revisions without parents. 'parents' must be a mapping of revision
            identifier to its parents ones.
            """
            # list() yields a private, poppable copy of the keys
            visit = list(parents)
            seen = set()
            children = {}
            roots = []

            while visit:
                n = visit.pop(0)
                if n in seen:
                    continue
                seen.add(n)
                # Ensure that nodes without parents are present in the
                # 'children' mapping.
                children.setdefault(n, [])
                hasparent = False
                for p in parents[n]:
                    if p not in self.map:
                        visit.append(p)
                        hasparent = True
                    children.setdefault(p, []).append(n)
                if not hasparent:
                    roots.append(n)

            return children, roots

        # Sort functions are supposed to take a list of revisions which
        # can be converted immediately and pick one

        def makebranchsorter():
            """If the previously converted revision has a child in the
            eligible revisions list, pick it. Return the list head
            otherwise. Branch sort attempts to minimize branch
            switching, which is harmful for Mercurial backend
            compression.
            """
            # one-element list so the closure can rebind the value
            prev = [None]
            def picknext(nodes):
                next = nodes[0]
                for n in nodes:
                    if prev[0] in parents[n]:
                        next = n
                        break
                prev[0] = next
                return next
            return picknext

        def makesourcesorter():
            """Source specific sort."""
            keyfn = lambda n: self.commitcache[n].sortkey
            def picknext(nodes):
                return sorted(nodes, key=keyfn)[0]
            return picknext

        def makedatesorter():
            """Sort revisions by date."""
            # cache of parsed dates, keyed by revision
            dates = {}
            def getdate(n):
                if n not in dates:
                    dates[n] = util.parsedate(self.commitcache[n].date)
                return dates[n]

            def picknext(nodes):
                return min([(getdate(n), n) for n in nodes])[1]

            return picknext

        if sortmode == 'branchsort':
            picknext = makebranchsorter()
        elif sortmode == 'datesort':
            picknext = makedatesorter()
        elif sortmode == 'sourcesort':
            picknext = makesourcesorter()
        else:
            raise util.Abort(_('unknown sort mode: %s') % sortmode)

        children, actives = mapchildren(parents)

        s = []
        pendings = {}
        while actives:
            n = picknext(actives)
            actives.remove(n)
            s.append(n)

            # Update dependents list
            for c in children.get(n, []):
                if c not in pendings:
                    pendings[c] = [p for p in parents[c] if p not in self.map]
                try:
                    pendings[c].remove(n)
                except ValueError:
                    raise util.Abort(_('cycle detected between %s and %s')
                                       % (recode(c), recode(n)))
                if not pendings[c]:
                    # Parents are converted, node is eligible
                    actives.insert(0, c)
                    pendings[c] = None

        if len(s) != len(parents):
            raise util.Abort(_("not all revisions were sorted"))

        return s

    def writeauthormap(self):
        """Write self.authors to self.authorfile as 'source=dest' lines.

        Does nothing when self.authorfile is not set.
        """
        authorfile = self.authorfile
        if authorfile:
            self.ui.status(_('writing author map file %s\n') % authorfile)
            ofile = open(authorfile, 'w+')
            try:
                for author in self.authors:
                    ofile.write("%s=%s\n" % (author, self.authors[author]))
            finally:
                # release the handle even if a write fails
                ofile.close()

    def readauthormap(self, authorfile):
        """Load 'source=dest' author mappings from 'authorfile' into
        self.authors.

        Blank lines and lines starting with '#' are skipped; lines
        without '=' are reported with a warning and ignored.
        """
        afile = open(authorfile, 'r')
        try:
            for line in afile:

                line = line.strip()
                if not line or line.startswith('#'):
                    continue

                try:
                    srcauthor, dstauthor = line.split('=', 1)
                except ValueError:
                    msg = _('ignoring bad line in author map file %s: %s\n')
                    self.ui.warn(msg % (authorfile, line.rstrip()))
                    continue

                srcauthor = srcauthor.strip()
                dstauthor = dstauthor.strip()
                if self.authors.get(srcauthor) in (None, dstauthor):
                    msg = _('mapping author %s to %s\n')
                    self.ui.debug(msg % (srcauthor, dstauthor))
                    self.authors[srcauthor] = dstauthor
                    continue

                # NOTE(review): the message announces an override, but the
                # existing entry in self.authors is left unchanged here.
                # Confirm whether the new value should be stored.
                m = _('overriding mapping for author %s, was %s, will be %s\n')
                self.ui.status(m % (srcauthor, self.authors[srcauthor],
                                    dstauthor))
        finally:
            # release the handle even if reading or reporting fails
            afile.close()

    def cachecommit(self, rev):
        """Fetch commit 'rev' from the source, apply the author and branch
        maps to it, store it in self.commitcache and return it."""
        commit = self.source.getcommit(rev)
        commit.author = self.authors.get(commit.author, commit.author)
        commit.branch = self.branchmap.get(commit.branch, commit.branch)
        self.commitcache[rev] = commit
        return commit

    def copy(self, rev):
        """Convert the single revision 'rev' and record the result in
        self.map.

        'rev' must already be in self.commitcache.
        """
        commit = self.commitcache[rev]

        changes = self.source.getchanges(rev)
        if isinstance(changes, basestring):
            # The source returned a string instead of (files, copies):
            # either SKIPREV or another revision whose mapping is reused.
            if changes == SKIPREV:
                dest = SKIPREV
            else:
                dest = self.map[changes]
            self.map[rev] = dest
            return
        files, copies = changes
        pbranches = []
        if commit.parents:
            for prev in commit.parents:
                if prev not in self.commitcache:
                    self.cachecommit(prev)
                pbranches.append((self.map[prev],
                                  self.commitcache[prev].branch))
        self.dest.setbranch(commit.branch, pbranches)
        try:
            # a splice map entry replaces the parents found in the source
            parents = self.splicemap[rev]
            self.ui.status(_('spliced in %s as parents of %s\n') %
                           (parents, rev))
            parents = [self.map.get(p, p) for p in parents]
        except KeyError:
            parents = [b[0] for b in pbranches]
        source = progresssource(self.ui, self.source, len(files))
        newnode = self.dest.putcommit(files, copies, parents, commit,
                                      source, self.map)
        source.close()
        self.source.converted(rev, newnode)
        self.map[rev] = newnode

    def convert(self, sortmode):
        """Run the conversion: scan the source, sort the revisions with
        'sortmode', copy each one, then transfer tags and bookmarks and
        write the author map. cleanup() is always called at the end."""
        try:
            self.source.before()
            self.dest.before()
            self.source.setrevmap(self.map)
            self.ui.status(_("scanning source...\n"))
            heads = self.source.getheads()
            parents = self.walktree(heads)
            self.mergesplicemap(parents, self.splicemap)
            self.ui.status(_("sorting...\n"))
            t = self.toposort(parents, sortmode)
            num = len(t)
            # stays None when there is nothing to convert; tags and
            # bookmarks are only written when at least one revision ran
            c = None

            self.ui.status(_("converting...\n"))
            for i, c in enumerate(t):
                num -= 1
                desc = self.commitcache[c].desc
                if "\n" in desc:
                    desc = desc.splitlines()[0]
                # convert log message to local encoding without using
                # tolocal() because the encoding.encoding convert()
                # uses is 'utf-8'
                self.ui.status("%d %s\n" % (num, recode(desc)))
                self.ui.note(_("source: %s\n") % recode(c))
                self.ui.progress(_('converting'), i, unit=_('revisions'),
                                 total=len(t))
                self.copy(c)
            self.ui.progress(_('converting'), None)

            tags = self.source.gettags()
            ctags = {}
            for k in tags:
                v = tags[k]
                # keep only tags whose revision is mapped and not skipped
                if self.map.get(v, SKIPREV) != SKIPREV:
                    ctags[k] = self.map[v]

            if c and ctags:
                nrev, tagsparent = self.dest.puttags(ctags)
                if nrev and tagsparent:
                    # write another hash correspondence to override the previous
                    # one so we don't end up with extra tag heads
                    tagsparents = [e for e in self.map.iteritems()
                                   if e[1] == tagsparent]
                    if tagsparents:
                        self.map[tagsparents[0][0]] = nrev

            bookmarks = self.source.getbookmarks()
            cbookmarks = {}
            for k in bookmarks:
                v = bookmarks[k]
                # keep only bookmarks whose revision is mapped and not skipped
                if self.map.get(v, SKIPREV) != SKIPREV:
                    cbookmarks[k] = self.map[v]

            if c and cbookmarks:
                self.dest.putbookmarks(cbookmarks)

            self.writeauthormap()
        finally:
            self.cleanup()

    def cleanup(self):
        """Call after() on the destination and the source, then close the
        revision map.

        Each step runs even if an earlier one raised; the exception
        still propagates to the caller.
        """
        try:
            try:
                self.dest.after()
            finally:
                self.source.after()
        finally:
            self.map.close()
425 425
def convert(ui, src, dest=None, revmapfile=None, **opts):
    """Entry point of the convert command.

    Opens 'src' as a source and 'dest' as a sink, chooses the sort mode
    and runs a converter over them.

    - dest: destination path; when empty it is derived from 'src' with a
      "-hg" suffix.
    - revmapfile: revision map file; when empty the sink is asked for
      its own, with '<dest>/map' as a fallback.
    - opts: command options. Keys read here are 'authormap', 'authors',
      'dest_type', 'source_type', 'rev', 'filemap' and the three sort
      mode flags.

    Raises util.Abort when more than one sort mode is requested or when
    'sourcesort' is selected for a source without native ordering.
    """
    # Remember the current encoding for recode(), then switch the
    # process to UTF-8. The previous value is not restored here.
    global orig_encoding
    orig_encoding = encoding.encoding
    encoding.encoding = 'UTF-8'

    # support --authors as an alias for --authormap
    if not opts.get('authormap'):
        opts['authormap'] = opts.get('authors')

    if not dest:
        dest = hg.defaultdest(src) + "-hg"
        ui.status(_("assuming destination %s\n") % dest)

    destc = convertsink(ui, dest, opts.get('dest_type'))

    try:
        srcc, defaultsort = convertsource(ui, src, opts.get('source_type'),
                                          opts.get('rev'))
    except Exception:
        # the source could not be opened: remove whatever the sink
        # reports as created, then let the error propagate
        for path in destc.created:
            shutil.rmtree(path, True)
        raise

    sortmodes = ('branchsort', 'datesort', 'sourcesort')
    sortmode = [m for m in sortmodes if opts.get(m)]
    if len(sortmode) > 1:
        raise util.Abort(_('more than one sort mode specified'))
    sortmode = sortmode and sortmode[0] or defaultsort
    if sortmode == 'sourcesort' and not srcc.hasnativeorder():
        raise util.Abort(_('--sourcesort is not supported by this data source'))

    fmap = opts.get('filemap')
    if fmap:
        srcc = filemap.filemap_source(ui, srcc, fmap)
        destc.setfilemapmode(True)

    if not revmapfile:
        try:
            revmapfile = destc.revmapfile()
        except Exception:
            # Fall back to a map file inside the destination path. This
            # must join the path string 'dest', not the sink object.
            revmapfile = os.path.join(dest, "map")

    c = converter(ui, srcc, destc, revmapfile, opts)
    c.convert(sortmode)
470 470
General Comments 0
You need to be logged in to leave comments. Login now