##// END OF EJS Templates
convcmd: make a copy of heads before mutating it...
Augie Fackler -
r37905:73ca1c5e default
parent child Browse files
Show More
@@ -1,616 +1,616 b''
1 1 # convcmd - convert extension commands definition
2 2 #
3 3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7 from __future__ import absolute_import
8 8
9 9 import collections
10 10 import os
11 11 import shutil
12 12
13 13 from mercurial.i18n import _
14 14 from mercurial import (
15 15 encoding,
16 16 error,
17 17 hg,
18 18 pycompat,
19 19 scmutil,
20 20 util,
21 21 )
22 22 from mercurial.utils import dateutil
23 23
24 24 from . import (
25 25 bzr,
26 26 common,
27 27 cvs,
28 28 darcs,
29 29 filemap,
30 30 git,
31 31 gnuarch,
32 32 hg as hgconvert,
33 33 monotone,
34 34 p4,
35 35 subversion,
36 36 )
37 37
# Local encoding that was in effect before convert() switched Mercurial to
# UTF-8; recode() uses it to turn text back into something printable.
orig_encoding = 'ascii'

# Helpers and sentinels shared with the common module.
SKIPREV = common.SKIPREV
NoRepo = common.NoRepo
MissingTool = common.MissingTool
mapfile = common.mapfile

# Conversion sources.
svn_source = subversion.svn_source
p4_source = p4.p4_source
monotone_source = monotone.monotone_source
mercurial_source = hgconvert.mercurial_source
gnuarch_source = gnuarch.gnuarch_source
darcs_source = darcs.darcs_source
convert_git = git.convert_git
convert_cvs = cvs.convert_cvs
bzr_source = bzr.bzr_source

# Conversion sinks.
svn_sink = subversion.svn_sink
mercurial_sink = hgconvert.mercurial_sink
56 56
def recode(s):
    """Re-encode ``s`` into the original local encoding.

    ``s`` is either a text (unicode) string or a UTF-8 encoded byte string.
    The result is encoded with ``orig_encoding``; characters that cannot be
    represented are replaced rather than raising.
    """
    # type(u'') is ``unicode`` on Python 2 and ``str`` on Python 3, so this
    # check works on both without relying on the Python 2-only ``unicode``
    # builtin being available.
    texttype = type(u'')
    target = pycompat.sysstr(orig_encoding)
    if isinstance(s, texttype):
        return s.encode(target, 'replace')
    return s.decode('utf-8').encode(target, 'replace')
63 63
def mapbranch(branch, branchmap):
    '''Translate a source branch name through ``branchmap``.

    An empty or missing branch is looked up under the literal key
    "default"; if that still yields nothing, the legacy literal key "None"
    is tried. Branches without a mapping are returned unchanged.

    >>> bmap = {b'default': b'branch1'}
    >>> for i in [b'', None]:
    ...     mapbranch(i, bmap)
    'branch1'
    'branch1'
    >>> bmap = {b'None': b'branch2'}
    >>> for i in [b'', None]:
    ...     mapbranch(i, bmap)
    'branch2'
    'branch2'
    >>> bmap = {b'None': b'branch3', b'default': b'branch4'}
    >>> for i in [b'None', b'', None, b'default', b'branch5']:
    ...     mapbranch(i, bmap)
    'branch3'
    'branch4'
    'branch4'
    'branch4'
    'branch5'
    '''
    # A commit with no branch comes from the source's default branch and is
    # headed for the destination's default branch; looking it up as
    # "default" lets the user redirect it to another branch.
    lookupkey = branch if branch else 'default'
    mapped = branchmap.get(lookupkey, branch)
    if mapped:
        return mapped
    # Older branch maps spelled the default branch as the literal "None";
    # keep honouring that for backward compatibility.
    return branchmap.get('None', mapped)
96 96
# Known source types as (name, factory, default sort mode) triples.
# convertsource() tries them in this order when no type is requested.
source_converters = [
    ('cvs', convert_cvs, 'branchsort'),
    ('git', convert_git, 'branchsort'),
    ('svn', svn_source, 'branchsort'),
    ('hg', mercurial_source, 'sourcesort'),
    ('darcs', darcs_source, 'branchsort'),
    ('mtn', monotone_source, 'branchsort'),
    ('gnuarch', gnuarch_source, 'branchsort'),
    ('bzr', bzr_source, 'branchsort'),
    ('p4', p4_source, 'branchsort'),
]

# Known destination types as (name, factory) pairs, tried in order by
# convertsink().
sink_converters = [
    ('hg', mercurial_sink),
    ('svn', svn_sink),
]
113 113
def convertsource(ui, path, type, revs):
    """Open ``path`` as a conversion source.

    When ``type`` is given only that converter is tried; otherwise every
    registered converter is tried in order. Returns a
    ``(source, default sort mode)`` pair for the first converter that
    accepts the path.

    Raises error.Abort if ``type`` is not a registered source type or if no
    converter could open the path.
    """
    knowntypes = [entry[0] for entry in source_converters]
    if type and type not in knowntypes:
        raise error.Abort(_('%s: invalid source repository type') % type)

    failures = []
    for name, factory, sortmode in source_converters:
        if type and name != type:
            continue
        try:
            return factory(ui, name, path, revs), sortmode
        except (NoRepo, MissingTool) as inst:
            # Remember why this converter refused so it can be reported
            # once every candidate has been tried.
            failures.append(inst)

    if not ui.quiet:
        for failure in failures:
            ui.write("%s\n" % failure)
    raise error.Abort(_('%s: missing or unsupported repository') % path)
128 128
def convertsink(ui, path, type):
    """Open ``path`` as a conversion destination.

    When ``type`` is given only that sink is tried; otherwise every
    registered sink is tried in order and the first one that accepts the
    path is returned.

    Raises error.Abort if ``type`` is not a registered sink type, if a sink
    reports a missing tool, or if no sink accepts the path.
    """
    knowntypes = [entry[0] for entry in sink_converters]
    if type and type not in knowntypes:
        raise error.Abort(_('%s: invalid destination repository type') % type)

    for name, factory in sink_converters:
        if type and name != type:
            continue
        try:
            return factory(ui, name, path)
        except NoRepo as inst:
            # Not this kind of repository: mention it and try the next one.
            ui.note(_("convert: %s\n") % inst)
        except MissingTool as inst:
            raise error.Abort('%s\n' % inst)
    raise error.Abort(_('%s: unknown repository type') % path)
141 141
class progresssource(object):
    """Source wrapper that reports 'getting files' progress.

    Every getfile() call increments a counter that is shown against the
    expected number of files; all requests are forwarded to the wrapped
    source.
    """

    def __init__(self, ui, source, filecount):
        self.ui, self.source = ui, source
        self.filecount = filecount
        # Number of getfile() calls made so far.
        self.retrieved = 0

    def getfile(self, file, rev):
        """Report progress for ``file`` and fetch it from the source."""
        self.retrieved = self.retrieved + 1
        topic = _('getting files')
        self.ui.progress(topic, self.retrieved, item=file,
                         total=self.filecount, unit=_('files'))
        return self.source.getfile(file, rev)

    def targetfilebelongstosource(self, targetfilename):
        """Forward to the wrapped source."""
        return self.source.targetfilebelongstosource(targetfilename)

    def lookuprev(self, rev):
        """Forward to the wrapped source."""
        return self.source.lookuprev(rev)

    def close(self):
        """Clear the 'getting files' progress topic."""
        self.ui.progress(_('getting files'), None)
163 163
class converter(object):
    """Drive the conversion of revisions from a source into a sink.

    The converter scans the source graph for revisions that still need
    converting, applies the optional splice map, orders the revisions, and
    copies them one by one into the destination while recording the
    source -> destination revision mapping.
    """

    def __init__(self, ui, source, dest, revmapfile, opts):
        self.source = source
        self.dest = dest
        self.ui = ui
        self.opts = opts
        # Source revision -> commit object, filled in by cachecommit().
        self.commitcache = {}
        # Source author -> destination author.
        self.authors = {}
        # Where writeauthormap() saves the author map; None disables it.
        self.authorfile = None

        # Record converted revisions persistently: maps source revision
        # ID to target revision ID (both strings).  (This is how
        # incremental conversions work.)
        self.map = mapfile(ui, revmapfile)

        # Read first the dst author map if any
        authorfile = self.dest.authorfile()
        if authorfile and os.path.exists(authorfile):
            self.readauthormap(authorfile)
        # Extend/Override with new author map if necessary
        if opts.get('authormap'):
            self.readauthormap(opts.get('authormap'))
            self.authorfile = self.dest.authorfile()

        self.splicemap = self.parsesplicemap(opts.get('splicemap'))
        self.branchmap = mapfile(ui, opts.get('branchmap'))

    def parsesplicemap(self, path):
        """Check and validate the splicemap format and return a
        child/parents dictionary.

        Format checking has two parts.
        1. generic format which is same across all source types
        2. specific format checking which may be different for
           different source type.  This logic is implemented in
           checkrevformat function in source files like
           hg.py, subversion.py etc.

        Returns an empty dict when ``path`` is empty. Raises error.Abort on
        a malformed line or when the file cannot be read.
        """
        if not path:
            return {}
        m = {}
        try:
            # The context manager closes the file even when a line is
            # rejected part-way through.
            with open(path, 'rb') as fp:
                for i, line in enumerate(util.iterfile(fp)):
                    line = line.splitlines()[0].rstrip()
                    if not line:
                        # Ignore blank lines
                        continue
                    # split line
                    lex = common.shlexer(data=line, whitespace=',')
                    line = list(lex)
                    # check number of parents
                    if not (2 <= len(line) <= 3):
                        raise error.Abort(
                            _('syntax error in %s(%d): child parent1'
                              '[,parent2] expected') % (path, i + 1))
                    for part in line:
                        self.source.checkrevformat(part)
                    child, p1, p2 = line[0], line[1:2], line[2:]
                    if p1 == p2:
                        # Same parent listed twice: keep a single entry.
                        m[child] = p1
                    else:
                        m[child] = p1 + p2
        # if file does not exist or error reading, exit
        except IOError:
            raise error.Abort(_('splicemap file not found or error reading %s:')
                              % path)
        return m

    def walktree(self, heads):
        '''Return a mapping that identifies the uncommitted parents of every
        uncommitted changeset.

        ``heads`` is only read; the traversal works on its own queue.
        '''
        # A deque gives O(1) pops from the front and, being a fresh
        # container, leaves the caller's ``heads`` untouched.
        visit = collections.deque(heads)
        known = set()
        parents = {}
        numcommits = self.source.numcommits()
        while visit:
            n = visit.popleft()
            if n in known:
                continue
            if n in self.map:
                m = self.map[n]
                # Skipped, or already present in the destination: nothing
                # to convert here and no need to look at its ancestors.
                if m == SKIPREV or self.dest.hascommitfrommap(m):
                    continue
            known.add(n)
            self.ui.progress(_('scanning'), len(known), unit=_('revisions'),
                             total=numcommits)
            commit = self.cachecommit(n)
            parents[n] = list(commit.parents)
            visit.extend(parents[n])
        self.ui.progress(_('scanning'), None)

        return parents

    def mergesplicemap(self, parents, splicemap):
        """A splicemap redefines child/parent relationships. Check the
        map contains valid revision identifiers and merge the new
        links in the source graph.

        ``parents`` is updated in place. Raises error.Abort when a spliced
        parent is neither in the destination nor being converted.
        """
        for c in sorted(splicemap):
            if c not in parents:
                if not self.dest.hascommitforsplicemap(self.map.get(c, c)):
                    # Could be in source but not converted during this run
                    self.ui.warn(_('splice map revision %s is not being '
                                   'converted, ignoring\n') % c)
                continue
            pc = []
            for p in splicemap[c]:
                # We do not have to wait for nodes already in dest.
                if self.dest.hascommitforsplicemap(self.map.get(p, p)):
                    continue
                # Parent is not in dest and not being converted, not good
                if p not in parents:
                    raise error.Abort(_('unknown splice map parent: %s') % p)
                pc.append(p)
            parents[c] = pc

    def toposort(self, parents, sortmode):
        '''Return an ordering such that every uncommitted changeset is
        preceded by all its uncommitted ancestors.

        ``sortmode`` is one of 'branchsort', 'datesort', 'sourcesort' or
        'closesort' and decides which eligible revision is taken next.
        Raises error.Abort for an unknown sort mode, when a cycle is
        detected, or when some revisions could not be ordered.
        '''

        def mapchildren(parents):
            """Return a (children, roots) tuple where 'children' maps parent
            revision identifiers to children ones, and 'roots' is the list of
            revisions without parents. 'parents' must be a mapping of revision
            identifier to its parents ones.
            """
            visit = collections.deque(sorted(parents))
            seen = set()
            children = {}
            roots = []

            while visit:
                n = visit.popleft()
                if n in seen:
                    continue
                seen.add(n)
                # Ensure that nodes without parents are present in the
                # 'children' mapping.
                children.setdefault(n, [])
                hasparent = False
                for p in parents[n]:
                    if p not in self.map:
                        visit.append(p)
                        hasparent = True
                    children.setdefault(p, []).append(n)
                if not hasparent:
                    roots.append(n)

            return children, roots

        # Sort functions are supposed to take a list of revisions which
        # can be converted immediately and pick one

        def makebranchsorter():
            """If the previously converted revision has a child in the
            eligible revisions list, pick it. Return the list head
            otherwise. Branch sort attempts to minimize branch
            switching, which is harmful for Mercurial backend
            compression.
            """
            # One-element list so the closure can rebind the value.
            prev = [None]

            def picknext(nodes):
                choice = nodes[0]
                for n in nodes:
                    if prev[0] in parents[n]:
                        choice = n
                        break
                prev[0] = choice
                return choice
            return picknext

        def makesourcesorter():
            """Source specific sort."""
            keyfn = lambda n: self.commitcache[n].sortkey

            def picknext(nodes):
                return sorted(nodes, key=keyfn)[0]
            return picknext

        def makeclosesorter():
            """Close order sort."""
            keyfn = lambda n: ('close' not in self.commitcache[n].extra,
                               self.commitcache[n].sortkey)

            def picknext(nodes):
                return sorted(nodes, key=keyfn)[0]
            return picknext

        def makedatesorter():
            """Sort revisions by date."""
            # Parsed dates are memoized per revision.
            dates = {}

            def getdate(n):
                if n not in dates:
                    dates[n] = dateutil.parsedate(self.commitcache[n].date)
                return dates[n]

            def picknext(nodes):
                return min([(getdate(n), n) for n in nodes])[1]

            return picknext

        if sortmode == 'branchsort':
            picknext = makebranchsorter()
        elif sortmode == 'datesort':
            picknext = makedatesorter()
        elif sortmode == 'sourcesort':
            picknext = makesourcesorter()
        elif sortmode == 'closesort':
            picknext = makeclosesorter()
        else:
            raise error.Abort(_('unknown sort mode: %s') % sortmode)

        children, actives = mapchildren(parents)

        s = []
        # Revision -> list of parents still to be emitted; set to None once
        # the revision has been moved to the eligible list.
        pendings = {}
        while actives:
            n = picknext(actives)
            actives.remove(n)
            s.append(n)

            # Update dependents list
            for c in children.get(n, []):
                if c not in pendings:
                    pendings[c] = [p for p in parents[c] if p not in self.map]
                try:
                    pendings[c].remove(n)
                except ValueError:
                    raise error.Abort(_('cycle detected between %s and %s')
                                      % (recode(c), recode(n)))
                if not pendings[c]:
                    # Parents are converted, node is eligible
                    actives.insert(0, c)
                    pendings[c] = None

        if len(s) != len(parents):
            raise error.Abort(_("not all revisions were sorted"))

        return s

    def writeauthormap(self):
        """Write the author map to ``self.authorfile``, if one is set."""
        authorfile = self.authorfile
        if authorfile:
            self.ui.status(_('writing author map file %s\n') % authorfile)
            # Closed by the context manager even if a write fails.
            with open(authorfile, 'wb+') as ofile:
                for author in self.authors:
                    ofile.write(util.tonativeeol(
                        "%s=%s\n" % (author, self.authors[author])))

    def readauthormap(self, authorfile):
        """Merge the "source=destination" lines of ``authorfile`` into
        ``self.authors``.

        Blank lines and lines starting with '#' are skipped; lines without
        an '=' are reported and ignored. A mapping that differs from one
        already loaded replaces it, and the replacement is announced.
        """
        with open(authorfile, 'rb') as afile:
            for line in afile:

                line = line.strip()
                if not line or line.startswith('#'):
                    continue

                try:
                    srcauthor, dstauthor = line.split('=', 1)
                except ValueError:
                    msg = _('ignoring bad line in author map file %s: %s\n')
                    self.ui.warn(msg % (authorfile, line.rstrip()))
                    continue

                srcauthor = srcauthor.strip()
                dstauthor = dstauthor.strip()
                if self.authors.get(srcauthor) in (None, dstauthor):
                    msg = _('mapping author %s to %s\n')
                    self.ui.debug(msg % (srcauthor, dstauthor))
                else:
                    m = _('overriding mapping for author %s, was %s, '
                          'will be %s\n')
                    self.ui.status(m % (srcauthor, self.authors[srcauthor],
                                        dstauthor))
                # Store the mapping in both cases so that an announced
                # override really takes effect.
                self.authors[srcauthor] = dstauthor

    def cachecommit(self, rev):
        """Fetch commit ``rev`` from the source, apply the author and branch
        maps to it, remember it in ``self.commitcache`` and return it."""
        commit = self.source.getcommit(rev)
        commit.author = self.authors.get(commit.author, commit.author)
        commit.branch = mapbranch(commit.branch, self.branchmap)
        self.commitcache[rev] = commit
        return commit

    def copy(self, rev):
        """Convert source revision ``rev`` into the destination and record
        the resulting revision in ``self.map``."""
        commit = self.commitcache[rev]
        full = self.opts.get('full')
        changes = self.source.getchanges(rev, full)
        if isinstance(changes, bytes):
            # The source answered with a revision identifier instead of a
            # change list: reuse that revision's mapping, or skip.
            if changes == SKIPREV:
                dest = SKIPREV
            else:
                dest = self.map[changes]
            self.map[rev] = dest
            return
        files, copies, cleanp2 = changes
        pbranches = []
        if commit.parents:
            for prev in commit.parents:
                if prev not in self.commitcache:
                    self.cachecommit(prev)
                pbranches.append((self.map[prev],
                                  self.commitcache[prev].branch))
        self.dest.setbranch(commit.branch, pbranches)
        if rev in self.splicemap:
            parents = self.splicemap[rev]
            self.ui.status(_('spliced in %s as parents of %s\n') %
                           (_(' and ').join(parents), rev))
            parents = [self.map.get(p, p) for p in parents]
        else:
            parents = [b[0] for b in pbranches]
            parents.extend(self.map[x]
                           for x in commit.optparents
                           if x in self.map)
        if len(pbranches) != 2:
            cleanp2 = set()
        if len(parents) < 3:
            source = progresssource(self.ui, self.source, len(files))
        else:
            # For an octopus merge, we end up traversing the list of
            # changed files N-1 times. This tweak to the number of
            # files makes it so the progress bar doesn't overflow
            # itself.
            source = progresssource(self.ui, self.source,
                                    len(files) * (len(parents) - 1))
        newnode = self.dest.putcommit(files, copies, parents, commit,
                                      source, self.map, full, cleanp2)
        source.close()
        self.source.converted(rev, newnode)
        self.map[rev] = newnode

    def _convertednames(self, names):
        """Return name -> destination revision for every entry of ``names``
        whose source revision has a non-skipped mapping."""
        converted = {}
        for name in names:
            rev = names[name]
            if self.map.get(rev, SKIPREV) != SKIPREV:
                converted[name] = self.map[rev]
        return converted

    def convert(self, sortmode):
        """Run the whole conversion using ``sortmode`` to order revisions.

        cleanup() is always called, whether or not the conversion succeeds.
        """
        try:
            self.source.before()
            self.dest.before()
            self.source.setrevmap(self.map)
            self.ui.status(_("scanning source...\n"))
            heads = self.source.getheads()
            parents = self.walktree(heads)
            self.mergesplicemap(parents, self.splicemap)
            self.ui.status(_("sorting...\n"))
            t = self.toposort(parents, sortmode)
            total = len(t)
            num = total
            # Stays None when there was nothing to convert; tags and
            # bookmarks are then left alone.
            c = None

            self.ui.status(_("converting...\n"))
            for i, c in enumerate(t):
                num -= 1
                desc = self.commitcache[c].desc
                if "\n" in desc:
                    desc = desc.splitlines()[0]
                # convert log message to local encoding without using
                # tolocal() because the encoding.encoding convert()
                # uses is 'utf-8'
                self.ui.status("%d %s\n" % (num, recode(desc)))
                self.ui.note(_("source: %s\n") % recode(c))
                self.ui.progress(_('converting'), i, unit=_('revisions'),
                                 total=total)
                self.copy(c)
            self.ui.progress(_('converting'), None)

            if not self.ui.configbool('convert', 'skiptags'):
                ctags = self._convertednames(self.source.gettags())

                if c and ctags:
                    nrev, tagsparent = self.dest.puttags(ctags)
                    if nrev and tagsparent:
                        # write another hash correspondence to override the
                        # previous one so we don't end up with extra tag heads
                        tagsparents = [e for e in self.map.iteritems()
                                       if e[1] == tagsparent]
                        if tagsparents:
                            self.map[tagsparents[0][0]] = nrev

            cbookmarks = self._convertednames(self.source.getbookmarks())

            if c and cbookmarks:
                self.dest.putbookmarks(cbookmarks)

            self.writeauthormap()
        finally:
            self.cleanup()

    def cleanup(self):
        """Finish the destination and the source, then close the revision
        map. Each step runs even if an earlier one raised."""
        try:
            try:
                self.dest.after()
            finally:
                self.source.after()
        finally:
            self.map.close()
566 566
def convert(ui, src, dest=None, revmapfile=None, **opts):
    """Convert repository ``src`` into ``dest``.

    Remembers the current encoding in ``orig_encoding`` and switches
    Mercurial to UTF-8, opens the sink and the source, settles on a sort
    mode, optionally wraps the source in a filemap, and runs a converter.

    Raises error.Abort when several sort modes are requested or when the
    chosen sort mode is not supported by the source.
    """
    global orig_encoding
    opts = pycompat.byteskwargs(opts)
    orig_encoding = encoding.encoding
    encoding.encoding = 'UTF-8'

    # support --authors as an alias for --authormap
    if not opts.get('authormap'):
        opts['authormap'] = opts.get('authors')

    if not dest:
        dest = hg.defaultdest(src) + "-hg"
        ui.status(_("assuming destination %s\n") % dest)

    destc = scmutil.wrapconvertsink(
        convertsink(ui, dest, opts.get('dest_type')))

    try:
        srcc, defaultsort = convertsource(ui, src, opts.get('source_type'),
                                          opts.get('rev'))
    except Exception:
        # The source could not be opened: remove whatever the sink created
        # before passing the error on.
        for created in destc.created:
            shutil.rmtree(created, True)
        raise

    requested = [mode
                 for mode in ('branchsort', 'datesort', 'sourcesort',
                              'closesort')
                 if opts.get(mode)]
    if len(requested) > 1:
        raise error.Abort(_('more than one sort mode specified'))
    sortmode = requested[0] if requested else defaultsort

    if sortmode == 'sourcesort' and not srcc.hasnativeorder():
        raise error.Abort(_('--sourcesort is not supported by this data source')
                          )
    if sortmode == 'closesort' and not srcc.hasnativeclose():
        raise error.Abort(_('--closesort is not supported by this data source'))

    fmap = opts.get('filemap')
    if fmap:
        srcc = filemap.filemap_source(ui, srcc, fmap)
        destc.setfilemapmode(True)

    if not revmapfile:
        revmapfile = destc.revmapfile()

    converter(ui, srcc, destc, revmapfile, opts).convert(sortmode)
General Comments 0
You need to be logged in to leave comments. Login now