##// END OF EJS Templates
convcmd: pass encoding name as a sysstr...
Augie Fackler -
r36150:6df206ef default
parent child Browse files
Show More
@@ -1,614 +1,616
1 1 # convcmd - convert extension commands definition
2 2 #
3 3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7 from __future__ import absolute_import
8 8
9 9 import collections
10 10 import os
11 11 import shlex
12 12 import shutil
13 13
14 14 from mercurial.i18n import _
15 15 from mercurial import (
16 16 encoding,
17 17 error,
18 18 hg,
19 pycompat,
19 20 scmutil,
20 21 util,
21 22 )
22 23
23 24 from . import (
24 25 bzr,
25 26 common,
26 27 cvs,
27 28 darcs,
28 29 filemap,
29 30 git,
30 31 gnuarch,
31 32 hg as hgconvert,
32 33 monotone,
33 34 p4,
34 35 subversion,
35 36 )
36 37
# Short module-level names for helpers defined in the ``common`` module.
mapfile = common.mapfile
MissingTool = common.MissingTool
NoRepo = common.NoRepo
SKIPREV = common.SKIPREV

# Source classes, one per supported source system (listed in
# ``source_converters`` below).
bzr_source = bzr.bzr_source
convert_cvs = cvs.convert_cvs
convert_git = git.convert_git
darcs_source = darcs.darcs_source
gnuarch_source = gnuarch.gnuarch_source
mercurial_source = hgconvert.mercurial_source
monotone_source = monotone.monotone_source
p4_source = p4.p4_source
svn_source = subversion.svn_source

# Sink classes (listed in ``sink_converters`` below).
mercurial_sink = hgconvert.mercurial_sink
svn_sink = subversion.svn_sink

# Encoding that recode() encodes its output into.  convert() replaces this
# with the value of encoding.encoding before forcing the latter to UTF-8.
orig_encoding = 'ascii'
55 56
def recode(s):
    """Return ``s`` encoded in ``orig_encoding``.

    A ``unicode`` value is encoded directly; any other value is first
    decoded as UTF-8.  Characters that cannot be represented in
    ``orig_encoding`` are substituted (the ``'replace'`` error handler)
    instead of raising.
    """
    # The encoding name is handed to encode() as a sysstr (native str), so
    # convert it once up front; both branches need it.
    encodingname = pycompat.sysstr(orig_encoding)
    if isinstance(s, unicode):
        return s.encode(encodingname, 'replace')
    else:
        return s.decode('utf-8').encode(encodingname, 'replace')
61 63
def mapbranch(branch, branchmap):
    '''Translate a source branch name through ``branchmap``.

    Returns the mapped name, or ``branch`` unchanged when no entry applies.

    >>> bmap = {b'default': b'branch1'}
    >>> for i in [b'', None]:
    ...     mapbranch(i, bmap)
    'branch1'
    'branch1'
    >>> bmap = {b'None': b'branch2'}
    >>> for i in [b'', None]:
    ...     mapbranch(i, bmap)
    'branch2'
    'branch2'
    >>> bmap = {b'None': b'branch3', b'default': b'branch4'}
    >>> for i in [b'None', b'', None, b'default', b'branch5']:
    ...     mapbranch(i, bmap)
    'branch3'
    'branch4'
    'branch4'
    'branch4'
    'branch5'
    '''
    # A missing or empty branch means the commit comes from the source
    # repository's default branch and is headed for the destination's
    # default branch.  Looking such commits up under the literal "default"
    # lets the user redirect them to another branch.
    mapped = branchmap.get(branch or 'default', branch)
    if mapped:
        return mapped
    # Older branch maps spelled the default branch as the literal "None";
    # keep honouring that for backward compatibility.
    return branchmap.get('None', mapped)
94 96
# Known source types as (type name, source class, default sort mode)
# triples.  convertsource() tries them in this order when no explicit type
# is requested.
source_converters = [
    ('cvs', convert_cvs, 'branchsort'),
    ('git', convert_git, 'branchsort'),
    ('svn', svn_source, 'branchsort'),
    ('hg', mercurial_source, 'sourcesort'),
    ('darcs', darcs_source, 'branchsort'),
    ('mtn', monotone_source, 'branchsort'),
    ('gnuarch', gnuarch_source, 'branchsort'),
    ('bzr', bzr_source, 'branchsort'),
    ('p4', p4_source, 'branchsort'),
    ]
106 108
# Known destination types as (type name, sink class) pairs.  convertsink()
# tries them in this order when no explicit type is requested.
sink_converters = [
    ('hg', mercurial_sink),
    ('svn', svn_sink),
    ]
111 113
def convertsource(ui, path, type, revs):
    """Open ``path`` as a conversion source.

    When ``type`` is given only that entry of ``source_converters`` is
    tried; otherwise every entry is tried in order.  Returns a
    ``(source, default sort mode)`` pair for the first one that succeeds.

    Raises error.Abort when ``type`` is not a known source type, or when no
    converter could open ``path``.  In the latter case the NoRepo and
    MissingTool errors collected along the way are written to ``ui`` first,
    unless it is quiet.
    """
    knowntypes = [entry[0] for entry in source_converters]
    if type and type not in knowntypes:
        raise error.Abort(_('%s: invalid source repository type') % type)

    failures = []
    for name, factory, sortmode in source_converters:
        if type and name != type:
            continue
        try:
            return factory(ui, name, path, revs), sortmode
        except (NoRepo, MissingTool) as inst:
            failures.append(inst)

    if not ui.quiet:
        for failure in failures:
            ui.write("%s\n" % failure)
    raise error.Abort(_('%s: missing or unsupported repository') % path)
126 128
def convertsink(ui, path, type):
    """Open ``path`` as a conversion destination and return the sink.

    When ``type`` is given only that entry of ``sink_converters`` is tried;
    otherwise every entry is tried in order.  A NoRepo error from a sink is
    reported through ``ui.note`` and the next candidate is tried, whereas a
    MissingTool error aborts immediately.

    Raises error.Abort when ``type`` is not a known destination type, when a
    required tool is missing, or when no sink accepted ``path``.
    """
    knowntypes = [entry[0] for entry in sink_converters]
    if type and type not in knowntypes:
        raise error.Abort(_('%s: invalid destination repository type') % type)

    for name, factory in sink_converters:
        if type and name != type:
            continue
        try:
            return factory(ui, name, path)
        except NoRepo as inst:
            ui.note(_("convert: %s\n") % inst)
        except MissingTool as inst:
            raise error.Abort('%s\n' % inst)
    raise error.Abort(_('%s: unknown repository type') % path)
139 141
class progresssource(object):
    """Wrapper around a conversion source that reports 'getting files'
    progress to ``ui`` each time a file is fetched through it."""

    def __init__(self, ui, source, filecount):
        """Wrap ``source``; ``filecount`` is the total passed to the
        progress calls."""
        self.ui, self.source = ui, source
        self.filecount = filecount
        # Number of getfile() calls made so far.
        self.retrieved = 0

    def getfile(self, file, rev):
        """Bump the progress counter, then fetch ``file`` at ``rev`` from
        the wrapped source."""
        self.retrieved = count = self.retrieved + 1
        topic = _('getting files')
        self.ui.progress(topic, count, item=file, total=self.filecount,
                         unit=_('files'))
        return self.source.getfile(file, rev)

    def targetfilebelongstosource(self, targetfilename):
        """Forward the query to the wrapped source."""
        wrapped = self.source
        return wrapped.targetfilebelongstosource(targetfilename)

    def lookuprev(self, rev):
        """Forward the lookup to the wrapped source."""
        wrapped = self.source
        return wrapped.lookuprev(rev)

    def close(self):
        """Mark the 'getting files' progress topic as finished."""
        topic = _('getting files')
        self.ui.progress(topic, None)
161 163
class converter(object):
    """Convert the revisions of ``source`` into ``dest``.

    The mapping from source revision IDs to destination revision IDs is
    kept in ``self.map`` (a ``mapfile``), author rewrites in
    ``self.authors`` and parent overrides in ``self.splicemap``.
    """

    def __init__(self, ui, source, dest, revmapfile, opts):
        """Set up the conversion state.

        ``revmapfile`` is handed to ``mapfile`` to hold the revision map.
        ``opts`` is queried for the 'authormap', 'splicemap' and
        'branchmap' entries here (and for 'full' in copy()).
        """

        self.source = source
        self.dest = dest
        self.ui = ui
        self.opts = opts
        self.commitcache = {}
        self.authors = {}
        self.authorfile = None

        # Record converted revisions persistently: maps source revision
        # ID to target revision ID (both strings).  (This is how
        # incremental conversions work.)
        self.map = mapfile(ui, revmapfile)

        # Read first the dst author map if any
        authorfile = self.dest.authorfile()
        if authorfile and os.path.exists(authorfile):
            self.readauthormap(authorfile)
        # Extend/Override with new author map if necessary
        if opts.get('authormap'):
            self.readauthormap(opts.get('authormap'))
            self.authorfile = self.dest.authorfile()

        self.splicemap = self.parsesplicemap(opts.get('splicemap'))
        self.branchmap = mapfile(ui, opts.get('branchmap'))

    def parsesplicemap(self, path):
        """ check and validate the splicemap format and
            return a child/parents dictionary.
            Format checking has two parts.
            1. generic format which is same across all source types
            2. specific format checking which may be different for
               different source type.  This logic is implemented in
               checkrevformat function in source files like
               hg.py, subversion.py etc.

            An empty ``path`` yields an empty dictionary.  Raises
            error.Abort on a malformed line or when the file cannot be read.
        """

        if not path:
            return {}
        m = {}
        try:
            # The with block closes the file even when a line is rejected.
            with open(path, 'rb') as fp:
                for i, line in enumerate(util.iterfile(fp)):
                    line = line.splitlines()[0].rstrip()
                    if not line:
                        # Ignore blank lines
                        continue
                    # split line
                    lex = shlex.shlex(line, posix=True)
                    lex.whitespace_split = True
                    lex.whitespace += ','
                    line = list(lex)
                    # check number of parents
                    if not (2 <= len(line) <= 3):
                        raise error.Abort(
                            _('syntax error in %s(%d): child parent1'
                              '[,parent2] expected') % (path, i + 1))
                    for part in line:
                        self.source.checkrevformat(part)
                    child, p1, p2 = line[0], line[1:2], line[2:]
                    # p1 and p2 are lists; identical parents are stored once
                    if p1 == p2:
                        m[child] = p1
                    else:
                        m[child] = p1 + p2
        # if file does not exist or error reading, exit
        except IOError:
            raise error.Abort(_('splicemap file not found or error reading %s:')
                              % path)
        return m

    def walktree(self, heads):
        '''Return a mapping that identifies the uncommitted parents of every
        uncommitted changeset.

        ``heads`` is only read; the walk uses its own queue.'''
        # A deque gives O(1) pops from the front and leaves the caller's
        # list intact.  ``or ()`` keeps a false ``heads`` (e.g. None) working.
        visit = collections.deque(heads or ())
        known = set()
        parents = {}
        numcommits = self.source.numcommits()
        while visit:
            n = visit.popleft()
            if n in known:
                continue
            if n in self.map:
                m = self.map[n]
                # Skipped, or already present in the destination: nothing
                # to walk below this revision.
                if m == SKIPREV or self.dest.hascommitfrommap(m):
                    continue
            known.add(n)
            self.ui.progress(_('scanning'), len(known), unit=_('revisions'),
                             total=numcommits)
            commit = self.cachecommit(n)
            parents[n] = []
            for p in commit.parents:
                parents[n].append(p)
                visit.append(p)
        self.ui.progress(_('scanning'), None)

        return parents

    def mergesplicemap(self, parents, splicemap):
        """A splicemap redefines child/parent relationships. Check the
        map contains valid revision identifiers and merge the new
        links in the source graph.

        ``parents`` is updated in place.  Raises error.Abort for a spliced
        parent that is neither in the destination nor in ``parents``.
        """
        for c in sorted(splicemap):
            if c not in parents:
                if not self.dest.hascommitforsplicemap(self.map.get(c, c)):
                    # Could be in source but not converted during this run
                    self.ui.warn(_('splice map revision %s is not being '
                                   'converted, ignoring\n') % c)
                continue
            pc = []
            for p in splicemap[c]:
                # We do not have to wait for nodes already in dest.
                if self.dest.hascommitforsplicemap(self.map.get(p, p)):
                    continue
                # Parent is not in dest and not being converted, not good
                if p not in parents:
                    raise error.Abort(_('unknown splice map parent: %s') % p)
                pc.append(p)
            parents[c] = pc

    def toposort(self, parents, sortmode):
        '''Return an ordering such that every uncommitted changeset is
        preceded by all its uncommitted ancestors.

        ``sortmode`` is one of 'branchsort', 'datesort', 'sourcesort' or
        'closesort'; anything else raises error.Abort.'''

        def mapchildren(parents):
            """Return a (children, roots) tuple where 'children' maps parent
            revision identifiers to children ones, and 'roots' is the list of
            revisions without parents. 'parents' must be a mapping of revision
            identifier to its parents ones.
            """
            visit = collections.deque(sorted(parents))
            seen = set()
            children = {}
            roots = []

            while visit:
                n = visit.popleft()
                if n in seen:
                    continue
                seen.add(n)
                # Ensure that nodes without parents are present in the
                # 'children' mapping.
                children.setdefault(n, [])
                hasparent = False
                for p in parents[n]:
                    if p not in self.map:
                        visit.append(p)
                        hasparent = True
                    children.setdefault(p, []).append(n)
                if not hasparent:
                    roots.append(n)

            return children, roots

        # Sort functions are supposed to take a list of revisions which
        # can be converted immediately and pick one

        def makebranchsorter():
            """If the previously converted revision has a child in the
            eligible revisions list, pick it. Return the list head
            otherwise. Branch sort attempts to minimize branch
            switching, which is harmful for Mercurial backend
            compression.
            """
            # One-element list so picknext can rebind the value.
            prev = [None]
            def picknext(nodes):
                next = nodes[0]
                for n in nodes:
                    if prev[0] in parents[n]:
                        next = n
                        break
                prev[0] = next
                return next
            return picknext

        def makesourcesorter():
            """Source specific sort."""
            keyfn = lambda n: self.commitcache[n].sortkey
            def picknext(nodes):
                return sorted(nodes, key=keyfn)[0]
            return picknext

        def makeclosesorter():
            """Close order sort."""
            keyfn = lambda n: ('close' not in self.commitcache[n].extra,
                               self.commitcache[n].sortkey)
            def picknext(nodes):
                return sorted(nodes, key=keyfn)[0]
            return picknext

        def makedatesorter():
            """Sort revisions by date."""
            # Parsed dates, cached per revision.
            dates = {}
            def getdate(n):
                if n not in dates:
                    dates[n] = util.parsedate(self.commitcache[n].date)
                return dates[n]

            def picknext(nodes):
                return min([(getdate(n), n) for n in nodes])[1]

            return picknext

        if sortmode == 'branchsort':
            picknext = makebranchsorter()
        elif sortmode == 'datesort':
            picknext = makedatesorter()
        elif sortmode == 'sourcesort':
            picknext = makesourcesorter()
        elif sortmode == 'closesort':
            picknext = makeclosesorter()
        else:
            raise error.Abort(_('unknown sort mode: %s') % sortmode)

        children, actives = mapchildren(parents)

        s = []
        pendings = {}
        while actives:
            n = picknext(actives)
            actives.remove(n)
            s.append(n)

            # Update dependents list
            for c in children.get(n, []):
                if c not in pendings:
                    pendings[c] = [p for p in parents[c] if p not in self.map]
                try:
                    pendings[c].remove(n)
                except ValueError:
                    raise error.Abort(_('cycle detected between %s and %s')
                                      % (recode(c), recode(n)))
                if not pendings[c]:
                    # Parents are converted, node is eligible
                    actives.insert(0, c)
                    pendings[c] = None

        if len(s) != len(parents):
            raise error.Abort(_("not all revisions were sorted"))

        return s

    def writeauthormap(self):
        """Write ``self.authors`` to ``self.authorfile`` as ``src=dst``
        lines.  Does nothing when no author file is set."""
        authorfile = self.authorfile
        if authorfile:
            self.ui.status(_('writing author map file %s\n') % authorfile)
            # The with block closes the file even if a write fails.
            with open(authorfile, 'wb+') as ofile:
                for author in self.authors:
                    ofile.write("%s=%s\n" % (author, self.authors[author]))

    def readauthormap(self, authorfile):
        """Merge the ``src=dst`` lines of ``authorfile`` into
        ``self.authors``.

        Blank lines and lines starting with '#' are skipped; lines without
        '=' are reported with a warning and ignored.  An entry that differs
        from an existing mapping replaces it, after a status message.
        """
        # The with block closes the file even if reading fails.
        with open(authorfile, 'rb') as afile:
            for line in afile:

                line = line.strip()
                if not line or line.startswith('#'):
                    continue

                try:
                    srcauthor, dstauthor = line.split('=', 1)
                except ValueError:
                    msg = _('ignoring bad line in author map file %s: %s\n')
                    self.ui.warn(msg % (authorfile, line.rstrip()))
                    continue

                srcauthor = srcauthor.strip()
                dstauthor = dstauthor.strip()
                if self.authors.get(srcauthor) in (None, dstauthor):
                    msg = _('mapping author %s to %s\n')
                    self.ui.debug(msg % (srcauthor, dstauthor))
                    self.authors[srcauthor] = dstauthor
                    continue

                m = _('overriding mapping for author %s, was %s, will be %s\n')
                self.ui.status(m % (srcauthor, self.authors[srcauthor],
                                    dstauthor))
                # Apply the override that was just announced; previously
                # the old mapping was silently kept.
                self.authors[srcauthor] = dstauthor

    def cachecommit(self, rev):
        """Fetch commit ``rev`` from the source, apply the author and branch
        maps to it, store it in ``self.commitcache`` and return it."""
        commit = self.source.getcommit(rev)
        commit.author = self.authors.get(commit.author, commit.author)
        commit.branch = mapbranch(commit.branch, self.branchmap)
        self.commitcache[rev] = commit
        return commit

    def copy(self, rev):
        """Convert the already cached commit ``rev`` into the destination
        and record the resulting revision in ``self.map``."""
        commit = self.commitcache[rev]
        full = self.opts.get('full')
        changes = self.source.getchanges(rev, full)
        if isinstance(changes, bytes):
            # The source answered with a single value instead of a change
            # set: either SKIPREV or another source revision whose mapping
            # is reused for ``rev``.
            if changes == SKIPREV:
                dest = SKIPREV
            else:
                dest = self.map[changes]
            self.map[rev] = dest
            return
        files, copies, cleanp2 = changes
        pbranches = []
        if commit.parents:
            for prev in commit.parents:
                if prev not in self.commitcache:
                    self.cachecommit(prev)
                pbranches.append((self.map[prev],
                                  self.commitcache[prev].branch))
        self.dest.setbranch(commit.branch, pbranches)
        try:
            parents = self.splicemap[rev]
            self.ui.status(_('spliced in %s as parents of %s\n') %
                           (_(' and ').join(parents), rev))
            parents = [self.map.get(p, p) for p in parents]
        except KeyError:
            # No splice entry for this revision: use the converted parents.
            parents = [b[0] for b in pbranches]
            parents.extend(self.map[x]
                           for x in commit.optparents
                           if x in self.map)
        if len(pbranches) != 2:
            cleanp2 = set()
        if len(parents) < 3:
            source = progresssource(self.ui, self.source, len(files))
        else:
            # For an octopus merge, we end up traversing the list of
            # changed files N-1 times. This tweak to the number of
            # files makes it so the progress bar doesn't overflow
            # itself.
            source = progresssource(self.ui, self.source,
                                    len(files) * (len(parents) - 1))
        newnode = self.dest.putcommit(files, copies, parents, commit,
                                      source, self.map, full, cleanp2)
        source.close()
        self.source.converted(rev, newnode)
        self.map[rev] = newnode

    def convert(self, sortmode):
        """Run the conversion: scan the source, order the revisions with
        ``sortmode``, copy each one, then transfer tags and bookmarks and
        write the author map.  cleanup() always runs at the end."""
        try:
            self.source.before()
            self.dest.before()
            self.source.setrevmap(self.map)
            self.ui.status(_("scanning source...\n"))
            heads = self.source.getheads()
            parents = self.walktree(heads)
            self.mergesplicemap(parents, self.splicemap)
            self.ui.status(_("sorting...\n"))
            t = self.toposort(parents, sortmode)
            num = len(t)
            # Stays None when nothing was converted; tested below before
            # tags and bookmarks are written.
            c = None

            self.ui.status(_("converting...\n"))
            for i, c in enumerate(t):
                num -= 1
                desc = self.commitcache[c].desc
                if "\n" in desc:
                    desc = desc.splitlines()[0]
                # convert log message to local encoding without using
                # tolocal() because the encoding.encoding convert()
                # uses is 'utf-8'
                self.ui.status("%d %s\n" % (num, recode(desc)))
                self.ui.note(_("source: %s\n") % recode(c))
                self.ui.progress(_('converting'), i, unit=_('revisions'),
                                 total=len(t))
                self.copy(c)
            self.ui.progress(_('converting'), None)

            if not self.ui.configbool('convert', 'skiptags'):
                tags = self.source.gettags()
                ctags = {}
                for k in tags:
                    v = tags[k]
                    if self.map.get(v, SKIPREV) != SKIPREV:
                        ctags[k] = self.map[v]

                if c and ctags:
                    nrev, tagsparent = self.dest.puttags(ctags)
                    if nrev and tagsparent:
                        # write another hash correspondence to override the
                        # previous one so we don't end up with extra tag heads
                        tagsparents = [e for e in self.map.iteritems()
                                       if e[1] == tagsparent]
                        if tagsparents:
                            self.map[tagsparents[0][0]] = nrev

            bookmarks = self.source.getbookmarks()
            cbookmarks = {}
            for k in bookmarks:
                v = bookmarks[k]
                if self.map.get(v, SKIPREV) != SKIPREV:
                    cbookmarks[k] = self.map[v]

            if c and cbookmarks:
                self.dest.putbookmarks(cbookmarks)

            self.writeauthormap()
        finally:
            self.cleanup()

    def cleanup(self):
        """Run the destination's and the source's after() hooks, then close
        the revision map.  The source hook runs even if the destination
        hook raises."""
        try:
            self.dest.after()
        finally:
            self.source.after()
        self.map.close()
565 567
def convert(ui, src, dest=None, revmapfile=None, **opts):
    """Convert repository ``src`` into ``dest``.

    Saves the current encoding.encoding in ``orig_encoding`` and sets
    encoding.encoding to 'UTF-8', opens the sink and the source, picks the
    sort mode, optionally wraps the source in a file map, and runs a
    ``converter``.

    When ``dest`` is empty a name is derived from ``src`` with an "-hg"
    suffix.  When ``revmapfile`` is empty the sink's own revmapfile() is
    used.  Raises error.Abort when several sort modes are requested or when
    the chosen one is not supported by the source.
    """
    global orig_encoding
    orig_encoding = encoding.encoding
    encoding.encoding = 'UTF-8'

    # support --authors as an alias for --authormap
    if not opts.get('authormap'):
        opts['authormap'] = opts.get('authors')

    if not dest:
        dest = hg.defaultdest(src) + "-hg"
        ui.status(_("assuming destination %s\n") % dest)

    sink = scmutil.wrapconvertsink(
        convertsink(ui, dest, opts.get('dest_type')))

    try:
        source, defaultsort = convertsource(
            ui, src, opts.get('source_type'), opts.get('rev'))
    except Exception:
        # Opening the source failed: remove whatever the sink created
        # before letting the error propagate.
        for created in sink.created:
            shutil.rmtree(created, True)
        raise

    requested = [
        mode
        for mode in ('branchsort', 'datesort', 'sourcesort', 'closesort')
        if opts.get(mode)
    ]
    if len(requested) > 1:
        raise error.Abort(_('more than one sort mode specified'))
    sortmode = requested[0] if requested else defaultsort

    if sortmode == 'sourcesort' and not source.hasnativeorder():
        raise error.Abort(_('--sourcesort is not supported by this data source')
                          )
    if sortmode == 'closesort' and not source.hasnativeclose():
        raise error.Abort(_('--closesort is not supported by this data source'))

    filemappath = opts.get('filemap')
    if filemappath:
        source = filemap.filemap_source(ui, source, filemappath)
        sink.setfilemapmode(True)

    if not revmapfile:
        revmapfile = sink.revmapfile()

    converter(ui, source, sink, revmapfile, opts).convert(sortmode)
General Comments 0
You need to be logged in to leave comments. Login now