##// END OF EJS Templates
Split convert extension into common and repository type modules
Brendan Cully -
r4536:cc9b7921 default
parent child Browse files
Show More
This diff has been collapsed as it changes many lines, (519 lines changed) Show them Hide them
@@ -1,749 +1,242 b''
1 1 # convert.py Foreign SCM converter
2 2 #
3 3 # Copyright 2005, 2006 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms
6 6 # of the GNU General Public License, incorporated herein by reference.
7 7
8 import sys, os, zlib, sha, time, re, locale, socket
8 from common import NoRepo
9 from cvs import convert_cvs
10 from git import convert_git
11 from hg import convert_mercurial
12
13 import os
9 14 from mercurial import hg, ui, util, commands
10 15
11 16 commands.norepo += " convert"
12 17
class NoRepo(Exception):
    """Raised by a converter constructor when the given path is not a
    repository of the type that converter handles."""
14
class commit(object):
    """Plain record describing one changeset read from a source.

    Every keyword argument becomes an instance attribute.  author, date,
    desc and parents are mandatory; other keywords (e.g. branch) are
    stored as given.
    """

    def __init__(self, **parts):
        missing = [x for x in "author date desc parents".split()
                   if x not in parts]
        if missing:
            # report the first missing field, in declaration order
            raise util.Abort("commit missing field %s" % missing[0])
        self.__dict__.update(parts)
21
def recode(s):
    """Return the byte string s re-encoded as UTF-8.

    Input that already is valid UTF-8 comes back with the same value.
    Anything else is interpreted as Latin-1; that codec maps every
    possible byte, so no further fallback is needed.
    """
    try:
        return s.decode("utf-8").encode("utf-8")
    except UnicodeError:
        # not UTF-8: Latin-1 accepts any byte sequence
        return s.decode("latin-1").encode("utf-8")
30
class converter_source(object):
    """Interface every conversion source has to implement."""

    def __init__(self, ui, path):
        """Open the source repository found at path.

        Implementations raise NoRepo("message") when path is not a
        valid repository of their type."""
        raise NotImplementedError()

    def getheads(self):
        """Return the heads of this repository as a list."""
        raise NotImplementedError()

    def getfile(self, name, rev):
        """Return the contents of file name at rev, as a string."""
        raise NotImplementedError()

    def getmode(self, name, rev):
        """Return the mode of file name at rev: '', 'x' or 'l'."""
        raise NotImplementedError()

    def getchanges(self, version):
        """Return a sorted list of (filename, id) tuples, one for each
        file changed in version.

        The id only says which revision getfile() should fetch; for git
        it is an object hash."""
        raise NotImplementedError()

    def getcommit(self, version):
        """Return the commit object describing version."""
        raise NotImplementedError()

    def gettags(self):
        """Return a dictionary mapping tag name to revision."""
        raise NotImplementedError()
65
class converter_sink(object):
    """Interface every conversion sink (target) has to implement."""

    def __init__(self, ui, path):
        """Open the sink repository found at path.

        Implementations raise NoRepo("message") when path is not a
        valid repository of their type."""
        raise NotImplementedError()

    def getheads(self):
        """Return the heads of this repository as a list."""
        raise NotImplementedError()

    def mapfile(self):
        """Return the path of the revision map file.

        That file holds lines of the form
          source_rev_id sink_rev_id
        pairing equivalent revision identifiers of the two systems."""
        raise NotImplementedError()

    def putfile(self, f, e, data):
        """Stage a file for the next putcommit().

        f: path to file
        e: '', 'x', or 'l' (regular file, executable, or symlink)
        data: file contents"""
        raise NotImplementedError()

    def delfile(self, f):
        """Stage the removal of a file for the next putcommit().

        f: path to file"""
        raise NotImplementedError()

    def putcommit(self, files, parents, commit):
        """Create a revision containing every changed file listed in
        'files', with the given parents.

        'commit' is a commit object carrying at least the author, date
        and message of the changeset.  This is called after the
        putfile() and delfile() calls; the sink is not told to update
        to a particular revision (nor which revision that would be)
        before the file data arrives."""
        raise NotImplementedError()

    def puttags(self, tags):
        """Store tags in the sink.

        tags: {tagname: sink_rev_id, ...}"""
        raise NotImplementedError()
110
111
112 # CVS conversion code inspired by hg-cvs-import and git-cvsimport
class convert_cvs(converter_source):
    """Conversion source reading a checked-out CVS sandbox.

    Changeset metadata is taken from the output of the external cvsps
    command; file contents are fetched by talking the CVS client/server
    protocol to the repository named in the sandbox's CVS/Root file.
    """

    def __init__(self, ui, path):
        """Parse the sandbox at path and connect to its CVS server.

        Raises NoRepo when path has no CVS administrative directory.
        """
        self.path = path
        self.ui = ui
        cvs = os.path.join(path, "CVS")
        if not os.path.exists(cvs):
            raise NoRepo("couldn't open CVS repo %s" % path)

        self.changeset = {}
        self.files = {}
        self.tags = {}
        self.lastbranch = {}
        self.parent = {}
        self.socket = None
        # filled by getfile(), reset by getchanges(); created here so that
        # getfile()/getmode() cannot hit a missing attribute
        self.modecache = {}
        self.cvsroot = self._readadmin(cvs, "Root")
        self.cvsrepo = self._readadmin(cvs, "Repository")
        self.encoding = locale.getpreferredencoding()
        self._parse()
        self._connect()

    def _readadmin(self, cvsdir, name):
        """Return the contents of cvsdir/name minus the final character
        (presumably the trailing newline) and close the file."""
        fp = open(os.path.join(cvsdir, name))
        try:
            return fp.read()[:-1]
        finally:
            fp.close()

    def _parse(self):
        """Fill the changeset, file, tag and head tables from cvsps output.

        Runs cvsps inside the sandbox directory and restores the previous
        working directory afterwards.  Does nothing when changesets have
        already been loaded.
        """
        if self.changeset:
            return

        cwd = os.getcwd()
        try:
            os.chdir(self.path)
            csid = None
            state = 0
            for line in os.popen("cvsps -A -u --cvs-direct -q"):
                if state == 0: # header
                    if line.startswith("PatchSet"):
                        csid = line[9:-2]
                    elif line.startswith("Date"):
                        date = util.parsedate(line[6:-1],
                                              ["%Y/%m/%d %H:%M:%S"])
                        date = util.datestr(date)
                    elif line.startswith("Branch"):
                        branch = line[8:-1]
                        # the parent is the previous patchset seen on the
                        # same branch; 'bad' marks a branch without one
                        self.parent[csid] = self.lastbranch.get(branch, 'bad')
                        self.lastbranch[branch] = csid
                    elif line.startswith("Ancestor branch"):
                        ancestor = line[17:-1]
                        self.parent[csid] = self.lastbranch[ancestor]
                    elif line.startswith("Author"):
                        author = self.recode(line[8:-1])
                    elif line.startswith("Tag: "):
                        t = line[5:-1].rstrip()
                        if t != "(none)":
                            self.tags[t] = csid
                    elif line.startswith("Log:"):
                        state = 1
                        log = ""
                elif state == 1: # log
                    if line == "Members: \n":
                        files = {}
                        log = self.recode(log[:-1])
                        if log.isspace():
                            log = "*** empty log message ***\n"
                        state = 2
                    else:
                        log += line
                elif state == 2: # member list
                    if line == "\n": # blank line ends the patchset
                        state = 0
                        p = [self.parent[csid]]
                        if csid == "1":
                            p = []
                        if branch == "HEAD":
                            branch = ""
                        c = commit(author=author, date=date, parents=p,
                                   desc=log, branch=branch)
                        self.changeset[csid] = c
                        self.files[csid] = files
                    else:
                        # "<ws>path:old->new<2 chars>": keep the new revision
                        colon = line.rfind(':')
                        fname = line[1:colon]
                        rev = line[colon+1:-2]
                        rev = rev.split("->")[1]
                        files[fname] = rev

            self.heads = list(self.lastbranch.values())
        finally:
            os.chdir(cwd)

    def _cvspass(self, user, serv, port, root):
        """Return the scrambled pserver password stored in ~/.cvspass.

        Both entry layouts are understood:
          :pserver:user@host:/root PASSWORD        (no port in the key)
          /1 :pserver:user@host:2401/root PASSWORD (port always present)
        Returns "A", the default the connection code uses when no
        password is known, if the file or a matching entry is missing.
        """
        format0 = ":pserver:%s@%s:%s" % (user, serv, root)
        format1 = ":pserver:%s@%s:%d%s" % (user, serv, port, root)
        cvspass = os.path.expanduser("~/.cvspass")
        if not os.path.exists(cvspass):
            return "A"
        pf = open(cvspass)
        try:
            # splitlines() keeps the newline out of the password
            for line in pf.read().splitlines():
                if ' ' not in line:
                    continue
                key, rest = line.split(' ', 1)
                wanted = format0
                if key == "/1":
                    if ' ' not in rest:
                        continue
                    key, rest = rest.split(' ', 1)
                    wanted = format1
                if key == wanted:
                    return rest
        finally:
            pf.close()
        return "A"

    def _connect(self):
        """Open the connection to the CVS server and do the handshake.

        Supports :pserver: (direct socket), :ext:/rsh style roots (via
        $CVS_RSH, defaulting to rsh) and local roots (spawning
        "cvs server").  Sets self.readp, self.writep and self.realroot.
        Raises NoRepo when pserver authentication fails and util.Abort
        when the server does not answer the valid-requests query.
        """
        root = self.cvsroot
        conntype = None
        user, host = None, None
        cmd = ['cvs', 'server']

        self.ui.status("connecting to %s\n" % root)

        if root.startswith(":pserver:"):
            root = root[9:]
            m = re.match(r'(?:(.*?)(?::(.*?))?@)?([^:\/]*)(?::(\d*))?(.*)',
                         root)
            if m:
                conntype = "pserver"
                user, passw, serv, port, root = m.groups()
                if not user:
                    user = "anonymous"
                if port:
                    port = int(port)
                else:
                    port = 2401

                if not passw:
                    passw = self._cvspass(user, serv, port, root)

                sck = socket.socket()
                sck.connect((serv, port))
                sck.send("\n".join(["BEGIN AUTH REQUEST", root, user, passw,
                                    "END AUTH REQUEST", ""]))
                if sck.recv(128) != "I LOVE YOU\n":
                    raise NoRepo("CVS pserver authentication failed")

                self.writep = self.readp = sck.makefile('r+')

        if not conntype and root.startswith(":local:"):
            conntype = "local"
            root = root[7:]

        if not conntype:
            # :ext:user@host/home/user/path/to/cvsroot
            if root.startswith(":ext:"):
                root = root[5:]
            m = re.match(r'(?:([^@:/]+)@)?([^:/]+):?(.*)', root)
            if not m:
                conntype = "local"
            else:
                conntype = "rsh"
                user, host, root = m.group(1), m.group(2), m.group(3)

        if conntype != "pserver":
            if conntype == "rsh":
                # fall back to plain rsh when $CVS_RSH is unset or empty
                rsh = os.environ.get("CVS_RSH") or "rsh"
                if user:
                    cmd = [rsh, '-l', user, host] + cmd
                else:
                    cmd = [rsh, host] + cmd

            self.writep, self.readp = os.popen2(cmd)

        self.realroot = root

        self.writep.write("Root %s\n" % root)
        self.writep.write("Valid-responses ok error Valid-requests Mode"
                          " M Mbinary E Checked-in Created Updated"
                          " Merged Removed\n")
        self.writep.write("valid-requests\n")
        self.writep.flush()
        r = self.readp.readline()
        if not r.startswith("Valid-requests"):
            raise util.Abort("server sucks")
        if "UseUnchanged" in r:
            self.writep.write("UseUnchanged\n")
            self.writep.flush()
            r = self.readp.readline()

    def getheads(self):
        """Return the last patchset id seen on every branch."""
        return self.heads

    def _getfile(self, name, rev):
        """Check out name at rev from the server.

        Returns (data, mode) where mode is "x" or "".  Raises IOError
        for a revision marked "(DEAD)" and util.Abort on a response that
        is not understood or that ends without reporting a file mode.
        """
        if rev.endswith("(DEAD)"):
            raise IOError

        args = ("-N -P -kk -r %s --" % rev).split()
        args.append(os.path.join(self.cvsrepo, name))
        for x in args:
            self.writep.write("Argument %s\n" % x)
        self.writep.write("Directory .\n%s\nco\n" % self.realroot)
        self.writep.flush()

        data = ""
        # stays None until a Created/Updated response supplies the mode
        mode = None
        while 1:
            line = self.readp.readline()
            if line.startswith("Created ") or line.startswith("Updated "):
                self.readp.readline() # path
                self.readp.readline() # entries
                mode = self.readp.readline()[:-1]
                count = int(self.readp.readline()[:-1])
                data = self.readp.read(count)
            elif line.startswith(" "):
                data += line[1:]
            elif line.startswith("M "):
                pass
            elif line.startswith("Mbinary "):
                count = int(self.readp.readline()[:-1])
                data = self.readp.read(count)
            else:
                if line == "ok\n":
                    if mode is None:
                        raise util.Abort("malformed response from CVS")
                    return (data, "x" in mode and "x" or "")
                elif line.startswith("E "):
                    self.ui.warn("cvs server: %s\n" % line[2:])
                elif line.startswith("Remove"):
                    l = self.readp.readline()
                    l = self.readp.readline()
                    if l != "ok\n":
                        raise util.Abort("unknown CVS response: %s" % l)
                else:
                    raise util.Abort("unknown CVS response: %s" % line)

    def getfile(self, file, rev):
        """Return the contents of file at rev and remember its mode for
        a later getmode() call."""
        data, mode = self._getfile(file, rev)
        self.modecache[(file, rev)] = mode
        return data

    def getmode(self, file, rev):
        """Return the mode recorded by the preceding getfile() call."""
        return self.modecache[(file, rev)]

    def getchanges(self, rev):
        """Return the sorted (filename, revision) pairs of patchset rev.

        Also resets the mode cache."""
        self.modecache = {}
        files = self.files[rev]
        cl = list(files.items())
        cl.sort()
        return cl

    def recode(self, text):
        """Convert text from the preferred locale encoding to UTF-8,
        replacing undecodable bytes."""
        return text.decode(self.encoding, "replace").encode("utf-8")

    def getcommit(self, rev):
        """Return the commit object parsed for patchset rev."""
        return self.changeset[rev]

    def gettags(self):
        """Return the {tag name: patchset id} table built by _parse()."""
        return self.tags
350
class convert_git(converter_source):
    """Conversion source reading a git repository through the git
    command line tools."""

    def __init__(self, ui, path):
        """Accept either a .git directory or a work tree containing one.

        Raises NoRepo when no objects directory is found."""
        if os.path.isdir(path + "/.git"):
            path += "/.git"
        self.path = path
        self.ui = ui
        # filled by getchanges(); created here so getmode() cannot hit a
        # missing attribute
        self.modecache = {}
        if not os.path.exists(path + "/objects"):
            raise NoRepo("couldn't open GIT repo %s" % path)

    # NOTE(review): self.path is interpolated into shell command lines
    # unquoted below; a path containing shell metacharacters would break
    # (or alter) these commands.

    def getheads(self):
        """Return a one-element list holding the hash HEAD resolves to."""
        fh = os.popen("GIT_DIR=%s git-rev-parse --verify HEAD" % self.path)
        return [fh.read()[:-1]]

    def catfile(self, rev, type):
        """Return the raw contents of object rev of the given type.

        Raises IOError for the all-zero hash."""
        if rev == "0" * 40: raise IOError()
        fh = os.popen("GIT_DIR=%s git-cat-file %s %s 2>/dev/null"
                      % (self.path, type, rev))
        return fh.read()

    def getfile(self, name, rev):
        """Return the blob rev; name is not needed to locate it."""
        return self.catfile(rev, "blob")

    def getmode(self, name, rev):
        """Return the mode recorded for (name, rev) by getchanges()."""
        return self.modecache[(name, rev)]

    def getchanges(self, version):
        """Return (filename, blob hash) pairs for the files changed in
        version, and record each file's mode in the mode cache."""
        self.modecache = {}
        fh = os.popen("GIT_DIR=%s git-diff-tree --root -m -r %s"
                      % (self.path, version))
        changes = []
        for l in fh:
            if "\t" not in l: continue
            m, f = l[:-1].split("\t")
            m = m.split()
            h = m[3]
            p = (m[1] == "100755")
            s = (m[1] == "120000")
            self.modecache[(f, h)] = (p and "x") or (s and "l") or ""
            changes.append((f, h))
        return changes

    def getcommit(self, version):
        """Parse the commit object version into a commit record.

        The committer is appended to the description.  The date is the
        timestamp of the last author/committer header seen, followed by
        the timezone converted from git's +HHMM/-HHMM form to a signed
        number of seconds (negated relative to git's sign).
        """
        c = self.catfile(version, "commit") # read the commit object
        end = c.find("\n\n")
        message = c[end+2:]
        message = recode(message)
        l = c[:end].splitlines()
        parents = []
        # l[0] is the tree line; the remaining headers are "name value"
        for e in l[1:]:
            n, v = e.split(" ", 1)
            if n == "author":
                p = v.split()
                tm, tz = p[-2:]
                author = " ".join(p[:-2])
                # only an e-mail address: drop the angle brackets
                if author.startswith("<"): author = author[1:-1]
                author = recode(author)
            if n == "committer":
                p = v.split()
                tm, tz = p[-2:]
                committer = " ".join(p[:-2])
                if committer.startswith("<"): committer = committer[1:-1]
                committer = recode(committer)
                message += "\ncommitter: %s\n" % committer
            if n == "parent": parents.append(v)

        tzs, tzh, tzm = tz[-5:-4] + "1", tz[-4:-2], tz[-2:]
        # hours and minutes both have to be expressed in seconds
        tz = -int(tzs) * (int(tzh) * 3600 + int(tzm) * 60)
        date = tm + " " + str(tz)

        c = commit(parents=parents, date=date, author=author, desc=message)
        return c

    def gettags(self):
        """Return {tag name: commit hash} built from the peeled ("^{}")
        entries under refs/tags/ reported by git-ls-remote."""
        tags = {}
        fh = os.popen('git-ls-remote --tags "%s" 2>/dev/null' % self.path)
        prefix = 'refs/tags/'
        for line in fh:
            line = line.strip()
            if not line.endswith("^{}"):
                continue
            node, tag = line.split(None, 1)
            if not tag.startswith(prefix):
                continue
            tag = tag[len(prefix):-3]
            tags[tag] = node

        return tags
439
class convert_mercurial(converter_sink):
    """Conversion sink writing into a Mercurial repository."""

    def __init__(self, ui, path):
        """Open the Mercurial repository at path.

        Raises NoRepo when it cannot be opened."""
        self.path = path
        self.ui = ui
        try:
            self.repo = hg.repository(self.ui, path)
        except hg.RepoError:
            raise NoRepo("couldn't open hg repo %s" % path)

    def mapfile(self):
        """Return the default revision map path, <path>/.hg/shamap."""
        return os.path.join(self.path, ".hg", "shamap")

    def getheads(self):
        """Return the changelog heads as hex strings."""
        h = self.repo.changelog.heads()
        return [ hg.hex(x) for x in h ]

    def putfile(self, f, e, data):
        """Write f into the working directory and schedule it for
        addition when the dirstate does not know it yet."""
        self.repo.wwrite(f, data, e)
        if self.repo.dirstate.state(f) == '?':
            self.repo.dirstate.update([f], "a")

    def delfile(self, f):
        """Remove f from the working directory; a file that cannot be
        removed (e.g. already gone) is ignored."""
        try:
            os.unlink(self.repo.wjoin(f))
            #self.repo.remove([f])
        except OSError:
            pass

    def putcommit(self, files, parents, commit):
        """Commit the staged files and return the new revision as hex.

        Duplicate parents are dropped.  With more than two parents a
        chain of commits is created, each merging one further parent
        into the previous result.
        """
        seen = {}
        pl = []
        for p in parents:
            if p not in seen:
                pl.append(p)
                seen[p] = 1
        parents = pl

        # rawcommit always gets two parents; pad with the null revision
        while len(parents) < 2:
            parents.append("0" * 40)
        p2 = parents.pop(0)

        text = commit.desc
        extra = {}
        try:
            extra["branch"] = commit.branch
        except AttributeError:
            pass

        while parents:
            p1 = p2
            p2 = parents.pop(0)
            self.repo.rawcommit(files, text, commit.author, commit.date,
                                hg.bin(p1), hg.bin(p2), extra=extra)
            text = "(octopus merge fixup)\n"
            p2 = hg.hex(self.repo.changelog.tip())

        return p2

    def puttags(self, tags):
        """Rewrite .hgtags from tags and commit it when it changed.

        Returns the hex id of the new tip after committing, or None
        when .hgtags was already up to date.
        """
        try:
            old = self.repo.wfile(".hgtags").read()
            oldlines = old.splitlines(1)
            oldlines.sort()
        except IOError:
            # no readable .hgtags yet
            oldlines = []

        k = list(tags.keys())
        k.sort()
        newlines = []
        for tag in k:
            newlines.append("%s %s\n" % (tags[tag], tag))

        newlines.sort()

        if newlines != oldlines:
            self.ui.status("updating tags\n")
            f = self.repo.wfile(".hgtags", "w")
            f.write("".join(newlines))
            f.close()
            if not oldlines: self.repo.add([".hgtags"])
            # seconds since the epoch with a zero UTC offset;
            # mktime(gmtime()) would be skewed by the local offset
            date = "%s 0" % int(time.time())
            self.repo.rawcommit([".hgtags"], "update tags", "convert-repo",
                                date, self.repo.changelog.tip(), hg.nullid)
            return hg.hex(self.repo.changelog.tip())
524
# Converter classes tried, in this order, for every path.
converters = [convert_cvs, convert_git, convert_mercurial]

def converter(ui, path):
    """Return a converter instance for the repository at path.

    Each class in converters is tried in turn; one that raises NoRepo
    is skipped.  Aborts when path is not a directory or when no
    converter accepts it.
    """
    if not os.path.isdir(path):
        raise util.Abort("%s: not a directory" % path)
    for factory in converters:
        try:
            return factory(ui, path)
        except NoRepo:
            continue
    raise util.Abort("%s: unknown repository type" % path)
536 29
class convert(object):
    """Drive a conversion from a source converter into a sink.

    The revision map (source id -> sink id) is loaded from mapfile on
    construction and appended to after every converted changeset.
    """

    def __init__(self, ui, source, dest, mapfile, opts):
        """Remember the collaborators and load an existing map file.

        A map file that cannot be opened is treated as empty.
        """
        self.source = source
        self.dest = dest
        self.ui = ui
        self.mapfile = mapfile
        self.opts = opts
        self.commitcache = {}

        self.map = {}
        try:
            fp = open(self.mapfile)
        except IOError:
            return
        try:
            for l in fp:
                # split() drops the newline itself, so a final line
                # without one keeps its last character
                if not l.strip():
                    continue
                sv, dv = l.split()
                self.map[sv] = dv
        finally:
            fp.close()

    def _appendmap(self, rev, newrev):
        """Append one "source sink" line to the map file and close it."""
        fp = open(self.mapfile, "a")
        try:
            fp.write("%s %s\n" % (rev, newrev))
        finally:
            fp.close()

    def walktree(self, heads):
        """Walk the source history from heads and return {rev: parents}
        for every revision not converted yet.

        Visited commits are stored in self.commitcache.  Revisions
        without parents get no entry of their own.  The heads list is
        left untouched.
        """
        # work on a copy: the caller's list may be owned by the source
        visit = list(heads)
        known = {}
        parents = {}
        while visit:
            n = visit.pop(0)
            if n in known or n in self.map: continue
            known[n] = 1
            self.commitcache[n] = self.source.getcommit(n)
            cp = self.commitcache[n].parents
            for p in cp:
                parents.setdefault(n, []).append(p)
                visit.append(p)

        return parents

    def toposort(self, parents):
        """Return the unconverted revisions reachable through parents,
        ordered so that every revision follows its parents.

        With the 'datesort' option the result is re-sorted by
        (depth, date, revision).
        """
        visit = list(parents.keys())
        seen = {}
        children = {}

        # invert the parent table
        while visit:
            n = visit.pop(0)
            if n in seen: continue
            seen[n] = 1
            if n in parents:
                for p in parents[n]:
                    visit.append(p)
                    children.setdefault(p, []).append(n)

        s = []
        removed = {}
        visit = list(children.keys())
        while visit:
            n = visit.pop(0)
            if n in removed: continue
            dep = 0
            if n in parents:
                for p in parents[n]:
                    if p in self.map: continue
                    if p not in removed:
                        # we're still dependent
                        visit.append(n)
                        dep = 1
                        break

            if not dep:
                # all n's parents are in the list
                removed[n] = 1
                if n not in self.map:
                    s.append(n)
                if n in children:
                    for c in children[n]:
                        visit.insert(0, c)

        if self.opts.get('datesort'):
            depth = {}
            for n in s:
                depth[n] = 0
                pl = [p for p in self.commitcache[n].parents
                      if p not in self.map]
                if pl:
                    depth[n] = max([depth[p] for p in pl]) + 1

            s = [(depth[n], self.commitcache[n].date, n) for n in s]
            s.sort()
            s = [e[2] for e in s]

        return s

    def copy(self, rev):
        """Copy changeset rev into the sink and record the mapping.

        A file whose contents cannot be fetched (IOError) is deleted in
        the sink.
        """
        c = self.commitcache[rev]
        files = self.source.getchanges(rev)

        for f, v in files:
            try:
                data = self.source.getfile(f, v)
            except IOError:
                self.dest.delfile(f)
            else:
                e = self.source.getmode(f, v)
                self.dest.putfile(f, e, data)

        r = [self.map[v] for v in c.parents]
        f = [f for f, v in files]
        self.map[rev] = self.dest.putcommit(f, r, c)
        self._appendmap(rev, self.map[rev])

    def convert(self):
        """Convert every pending changeset, then transfer the tags."""
        self.ui.status("scanning source...\n")
        heads = self.source.getheads()
        parents = self.walktree(heads)
        self.ui.status("sorting...\n")
        t = self.toposort(parents)
        num = len(t)
        c = None

        self.ui.status("converting...\n")
        for c in t:
            num -= 1
            desc = self.commitcache[c].desc
            if "\n" in desc:
                desc = desc.splitlines()[0]
            self.ui.status("%d %s\n" % (num, desc))
            self.copy(c)

        tags = self.source.gettags()
        ctags = {}
        for k in tags:
            v = tags[k]
            if v in self.map:
                ctags[k] = self.map[v]

        if c and ctags:
            nrev = self.dest.puttags(ctags)
            # write another hash correspondence to override the previous
            # one so we don't end up with extra tag heads
            if nrev:
                self._appendmap(c, nrev)
677 170
def _convert(ui, src, dest=None, mapfile=None, **opts):
    '''Convert a foreign SCM repository to a Mercurial one.

    Accepted source formats:
    - GIT
    - CVS

    Accepted destination formats:
    - Mercurial

    If destination isn't given, a new Mercurial repo named <src>-hg will
    be created. If <mapfile> isn't given, it will be put in a default
    location (<dest>/.hg/shamap by default)

    The <mapfile> is a simple text file that maps each source commit ID to
    the destination ID for that revision, like so:

    <source ID> <destination ID>

    If the file doesn't exist, it's automatically created.  It's updated
    on each commit copied, so convert-repo can be interrupted and can
    be run repeatedly to copy new commits.
    '''

    srcc = converter(ui, src)
    if not hasattr(srcc, "getcommit"):
        raise util.Abort("%s: can't read from this repo type" % src)

    if not dest:
        dest = src + "-hg"
        ui.status("assuming destination %s\n" % dest)

    # Try to be smart and initialize things when required
    if os.path.isdir(dest):
        if len(os.listdir(dest)) > 0:
            # a non-empty directory must already be a Mercurial repository
            try:
                hg.repository(ui, dest)
                ui.status("destination %s is a Mercurial repository\n" % dest)
            except hg.RepoError:
                raise util.Abort(
                    "destination directory %s is not empty.\n"
                    "Please specify an empty directory to be initialized\n"
                    "or an already initialized mercurial repository"
                    % dest)
        else:
            ui.status("initializing destination %s repository\n" % dest)
            hg.repository(ui, dest, create=True)
    elif os.path.exists(dest):
        raise util.Abort("destination %s exists and is not a directory" % dest)
    else:
        ui.status("initializing destination %s repository\n" % dest)
        hg.repository(ui, dest, create=True)

    destc = converter(ui, dest)
    if not hasattr(destc, "putcommit"):
        # report the destination, which is the repo that cannot be written
        raise util.Abort("%s: can't write to this repo type" % dest)

    if not mapfile:
        try:
            mapfile = destc.mapfile()
        except (AttributeError, NotImplementedError):
            # sink without a default location: keep the map next to it
            mapfile = os.path.join(dest, "map")

    c = convert(ui, srcc, destc, mapfile, opts)
    c.convert()
743 236
# Command table: command name -> (function, option list, synopsis).
cmdtable = {
    "convert": (
        _convert,
        [('', 'datesort', None, 'try to sort changesets by date')],
        'hg convert [OPTION]... SOURCE [DEST [MAPFILE]]',
    ),
}
This diff has been collapsed as it changes many lines, (661 lines changed) Show them Hide them
@@ -1,749 +1,90 b''
1 # convert.py Foreign SCM converter
2 #
3 # Copyright 2005, 2006 Matt Mackall <mpm@selenic.com>
4 #
5 # This software may be used and distributed according to the terms
6 # of the GNU General Public License, incorporated herein by reference.
7
8 import sys, os, zlib, sha, time, re, locale, socket
9 from mercurial import hg, ui, util, commands
10
11 commands.norepo += " convert"
1 # common code for the convert extension
12 2
class NoRepo(Exception):
    """Raised by a converter constructor when the given path is not a
    repository of the type that converter handles."""
14 4
class commit(object):
    """Plain record describing one changeset read from a source.

    Every keyword argument becomes an instance attribute.  author, date,
    desc and parents are mandatory; other keywords (e.g. branch) are
    stored as given.  Raises util.Abort when a mandatory field is
    missing.
    """

    def __init__(self, **parts):
        for x in "author date desc parents".split():
            if x not in parts:
                # this module has no top-level imports, so bring util
                # into scope here instead of failing with a NameError
                from mercurial import util
                raise util.Abort("commit missing field %s" % x)
        self.__dict__.update(parts)
21 11
def recode(s):
    """Return the byte string s re-encoded as UTF-8.

    Input that already is valid UTF-8 comes back with the same value.
    Anything else is interpreted as Latin-1; that codec maps every
    possible byte, so no further fallback is needed.
    """
    try:
        return s.decode("utf-8").encode("utf-8")
    except UnicodeError:
        # not UTF-8: Latin-1 accepts any byte sequence
        return s.decode("latin-1").encode("utf-8")
30
class converter_source(object):
    """Interface every conversion source has to implement."""

    def __init__(self, ui, path):
        """Open the source repository found at path.

        Implementations raise NoRepo("message") when path is not a
        valid repository of their type."""
        raise NotImplementedError()

    def getheads(self):
        """Return the heads of this repository as a list."""
        raise NotImplementedError()

    def getfile(self, name, rev):
        """Return the contents of file name at rev, as a string."""
        raise NotImplementedError()

    def getmode(self, name, rev):
        """Return the mode of file name at rev: '', 'x' or 'l'."""
        raise NotImplementedError()

    def getchanges(self, version):
        """Return a sorted list of (filename, id) tuples, one for each
        file changed in version.

        The id only says which revision getfile() should fetch; for git
        it is an object hash."""
        raise NotImplementedError()

    def getcommit(self, version):
        """Return the commit object describing version."""
        raise NotImplementedError()

    def gettags(self):
        """Return a dictionary mapping tag name to revision."""
        raise NotImplementedError()
65 46
class converter_sink(object):
    """Interface every conversion sink (target) has to implement."""

    def __init__(self, ui, path):
        """Open the sink repository found at path.

        Implementations raise NoRepo("message") when path is not a
        valid repository of their type."""
        raise NotImplementedError()

    def getheads(self):
        """Return the heads of this repository as a list."""
        raise NotImplementedError()

    def mapfile(self):
        """Return the path of the revision map file.

        That file holds lines of the form
          source_rev_id sink_rev_id
        pairing equivalent revision identifiers of the two systems."""
        raise NotImplementedError()

    def putfile(self, f, e, data):
        """Stage a file for the next putcommit().

        f: path to file
        e: '', 'x', or 'l' (regular file, executable, or symlink)
        data: file contents"""
        raise NotImplementedError()

    def delfile(self, f):
        """Stage the removal of a file for the next putcommit().

        f: path to file"""
        raise NotImplementedError()

    def putcommit(self, files, parents, commit):
        """Create a revision containing every changed file listed in
        'files', with the given parents.

        'commit' is a commit object carrying at least the author, date
        and message of the changeset.  This is called after the
        putfile() and delfile() calls; the sink is not told to update
        to a particular revision (nor which revision that would be)
        before the file data arrives."""
        raise NotImplementedError()

    def puttags(self, tags):
        """Store tags in the sink.

        tags: {tagname: sink_rev_id, ...}"""
        raise NotImplementedError()
110
111
112 # CVS conversion code inspired by hg-cvs-import and git-cvsimport
class convert_cvs(converter_source):
    """Conversion source reading a checked-out CVS sandbox.

    Changeset metadata is taken from the output of the external cvsps
    command; file contents are fetched by talking the CVS client/server
    protocol to the repository named in the sandbox's CVS/Root file.
    """

    def __init__(self, ui, path):
        """Parse the sandbox at path and connect to its CVS server.

        Raises NoRepo when path has no CVS administrative directory.
        """
        self.path = path
        self.ui = ui
        cvs = os.path.join(path, "CVS")
        if not os.path.exists(cvs):
            raise NoRepo("couldn't open CVS repo %s" % path)

        self.changeset = {}
        self.files = {}
        self.tags = {}
        self.lastbranch = {}
        self.parent = {}
        self.socket = None
        # filled by getfile(), reset by getchanges(); created here so that
        # getfile()/getmode() cannot hit a missing attribute
        self.modecache = {}
        self.cvsroot = self._readadmin(cvs, "Root")
        self.cvsrepo = self._readadmin(cvs, "Repository")
        self.encoding = locale.getpreferredencoding()
        self._parse()
        self._connect()

    def _readadmin(self, cvsdir, name):
        """Return the contents of cvsdir/name minus the final character
        (presumably the trailing newline) and close the file."""
        fp = open(os.path.join(cvsdir, name))
        try:
            return fp.read()[:-1]
        finally:
            fp.close()

    def _parse(self):
        """Fill the changeset, file, tag and head tables from cvsps output.

        Runs cvsps inside the sandbox directory and restores the previous
        working directory afterwards.  Does nothing when changesets have
        already been loaded.
        """
        if self.changeset:
            return

        cwd = os.getcwd()
        try:
            os.chdir(self.path)
            csid = None
            state = 0
            for line in os.popen("cvsps -A -u --cvs-direct -q"):
                if state == 0: # header
                    if line.startswith("PatchSet"):
                        csid = line[9:-2]
                    elif line.startswith("Date"):
                        date = util.parsedate(line[6:-1],
                                              ["%Y/%m/%d %H:%M:%S"])
                        date = util.datestr(date)
                    elif line.startswith("Branch"):
                        branch = line[8:-1]
                        # the parent is the previous patchset seen on the
                        # same branch; 'bad' marks a branch without one
                        self.parent[csid] = self.lastbranch.get(branch, 'bad')
                        self.lastbranch[branch] = csid
                    elif line.startswith("Ancestor branch"):
                        ancestor = line[17:-1]
                        self.parent[csid] = self.lastbranch[ancestor]
                    elif line.startswith("Author"):
                        author = self.recode(line[8:-1])
                    elif line.startswith("Tag: "):
                        t = line[5:-1].rstrip()
                        if t != "(none)":
                            self.tags[t] = csid
                    elif line.startswith("Log:"):
                        state = 1
                        log = ""
                elif state == 1: # log
                    if line == "Members: \n":
                        files = {}
                        log = self.recode(log[:-1])
                        if log.isspace():
                            log = "*** empty log message ***\n"
                        state = 2
                    else:
                        log += line
                elif state == 2: # member list
                    if line == "\n": # blank line ends the patchset
                        state = 0
                        p = [self.parent[csid]]
                        if csid == "1":
                            p = []
                        if branch == "HEAD":
                            branch = ""
                        c = commit(author=author, date=date, parents=p,
                                   desc=log, branch=branch)
                        self.changeset[csid] = c
                        self.files[csid] = files
                    else:
                        # "<ws>path:old->new<2 chars>": keep the new revision
                        colon = line.rfind(':')
                        fname = line[1:colon]
                        rev = line[colon+1:-2]
                        rev = rev.split("->")[1]
                        files[fname] = rev

            self.heads = list(self.lastbranch.values())
        finally:
            os.chdir(cwd)

    def _cvspass(self, user, serv, port, root):
        """Return the scrambled pserver password stored in ~/.cvspass.

        Both entry layouts are understood:
          :pserver:user@host:/root PASSWORD        (no port in the key)
          /1 :pserver:user@host:2401/root PASSWORD (port always present)
        Returns "A", the default the connection code uses when no
        password is known, if the file or a matching entry is missing.
        """
        format0 = ":pserver:%s@%s:%s" % (user, serv, root)
        format1 = ":pserver:%s@%s:%d%s" % (user, serv, port, root)
        cvspass = os.path.expanduser("~/.cvspass")
        if not os.path.exists(cvspass):
            return "A"
        pf = open(cvspass)
        try:
            # splitlines() keeps the newline out of the password
            for line in pf.read().splitlines():
                if ' ' not in line:
                    continue
                key, rest = line.split(' ', 1)
                wanted = format0
                if key == "/1":
                    if ' ' not in rest:
                        continue
                    key, rest = rest.split(' ', 1)
                    wanted = format1
                if key == wanted:
                    return rest
        finally:
            pf.close()
        return "A"

    def _connect(self):
        """Open the connection to the CVS server and do the handshake.

        Supports :pserver: (direct socket), :ext:/rsh style roots (via
        $CVS_RSH, defaulting to rsh) and local roots (spawning
        "cvs server").  Sets self.readp, self.writep and self.realroot.
        Raises NoRepo when pserver authentication fails and util.Abort
        when the server does not answer the valid-requests query.
        """
        root = self.cvsroot
        conntype = None
        user, host = None, None
        cmd = ['cvs', 'server']

        self.ui.status("connecting to %s\n" % root)

        if root.startswith(":pserver:"):
            root = root[9:]
            m = re.match(r'(?:(.*?)(?::(.*?))?@)?([^:\/]*)(?::(\d*))?(.*)',
                         root)
            if m:
                conntype = "pserver"
                user, passw, serv, port, root = m.groups()
                if not user:
                    user = "anonymous"
                if port:
                    port = int(port)
                else:
                    port = 2401

                if not passw:
                    passw = self._cvspass(user, serv, port, root)

                sck = socket.socket()
                sck.connect((serv, port))
                sck.send("\n".join(["BEGIN AUTH REQUEST", root, user, passw,
                                    "END AUTH REQUEST", ""]))
                if sck.recv(128) != "I LOVE YOU\n":
                    raise NoRepo("CVS pserver authentication failed")

                self.writep = self.readp = sck.makefile('r+')

        if not conntype and root.startswith(":local:"):
            conntype = "local"
            root = root[7:]

        if not conntype:
            # :ext:user@host/home/user/path/to/cvsroot
            if root.startswith(":ext:"):
                root = root[5:]
            m = re.match(r'(?:([^@:/]+)@)?([^:/]+):?(.*)', root)
            if not m:
                conntype = "local"
            else:
                conntype = "rsh"
                user, host, root = m.group(1), m.group(2), m.group(3)

        if conntype != "pserver":
            if conntype == "rsh":
                # fall back to plain rsh when $CVS_RSH is unset or empty
                rsh = os.environ.get("CVS_RSH") or "rsh"
                if user:
                    cmd = [rsh, '-l', user, host] + cmd
                else:
                    cmd = [rsh, host] + cmd

            self.writep, self.readp = os.popen2(cmd)

        self.realroot = root

        self.writep.write("Root %s\n" % root)
        self.writep.write("Valid-responses ok error Valid-requests Mode"
                          " M Mbinary E Checked-in Created Updated"
                          " Merged Removed\n")
        self.writep.write("valid-requests\n")
        self.writep.flush()
        r = self.readp.readline()
        if not r.startswith("Valid-requests"):
            raise util.Abort("server sucks")
        if "UseUnchanged" in r:
            self.writep.write("UseUnchanged\n")
            self.writep.flush()
            r = self.readp.readline()

    def getheads(self):
        """Return the last patchset id seen on every branch."""
        return self.heads

    def _getfile(self, name, rev):
        """Check out name at rev from the server.

        Returns (data, mode) where mode is "x" or "".  Raises IOError
        for a revision marked "(DEAD)" and util.Abort on a response that
        is not understood or that ends without reporting a file mode.
        """
        if rev.endswith("(DEAD)"):
            raise IOError

        args = ("-N -P -kk -r %s --" % rev).split()
        args.append(os.path.join(self.cvsrepo, name))
        for x in args:
            self.writep.write("Argument %s\n" % x)
        self.writep.write("Directory .\n%s\nco\n" % self.realroot)
        self.writep.flush()

        data = ""
        # stays None until a Created/Updated response supplies the mode
        mode = None
        while 1:
            line = self.readp.readline()
            if line.startswith("Created ") or line.startswith("Updated "):
                self.readp.readline() # path
                self.readp.readline() # entries
                mode = self.readp.readline()[:-1]
                count = int(self.readp.readline()[:-1])
                data = self.readp.read(count)
            elif line.startswith(" "):
                data += line[1:]
            elif line.startswith("M "):
                pass
            elif line.startswith("Mbinary "):
                count = int(self.readp.readline()[:-1])
                data = self.readp.read(count)
            else:
                if line == "ok\n":
                    if mode is None:
                        raise util.Abort("malformed response from CVS")
                    return (data, "x" in mode and "x" or "")
                elif line.startswith("E "):
                    self.ui.warn("cvs server: %s\n" % line[2:])
                elif line.startswith("Remove"):
                    l = self.readp.readline()
                    l = self.readp.readline()
                    if l != "ok\n":
                        raise util.Abort("unknown CVS response: %s" % l)
                else:
                    raise util.Abort("unknown CVS response: %s" % line)

    def getfile(self, file, rev):
        """Return the contents of file at rev and remember its mode for
        a later getmode() call."""
        data, mode = self._getfile(file, rev)
        self.modecache[(file, rev)] = mode
        return data

    def getmode(self, file, rev):
        """Return the mode recorded by the preceding getfile() call."""
        return self.modecache[(file, rev)]

    def getchanges(self, rev):
        """Return the sorted (filename, revision) pairs of patchset rev.

        Also resets the mode cache."""
        self.modecache = {}
        files = self.files[rev]
        cl = list(files.items())
        cl.sort()
        return cl

    def recode(self, text):
        """Convert text from the preferred locale encoding to UTF-8,
        replacing undecodable bytes."""
        return text.decode(self.encoding, "replace").encode("utf-8")

    def getcommit(self, rev):
        """Return the commit object parsed for patchset rev."""
        return self.changeset[rev]

    def gettags(self):
        """Return the {tag name: patchset id} table built by _parse()."""
        return self.tags
350
351 class convert_git(converter_source):
352 def __init__(self, ui, path):
353 if os.path.isdir(path + "/.git"):
354 path += "/.git"
355 self.path = path
356 self.ui = ui
357 if not os.path.exists(path + "/objects"):
358 raise NoRepo("couldn't open GIT repo %s" % path)
359
360 def getheads(self):
361 fh = os.popen("GIT_DIR=%s git-rev-parse --verify HEAD" % self.path)
362 return [fh.read()[:-1]]
363
364 def catfile(self, rev, type):
365 if rev == "0" * 40: raise IOError()
366 fh = os.popen("GIT_DIR=%s git-cat-file %s %s 2>/dev/null"
367 % (self.path, type, rev))
368 return fh.read()
369
370 def getfile(self, name, rev):
371 return self.catfile(rev, "blob")
372
373 def getmode(self, name, rev):
374 return self.modecache[(name, rev)]
375
376 def getchanges(self, version):
377 self.modecache = {}
378 fh = os.popen("GIT_DIR=%s git-diff-tree --root -m -r %s"
379 % (self.path, version))
380 changes = []
381 for l in fh:
382 if "\t" not in l: continue
383 m, f = l[:-1].split("\t")
384 m = m.split()
385 h = m[3]
386 p = (m[1] == "100755")
387 s = (m[1] == "120000")
388 self.modecache[(f, h)] = (p and "x") or (s and "l") or ""
389 changes.append((f, h))
390 return changes
391
392 def getcommit(self, version):
393 c = self.catfile(version, "commit") # read the commit hash
394 end = c.find("\n\n")
395 message = c[end+2:]
396 message = recode(message)
397 l = c[:end].splitlines()
398 manifest = l[0].split()[1]
399 parents = []
400 for e in l[1:]:
401 n, v = e.split(" ", 1)
402 if n == "author":
403 p = v.split()
404 tm, tz = p[-2:]
405 author = " ".join(p[:-2])
406 if author[0] == "<": author = author[1:-1]
407 author = recode(author)
408 if n == "committer":
409 p = v.split()
410 tm, tz = p[-2:]
411 committer = " ".join(p[:-2])
412 if committer[0] == "<": committer = committer[1:-1]
413 committer = recode(committer)
414 message += "\ncommitter: %s\n" % committer
415 if n == "parent": parents.append(v)
416
417 tzs, tzh, tzm = tz[-5:-4] + "1", tz[-4:-2], tz[-2:]
418 tz = -int(tzs) * (int(tzh) * 3600 + int(tzm))
419 date = tm + " " + str(tz)
420
421 c = commit(parents=parents, date=date, author=author, desc=message)
422 return c
423
424 def gettags(self):
425 tags = {}
426 fh = os.popen('git-ls-remote --tags "%s" 2>/dev/null' % self.path)
427 prefix = 'refs/tags/'
428 for line in fh:
429 line = line.strip()
430 if not line.endswith("^{}"):
431 continue
432 node, tag = line.split(None, 1)
433 if not tag.startswith(prefix):
434 continue
435 tag = tag[len(prefix):-3]
436 tags[tag] = node
437
438 return tags
439
440 class convert_mercurial(converter_sink):
441 def __init__(self, ui, path):
442 self.path = path
443 self.ui = ui
444 try:
445 self.repo = hg.repository(self.ui, path)
446 except:
447 raise NoRepo("could open hg repo %s" % path)
448
449 def mapfile(self):
450 return os.path.join(self.path, ".hg", "shamap")
451
452 def getheads(self):
453 h = self.repo.changelog.heads()
454 return [ hg.hex(x) for x in h ]
455
456 def putfile(self, f, e, data):
457 self.repo.wwrite(f, data, e)
458 if self.repo.dirstate.state(f) == '?':
459 self.repo.dirstate.update([f], "a")
460
461 def delfile(self, f):
462 try:
463 os.unlink(self.repo.wjoin(f))
464 #self.repo.remove([f])
465 except:
466 pass
467
468 def putcommit(self, files, parents, commit):
469 seen = {}
470 pl = []
471 for p in parents:
472 if p not in seen:
473 pl.append(p)
474 seen[p] = 1
475 parents = pl
476
477 if len(parents) < 2: parents.append("0" * 40)
478 if len(parents) < 2: parents.append("0" * 40)
479 p2 = parents.pop(0)
480
481 text = commit.desc
482 extra = {}
483 try:
484 extra["branch"] = commit.branch
485 except AttributeError:
486 pass
487
488 while parents:
489 p1 = p2
490 p2 = parents.pop(0)
491 a = self.repo.rawcommit(files, text, commit.author, commit.date,
492 hg.bin(p1), hg.bin(p2), extra=extra)
493 text = "(octopus merge fixup)\n"
494 p2 = hg.hex(self.repo.changelog.tip())
495
496 return p2
497
498 def puttags(self, tags):
499 try:
500 old = self.repo.wfile(".hgtags").read()
501 oldlines = old.splitlines(1)
502 oldlines.sort()
503 except:
504 oldlines = []
505
506 k = tags.keys()
507 k.sort()
508 newlines = []
509 for tag in k:
510 newlines.append("%s %s\n" % (tags[tag], tag))
511
512 newlines.sort()
513
514 if newlines != oldlines:
515 self.ui.status("updating tags\n")
516 f = self.repo.wfile(".hgtags", "w")
517 f.write("".join(newlines))
518 f.close()
519 if not oldlines: self.repo.add([".hgtags"])
520 date = "%s 0" % int(time.mktime(time.gmtime()))
521 self.repo.rawcommit([".hgtags"], "update tags", "convert-repo",
522 date, self.repo.changelog.tip(), hg.nullid)
523 return hg.hex(self.repo.changelog.tip())
524
525 converters = [convert_cvs, convert_git, convert_mercurial]
526
527 def converter(ui, path):
528 if not os.path.isdir(path):
529 raise util.Abort("%s: not a directory" % path)
530 for c in converters:
531 try:
532 return c(ui, path)
533 except NoRepo:
534 pass
535 raise util.Abort("%s: unknown repository type" % path)
536
537 class convert(object):
538 def __init__(self, ui, source, dest, mapfile, opts):
539
540 self.source = source
541 self.dest = dest
542 self.ui = ui
543 self.mapfile = mapfile
544 self.opts = opts
545 self.commitcache = {}
546
547 self.map = {}
548 try:
549 for l in file(self.mapfile):
550 sv, dv = l[:-1].split()
551 self.map[sv] = dv
552 except IOError:
553 pass
554
555 def walktree(self, heads):
556 visit = heads
557 known = {}
558 parents = {}
559 while visit:
560 n = visit.pop(0)
561 if n in known or n in self.map: continue
562 known[n] = 1
563 self.commitcache[n] = self.source.getcommit(n)
564 cp = self.commitcache[n].parents
565 for p in cp:
566 parents.setdefault(n, []).append(p)
567 visit.append(p)
568
569 return parents
570
571 def toposort(self, parents):
572 visit = parents.keys()
573 seen = {}
574 children = {}
575
576 while visit:
577 n = visit.pop(0)
578 if n in seen: continue
579 seen[n] = 1
580 pc = 0
581 if n in parents:
582 for p in parents[n]:
583 if p not in self.map: pc += 1
584 visit.append(p)
585 children.setdefault(p, []).append(n)
586 if not pc: root = n
587
588 s = []
589 removed = {}
590 visit = children.keys()
591 while visit:
592 n = visit.pop(0)
593 if n in removed: continue
594 dep = 0
595 if n in parents:
596 for p in parents[n]:
597 if p in self.map: continue
598 if p not in removed:
599 # we're still dependent
600 visit.append(n)
601 dep = 1
602 break
603
604 if not dep:
605 # all n's parents are in the list
606 removed[n] = 1
607 if n not in self.map:
608 s.append(n)
609 if n in children:
610 for c in children[n]:
611 visit.insert(0, c)
612
613 if self.opts.get('datesort'):
614 depth = {}
615 for n in s:
616 depth[n] = 0
617 pl = [p for p in self.commitcache[n].parents
618 if p not in self.map]
619 if pl:
620 depth[n] = max([depth[p] for p in pl]) + 1
621
622 s = [(depth[n], self.commitcache[n].date, n) for n in s]
623 s.sort()
624 s = [e[2] for e in s]
625
626 return s
627
628 def copy(self, rev):
629 c = self.commitcache[rev]
630 files = self.source.getchanges(rev)
631
632 for f, v in files:
633 try:
634 data = self.source.getfile(f, v)
635 except IOError, inst:
636 self.dest.delfile(f)
637 else:
638 e = self.source.getmode(f, v)
639 self.dest.putfile(f, e, data)
640
641 r = [self.map[v] for v in c.parents]
642 f = [f for f, v in files]
643 self.map[rev] = self.dest.putcommit(f, r, c)
644 file(self.mapfile, "a").write("%s %s\n" % (rev, self.map[rev]))
645
646 def convert(self):
647 self.ui.status("scanning source...\n")
648 heads = self.source.getheads()
649 parents = self.walktree(heads)
650 self.ui.status("sorting...\n")
651 t = self.toposort(parents)
652 num = len(t)
653 c = None
654
655 self.ui.status("converting...\n")
656 for c in t:
657 num -= 1
658 desc = self.commitcache[c].desc
659 if "\n" in desc:
660 desc = desc.splitlines()[0]
661 self.ui.status("%d %s\n" % (num, desc))
662 self.copy(c)
663
664 tags = self.source.gettags()
665 ctags = {}
666 for k in tags:
667 v = tags[k]
668 if v in self.map:
669 ctags[k] = self.map[v]
670
671 if c and ctags:
672 nrev = self.dest.puttags(ctags)
673 # write another hash correspondence to override the previous
674 # one so we don't end up with extra tag heads
675 if nrev:
676 file(self.mapfile, "a").write("%s %s\n" % (c, nrev))
677
678 def _convert(ui, src, dest=None, mapfile=None, **opts):
679 '''Convert a foreign SCM repository to a Mercurial one.
680
681 Accepted source formats:
682 - GIT
683 - CVS
684
685 Accepted destination formats:
686 - Mercurial
687
688 If destination isn't given, a new Mercurial repo named <src>-hg will
689 be created. If <mapfile> isn't given, it will be put in a default
690 location (<dest>/.hg/shamap by default)
691
692 The <mapfile> is a simple text file that maps each source commit ID to
693 the destination ID for that revision, like so:
694
695 <source ID> <destination ID>
696
697 If the file doesn't exist, it's automatically created. It's updated
698 on each commit copied, so convert-repo can be interrupted and can
699 be run repeatedly to copy new commits.
700 '''
701
702 srcc = converter(ui, src)
703 if not hasattr(srcc, "getcommit"):
704 raise util.Abort("%s: can't read from this repo type" % src)
705
706 if not dest:
707 dest = src + "-hg"
708 ui.status("assuming destination %s\n" % dest)
709
710 # Try to be smart and initalize things when required
711 if os.path.isdir(dest):
712 if len(os.listdir(dest)) > 0:
713 try:
714 hg.repository(ui, dest)
715 ui.status("destination %s is a Mercurial repository\n" % dest)
716 except hg.RepoError:
717 raise util.Abort(
718 "destination directory %s is not empty.\n"
719 "Please specify an empty directory to be initialized\n"
720 "or an already initialized mercurial repository"
721 % dest)
722 else:
723 ui.status("initializing destination %s repository\n" % dest)
724 hg.repository(ui, dest, create=True)
725 elif os.path.exists(dest):
726 raise util.Abort("destination %s exists and is not a directory" % dest)
727 else:
728 ui.status("initializing destination %s repository\n" % dest)
729 hg.repository(ui, dest, create=True)
730
731 destc = converter(ui, dest)
732 if not hasattr(destc, "putcommit"):
733 raise util.Abort("%s: can't write to this repo type" % src)
734
735 if not mapfile:
736 try:
737 mapfile = destc.mapfile()
738 except:
739 mapfile = os.path.join(destc, "map")
740
741 c = convert(ui, srcc, destc, mapfile, opts)
742 c.convert()
743
744 cmdtable = {
745 "convert":
746 (_convert,
747 [('', 'datesort', None, 'try to sort changesets by date')],
748 'hg convert [OPTION]... SOURCE [DEST [MAPFILE]]'),
749 }
This diff has been collapsed as it changes many lines, (513 lines changed) Show them Hide them
@@ -1,749 +1,244 b''
1 # convert.py Foreign SCM converter
2 #
3 # Copyright 2005, 2006 Matt Mackall <mpm@selenic.com>
4 #
5 # This software may be used and distributed according to the terms
6 # of the GNU General Public License, incorporated herein by reference.
7
8 import sys, os, zlib, sha, time, re, locale, socket
9 from mercurial import hg, ui, util, commands
10
11 commands.norepo += " convert"
12
13 class NoRepo(Exception): pass
14
15 class commit(object):
16 def __init__(self, **parts):
17 for x in "author date desc parents".split():
18 if not x in parts:
19 raise util.Abort("commit missing field %s" % x)
20 self.__dict__.update(parts)
21
22 def recode(s):
23 try:
24 return s.decode("utf-8").encode("utf-8")
25 except:
26 try:
27 return s.decode("latin-1").encode("utf-8")
28 except:
29 return s.decode("utf-8", "replace").encode("utf-8")
30
31 class converter_source(object):
32 """Conversion source interface"""
33
34 def __init__(self, ui, path):
35 """Initialize conversion source (or raise NoRepo("message")
36 exception if path is not a valid repository)"""
37 raise NotImplementedError()
38
39 def getheads(self):
40 """Return a list of this repository's heads"""
41 raise NotImplementedError()
42
43 def getfile(self, name, rev):
44 """Return file contents as a string"""
45 raise NotImplementedError()
46
47 def getmode(self, name, rev):
48 """Return file mode, eg. '', 'x', or 'l'"""
49 raise NotImplementedError()
50
51 def getchanges(self, version):
52 """Return sorted list of (filename, id) tuples for all files changed in rev.
1 # CVS conversion code inspired by hg-cvs-import and git-cvsimport
53 2
54 id just tells us which revision to return in getfile(), e.g. in
55 git it's an object hash."""
56 raise NotImplementedError()
57
58 def getcommit(self, version):
59 """Return the commit object for version"""
60 raise NotImplementedError()
61
62 def gettags(self):
63 """Return the tags as a dictionary of name: revision"""
64 raise NotImplementedError()
65
66 class converter_sink(object):
67 """Conversion sink (target) interface"""
68
69 def __init__(self, ui, path):
70 """Initialize conversion sink (or raise NoRepo("message")
71 exception if path is not a valid repository)"""
72 raise NotImplementedError()
73
74 def getheads(self):
75 """Return a list of this repository's heads"""
76 raise NotImplementedError()
77
78 def mapfile(self):
79 """Path to a file that will contain lines
80 source_rev_id sink_rev_id
81 mapping equivalent revision identifiers for each system."""
82 raise NotImplementedError()
3 import os, locale, re, socket
4 from mercurial import util
83 5
84 def putfile(self, f, e, data):
85 """Put file for next putcommit().
86 f: path to file
87 e: '', 'x', or 'l' (regular file, executable, or symlink)
88 data: file contents"""
89 raise NotImplementedError()
90
91 def delfile(self, f):
92 """Delete file for next putcommit().
93 f: path to file"""
94 raise NotImplementedError()
6 from common import NoRepo, commit, converter_source
95 7
96 def putcommit(self, files, parents, commit):
97 """Create a revision with all changed files listed in 'files'
98 and having listed parents. 'commit' is a commit object containing
99 at a minimum the author, date, and message for this changeset.
100 Called after putfile() and delfile() calls. Note that the sink
101 repository is not told to update itself to a particular revision
102 (or even what that revision would be) before it receives the
103 file data."""
104 raise NotImplementedError()
105
106 def puttags(self, tags):
107 """Put tags into sink.
108 tags: {tagname: sink_rev_id, ...}"""
109 raise NotImplementedError()
110
111
112 # CVS conversion code inspired by hg-cvs-import and git-cvsimport
113 8 class convert_cvs(converter_source):
114 9 def __init__(self, ui, path):
115 10 self.path = path
116 11 self.ui = ui
117 12 cvs = os.path.join(path, "CVS")
118 13 if not os.path.exists(cvs):
119 14 raise NoRepo("couldn't open CVS repo %s" % path)
120 15
121 16 self.changeset = {}
122 17 self.files = {}
123 18 self.tags = {}
124 19 self.lastbranch = {}
125 20 self.parent = {}
126 21 self.socket = None
127 22 self.cvsroot = file(os.path.join(cvs, "Root")).read()[:-1]
128 23 self.cvsrepo = file(os.path.join(cvs, "Repository")).read()[:-1]
129 24 self.encoding = locale.getpreferredencoding()
130 25 self._parse()
131 26 self._connect()
132 27
133 28 def _parse(self):
134 29 if self.changeset:
135 30 return
136 31
137 32 d = os.getcwd()
138 33 try:
139 34 os.chdir(self.path)
140 35 id = None
141 36 state = 0
142 37 for l in os.popen("cvsps -A -u --cvs-direct -q"):
143 38 if state == 0: # header
144 39 if l.startswith("PatchSet"):
145 40 id = l[9:-2]
146 41 elif l.startswith("Date"):
147 42 date = util.parsedate(l[6:-1], ["%Y/%m/%d %H:%M:%S"])
148 43 date = util.datestr(date)
149 44 elif l.startswith("Branch"):
150 45 branch = l[8:-1]
151 46 self.parent[id] = self.lastbranch.get(branch, 'bad')
152 47 self.lastbranch[branch] = id
153 48 elif l.startswith("Ancestor branch"):
154 49 ancestor = l[17:-1]
155 50 self.parent[id] = self.lastbranch[ancestor]
156 51 elif l.startswith("Author"):
157 52 author = self.recode(l[8:-1])
158 53 elif l.startswith("Tag: "):
159 54 t = l[5:-1].rstrip()
160 55 if t != "(none)":
161 56 self.tags[t] = id
162 57 elif l.startswith("Log:"):
163 58 state = 1
164 59 log = ""
165 60 elif state == 1: # log
166 61 if l == "Members: \n":
167 62 files = {}
168 63 log = self.recode(log[:-1])
169 64 if log.isspace():
170 65 log = "*** empty log message ***\n"
171 66 state = 2
172 67 else:
173 68 log += l
174 69 elif state == 2:
175 70 if l == "\n": #
176 71 state = 0
177 72 p = [self.parent[id]]
178 73 if id == "1":
179 74 p = []
180 75 if branch == "HEAD":
181 76 branch = ""
182 77 c = commit(author=author, date=date, parents=p,
183 78 desc=log, branch=branch)
184 79 self.changeset[id] = c
185 80 self.files[id] = files
186 81 else:
187 82 colon = l.rfind(':')
188 83 file = l[1:colon]
189 84 rev = l[colon+1:-2]
190 85 rev = rev.split("->")[1]
191 86 files[file] = rev
192 87
193 88 self.heads = self.lastbranch.values()
194 89 finally:
195 90 os.chdir(d)
196 91
197 92 def _connect(self):
198 93 root = self.cvsroot
199 94 conntype = None
200 95 user, host = None, None
201 96 cmd = ['cvs', 'server']
202 97
203 98 self.ui.status("connecting to %s\n" % root)
204 99
205 100 if root.startswith(":pserver:"):
206 101 root = root[9:]
207 102 m = re.match(r'(?:(.*?)(?::(.*?))?@)?([^:\/]*)(?::(\d*))?(.*)',
208 103 root)
209 104 if m:
210 105 conntype = "pserver"
211 106 user, passw, serv, port, root = m.groups()
212 107 if not user:
213 108 user = "anonymous"
214 109 rr = ":pserver:" + user + "@" + serv + ":" + root
215 110 if port:
216 111 rr2, port = "-", int(port)
217 112 else:
218 113 rr2, port = rr, 2401
219 114 rr += str(port)
220 115
221 116 if not passw:
222 117 passw = "A"
223 118 pf = open(os.path.join(os.environ["HOME"], ".cvspass"))
224 119 for l in pf:
225 120 # :pserver:cvs@mea.tmt.tele.fi:/cvsroot/zmailer Ah<Z
226 121 m = re.match(r'(/\d+\s+/)?(.*)', l)
227 122 l = m.group(2)
228 123 w, p = l.split(' ', 1)
229 124 if w in [rr, rr2]:
230 125 passw = p
231 126 break
232 127 pf.close()
233 128
234 129 sck = socket.socket()
235 130 sck.connect((serv, port))
236 131 sck.send("\n".join(["BEGIN AUTH REQUEST", root, user, passw,
237 132 "END AUTH REQUEST", ""]))
238 133 if sck.recv(128) != "I LOVE YOU\n":
239 134 raise NoRepo("CVS pserver authentication failed")
240 135
241 136 self.writep = self.readp = sck.makefile('r+')
242 137
243 138 if not conntype and root.startswith(":local:"):
244 139 conntype = "local"
245 140 root = root[7:]
246 141
247 142 if not conntype:
248 143 # :ext:user@host/home/user/path/to/cvsroot
249 144 if root.startswith(":ext:"):
250 145 root = root[5:]
251 146 m = re.match(r'(?:([^@:/]+)@)?([^:/]+):?(.*)', root)
252 147 if not m:
253 148 conntype = "local"
254 149 else:
255 150 conntype = "rsh"
256 151 user, host, root = m.group(1), m.group(2), m.group(3)
257 152
258 153 if conntype != "pserver":
259 154 if conntype == "rsh":
260 155 rsh = os.environ.get("CVS_RSH" or "rsh")
261 156 if user:
262 157 cmd = [rsh, '-l', user, host] + cmd
263 158 else:
264 159 cmd = [rsh, host] + cmd
265 160
266 161 self.writep, self.readp = os.popen2(cmd)
267 162
268 163 self.realroot = root
269 164
270 165 self.writep.write("Root %s\n" % root)
271 166 self.writep.write("Valid-responses ok error Valid-requests Mode"
272 167 " M Mbinary E Checked-in Created Updated"
273 168 " Merged Removed\n")
274 169 self.writep.write("valid-requests\n")
275 170 self.writep.flush()
276 171 r = self.readp.readline()
277 172 if not r.startswith("Valid-requests"):
278 173 raise util.Abort("server sucks")
279 174 if "UseUnchanged" in r:
280 175 self.writep.write("UseUnchanged\n")
281 176 self.writep.flush()
282 177 r = self.readp.readline()
283 178
284 179 def getheads(self):
285 180 return self.heads
286 181
287 182 def _getfile(self, name, rev):
288 183 if rev.endswith("(DEAD)"):
289 184 raise IOError
290 185
291 186 args = ("-N -P -kk -r %s --" % rev).split()
292 187 args.append(os.path.join(self.cvsrepo, name))
293 188 for x in args:
294 189 self.writep.write("Argument %s\n" % x)
295 190 self.writep.write("Directory .\n%s\nco\n" % self.realroot)
296 191 self.writep.flush()
297 192
298 193 data = ""
299 194 while 1:
300 195 line = self.readp.readline()
301 196 if line.startswith("Created ") or line.startswith("Updated "):
302 197 self.readp.readline() # path
303 198 self.readp.readline() # entries
304 199 mode = self.readp.readline()[:-1]
305 200 count = int(self.readp.readline()[:-1])
306 201 data = self.readp.read(count)
307 202 elif line.startswith(" "):
308 203 data += line[1:]
309 204 elif line.startswith("M "):
310 205 pass
311 206 elif line.startswith("Mbinary "):
312 207 count = int(self.readp.readline()[:-1])
313 208 data = self.readp.read(count)
314 209 else:
315 210 if line == "ok\n":
316 211 return (data, "x" in mode and "x" or "")
317 212 elif line.startswith("E "):
318 213 self.ui.warn("cvs server: %s\n" % line[2:])
319 214 elif line.startswith("Remove"):
320 215 l = self.readp.readline()
321 216 l = self.readp.readline()
322 217 if l != "ok\n":
323 218 raise util.Abort("unknown CVS response: %s" % l)
324 219 else:
325 220 raise util.Abort("unknown CVS response: %s" % line)
326 221
327 222 def getfile(self, file, rev):
328 223 data, mode = self._getfile(file, rev)
329 224 self.modecache[(file, rev)] = mode
330 225 return data
331 226
332 227 def getmode(self, file, rev):
333 228 return self.modecache[(file, rev)]
334 229
335 230 def getchanges(self, rev):
336 231 self.modecache = {}
337 232 files = self.files[rev]
338 233 cl = files.items()
339 234 cl.sort()
340 235 return cl
341 236
342 237 def recode(self, text):
343 238 return text.decode(self.encoding, "replace").encode("utf-8")
344 239
345 240 def getcommit(self, rev):
346 241 return self.changeset[rev]
347 242
348 243 def gettags(self):
349 244 return self.tags
350
351 class convert_git(converter_source):
352 def __init__(self, ui, path):
353 if os.path.isdir(path + "/.git"):
354 path += "/.git"
355 self.path = path
356 self.ui = ui
357 if not os.path.exists(path + "/objects"):
358 raise NoRepo("couldn't open GIT repo %s" % path)
359
360 def getheads(self):
361 fh = os.popen("GIT_DIR=%s git-rev-parse --verify HEAD" % self.path)
362 return [fh.read()[:-1]]
363
364 def catfile(self, rev, type):
365 if rev == "0" * 40: raise IOError()
366 fh = os.popen("GIT_DIR=%s git-cat-file %s %s 2>/dev/null"
367 % (self.path, type, rev))
368 return fh.read()
369
370 def getfile(self, name, rev):
371 return self.catfile(rev, "blob")
372
373 def getmode(self, name, rev):
374 return self.modecache[(name, rev)]
375
376 def getchanges(self, version):
377 self.modecache = {}
378 fh = os.popen("GIT_DIR=%s git-diff-tree --root -m -r %s"
379 % (self.path, version))
380 changes = []
381 for l in fh:
382 if "\t" not in l: continue
383 m, f = l[:-1].split("\t")
384 m = m.split()
385 h = m[3]
386 p = (m[1] == "100755")
387 s = (m[1] == "120000")
388 self.modecache[(f, h)] = (p and "x") or (s and "l") or ""
389 changes.append((f, h))
390 return changes
391
392 def getcommit(self, version):
393 c = self.catfile(version, "commit") # read the commit hash
394 end = c.find("\n\n")
395 message = c[end+2:]
396 message = recode(message)
397 l = c[:end].splitlines()
398 manifest = l[0].split()[1]
399 parents = []
400 for e in l[1:]:
401 n, v = e.split(" ", 1)
402 if n == "author":
403 p = v.split()
404 tm, tz = p[-2:]
405 author = " ".join(p[:-2])
406 if author[0] == "<": author = author[1:-1]
407 author = recode(author)
408 if n == "committer":
409 p = v.split()
410 tm, tz = p[-2:]
411 committer = " ".join(p[:-2])
412 if committer[0] == "<": committer = committer[1:-1]
413 committer = recode(committer)
414 message += "\ncommitter: %s\n" % committer
415 if n == "parent": parents.append(v)
416
417 tzs, tzh, tzm = tz[-5:-4] + "1", tz[-4:-2], tz[-2:]
418 tz = -int(tzs) * (int(tzh) * 3600 + int(tzm))
419 date = tm + " " + str(tz)
420
421 c = commit(parents=parents, date=date, author=author, desc=message)
422 return c
423
424 def gettags(self):
425 tags = {}
426 fh = os.popen('git-ls-remote --tags "%s" 2>/dev/null' % self.path)
427 prefix = 'refs/tags/'
428 for line in fh:
429 line = line.strip()
430 if not line.endswith("^{}"):
431 continue
432 node, tag = line.split(None, 1)
433 if not tag.startswith(prefix):
434 continue
435 tag = tag[len(prefix):-3]
436 tags[tag] = node
437
438 return tags
439
440 class convert_mercurial(converter_sink):
441 def __init__(self, ui, path):
442 self.path = path
443 self.ui = ui
444 try:
445 self.repo = hg.repository(self.ui, path)
446 except:
447 raise NoRepo("could open hg repo %s" % path)
448
449 def mapfile(self):
450 return os.path.join(self.path, ".hg", "shamap")
451
452 def getheads(self):
453 h = self.repo.changelog.heads()
454 return [ hg.hex(x) for x in h ]
455
456 def putfile(self, f, e, data):
457 self.repo.wwrite(f, data, e)
458 if self.repo.dirstate.state(f) == '?':
459 self.repo.dirstate.update([f], "a")
460
461 def delfile(self, f):
462 try:
463 os.unlink(self.repo.wjoin(f))
464 #self.repo.remove([f])
465 except:
466 pass
467
468 def putcommit(self, files, parents, commit):
469 seen = {}
470 pl = []
471 for p in parents:
472 if p not in seen:
473 pl.append(p)
474 seen[p] = 1
475 parents = pl
476
477 if len(parents) < 2: parents.append("0" * 40)
478 if len(parents) < 2: parents.append("0" * 40)
479 p2 = parents.pop(0)
480
481 text = commit.desc
482 extra = {}
483 try:
484 extra["branch"] = commit.branch
485 except AttributeError:
486 pass
487
488 while parents:
489 p1 = p2
490 p2 = parents.pop(0)
491 a = self.repo.rawcommit(files, text, commit.author, commit.date,
492 hg.bin(p1), hg.bin(p2), extra=extra)
493 text = "(octopus merge fixup)\n"
494 p2 = hg.hex(self.repo.changelog.tip())
495
496 return p2
497
498 def puttags(self, tags):
499 try:
500 old = self.repo.wfile(".hgtags").read()
501 oldlines = old.splitlines(1)
502 oldlines.sort()
503 except:
504 oldlines = []
505
506 k = tags.keys()
507 k.sort()
508 newlines = []
509 for tag in k:
510 newlines.append("%s %s\n" % (tags[tag], tag))
511
512 newlines.sort()
513
514 if newlines != oldlines:
515 self.ui.status("updating tags\n")
516 f = self.repo.wfile(".hgtags", "w")
517 f.write("".join(newlines))
518 f.close()
519 if not oldlines: self.repo.add([".hgtags"])
520 date = "%s 0" % int(time.mktime(time.gmtime()))
521 self.repo.rawcommit([".hgtags"], "update tags", "convert-repo",
522 date, self.repo.changelog.tip(), hg.nullid)
523 return hg.hex(self.repo.changelog.tip())
524
525 converters = [convert_cvs, convert_git, convert_mercurial]
526
527 def converter(ui, path):
528 if not os.path.isdir(path):
529 raise util.Abort("%s: not a directory" % path)
530 for c in converters:
531 try:
532 return c(ui, path)
533 except NoRepo:
534 pass
535 raise util.Abort("%s: unknown repository type" % path)
536
537 class convert(object):
538 def __init__(self, ui, source, dest, mapfile, opts):
539
540 self.source = source
541 self.dest = dest
542 self.ui = ui
543 self.mapfile = mapfile
544 self.opts = opts
545 self.commitcache = {}
546
547 self.map = {}
548 try:
549 for l in file(self.mapfile):
550 sv, dv = l[:-1].split()
551 self.map[sv] = dv
552 except IOError:
553 pass
554
555 def walktree(self, heads):
556 visit = heads
557 known = {}
558 parents = {}
559 while visit:
560 n = visit.pop(0)
561 if n in known or n in self.map: continue
562 known[n] = 1
563 self.commitcache[n] = self.source.getcommit(n)
564 cp = self.commitcache[n].parents
565 for p in cp:
566 parents.setdefault(n, []).append(p)
567 visit.append(p)
568
569 return parents
570
571 def toposort(self, parents):
572 visit = parents.keys()
573 seen = {}
574 children = {}
575
576 while visit:
577 n = visit.pop(0)
578 if n in seen: continue
579 seen[n] = 1
580 pc = 0
581 if n in parents:
582 for p in parents[n]:
583 if p not in self.map: pc += 1
584 visit.append(p)
585 children.setdefault(p, []).append(n)
586 if not pc: root = n
587
588 s = []
589 removed = {}
590 visit = children.keys()
591 while visit:
592 n = visit.pop(0)
593 if n in removed: continue
594 dep = 0
595 if n in parents:
596 for p in parents[n]:
597 if p in self.map: continue
598 if p not in removed:
599 # we're still dependent
600 visit.append(n)
601 dep = 1
602 break
603
604 if not dep:
605 # all n's parents are in the list
606 removed[n] = 1
607 if n not in self.map:
608 s.append(n)
609 if n in children:
610 for c in children[n]:
611 visit.insert(0, c)
612
613 if self.opts.get('datesort'):
614 depth = {}
615 for n in s:
616 depth[n] = 0
617 pl = [p for p in self.commitcache[n].parents
618 if p not in self.map]
619 if pl:
620 depth[n] = max([depth[p] for p in pl]) + 1
621
622 s = [(depth[n], self.commitcache[n].date, n) for n in s]
623 s.sort()
624 s = [e[2] for e in s]
625
626 return s
627
628 def copy(self, rev):
629 c = self.commitcache[rev]
630 files = self.source.getchanges(rev)
631
632 for f, v in files:
633 try:
634 data = self.source.getfile(f, v)
635 except IOError, inst:
636 self.dest.delfile(f)
637 else:
638 e = self.source.getmode(f, v)
639 self.dest.putfile(f, e, data)
640
641 r = [self.map[v] for v in c.parents]
642 f = [f for f, v in files]
643 self.map[rev] = self.dest.putcommit(f, r, c)
644 file(self.mapfile, "a").write("%s %s\n" % (rev, self.map[rev]))
645
646 def convert(self):
647 self.ui.status("scanning source...\n")
648 heads = self.source.getheads()
649 parents = self.walktree(heads)
650 self.ui.status("sorting...\n")
651 t = self.toposort(parents)
652 num = len(t)
653 c = None
654
655 self.ui.status("converting...\n")
656 for c in t:
657 num -= 1
658 desc = self.commitcache[c].desc
659 if "\n" in desc:
660 desc = desc.splitlines()[0]
661 self.ui.status("%d %s\n" % (num, desc))
662 self.copy(c)
663
664 tags = self.source.gettags()
665 ctags = {}
666 for k in tags:
667 v = tags[k]
668 if v in self.map:
669 ctags[k] = self.map[v]
670
671 if c and ctags:
672 nrev = self.dest.puttags(ctags)
673 # write another hash correspondence to override the previous
674 # one so we don't end up with extra tag heads
675 if nrev:
676 file(self.mapfile, "a").write("%s %s\n" % (c, nrev))
677
678 def _convert(ui, src, dest=None, mapfile=None, **opts):
679 '''Convert a foreign SCM repository to a Mercurial one.
680
681 Accepted source formats:
682 - GIT
683 - CVS
684
685 Accepted destination formats:
686 - Mercurial
687
688 If destination isn't given, a new Mercurial repo named <src>-hg will
689 be created. If <mapfile> isn't given, it will be put in a default
690 location (<dest>/.hg/shamap by default)
691
692 The <mapfile> is a simple text file that maps each source commit ID to
693 the destination ID for that revision, like so:
694
695 <source ID> <destination ID>
696
697 If the file doesn't exist, it's automatically created. It's updated
698 on each commit copied, so convert-repo can be interrupted and can
699 be run repeatedly to copy new commits.
700 '''
701
702 srcc = converter(ui, src)
703 if not hasattr(srcc, "getcommit"):
704 raise util.Abort("%s: can't read from this repo type" % src)
705
706 if not dest:
707 dest = src + "-hg"
708 ui.status("assuming destination %s\n" % dest)
709
710 # Try to be smart and initalize things when required
711 if os.path.isdir(dest):
712 if len(os.listdir(dest)) > 0:
713 try:
714 hg.repository(ui, dest)
715 ui.status("destination %s is a Mercurial repository\n" % dest)
716 except hg.RepoError:
717 raise util.Abort(
718 "destination directory %s is not empty.\n"
719 "Please specify an empty directory to be initialized\n"
720 "or an already initialized mercurial repository"
721 % dest)
722 else:
723 ui.status("initializing destination %s repository\n" % dest)
724 hg.repository(ui, dest, create=True)
725 elif os.path.exists(dest):
726 raise util.Abort("destination %s exists and is not a directory" % dest)
727 else:
728 ui.status("initializing destination %s repository\n" % dest)
729 hg.repository(ui, dest, create=True)
730
731 destc = converter(ui, dest)
732 if not hasattr(destc, "putcommit"):
733 raise util.Abort("%s: can't write to this repo type" % src)
734
735 if not mapfile:
736 try:
737 mapfile = destc.mapfile()
738 except:
739 mapfile = os.path.join(destc, "map")
740
741 c = convert(ui, srcc, destc, mapfile, opts)
742 c.convert()
743
744 cmdtable = {
745 "convert":
746 (_convert,
747 [('', 'datesort', None, 'try to sort changesets by date')],
748 'hg convert [OPTION]... SOURCE [DEST [MAPFILE]]'),
749 }
This diff has been collapsed as it changes many lines, (652 lines changed) Show them Hide them
@@ -1,749 +1,103 b''
1 # convert.py Foreign SCM converter
2 #
3 # Copyright 2005, 2006 Matt Mackall <mpm@selenic.com>
4 #
5 # This software may be used and distributed according to the terms
6 # of the GNU General Public License, incorporated herein by reference.
7
8 import sys, os, zlib, sha, time, re, locale, socket
9 from mercurial import hg, ui, util, commands
1 # git support for the convert extension
10 2
11 commands.norepo += " convert"
12
13 class NoRepo(Exception): pass
3 import os
14 4
15 class commit(object):
16 def __init__(self, **parts):
17 for x in "author date desc parents".split():
18 if not x in parts:
19 raise util.Abort("commit missing field %s" % x)
20 self.__dict__.update(parts)
5 from common import NoRepo, commit, converter_source
21 6
def recode(s):
    """Return the byte string s re-encoded as UTF-8.

    s is first validated as UTF-8; if it already is valid UTF-8 it is
    returned unchanged (decode/encode round trip).  Otherwise it is
    interpreted as latin-1 and converted to UTF-8.

    latin-1 maps every possible byte value to a character, so the
    fallback cannot fail for byte strings and no further "replace"
    fallback is needed.
    """
    try:
        return s.decode("utf-8").encode("utf-8")
    except UnicodeError:
        # Only encoding problems trigger the fallback; anything else
        # (KeyboardInterrupt, wrong argument type, ...) propagates.
        return s.decode("latin-1").encode("utf-8")
30 15
31 class converter_source(object):
32 """Conversion source interface"""
33
34 def __init__(self, ui, path):
35 """Initialize conversion source (or raise NoRepo("message")
36 exception if path is not a valid repository)"""
37 raise NotImplementedError()
38
39 def getheads(self):
40 """Return a list of this repository's heads"""
41 raise NotImplementedError()
42
43 def getfile(self, name, rev):
44 """Return file contents as a string"""
45 raise NotImplementedError()
46
47 def getmode(self, name, rev):
48 """Return file mode, eg. '', 'x', or 'l'"""
49 raise NotImplementedError()
50
51 def getchanges(self, version):
52 """Return sorted list of (filename, id) tuples for all files changed in rev.
53
54 id just tells us which revision to return in getfile(), e.g. in
55 git it's an object hash."""
56 raise NotImplementedError()
57
58 def getcommit(self, version):
59 """Return the commit object for version"""
60 raise NotImplementedError()
61
62 def gettags(self):
63 """Return the tags as a dictionary of name: revision"""
64 raise NotImplementedError()
65
66 class converter_sink(object):
67 """Conversion sink (target) interface"""
68
69 def __init__(self, ui, path):
70 """Initialize conversion sink (or raise NoRepo("message")
71 exception if path is not a valid repository)"""
72 raise NotImplementedError()
73
74 def getheads(self):
75 """Return a list of this repository's heads"""
76 raise NotImplementedError()
77
78 def mapfile(self):
79 """Path to a file that will contain lines
80 source_rev_id sink_rev_id
81 mapping equivalent revision identifiers for each system."""
82 raise NotImplementedError()
83
84 def putfile(self, f, e, data):
85 """Put file for next putcommit().
86 f: path to file
87 e: '', 'x', or 'l' (regular file, executable, or symlink)
88 data: file contents"""
89 raise NotImplementedError()
90
91 def delfile(self, f):
92 """Delete file for next putcommit().
93 f: path to file"""
94 raise NotImplementedError()
95
96 def putcommit(self, files, parents, commit):
97 """Create a revision with all changed files listed in 'files'
98 and having listed parents. 'commit' is a commit object containing
99 at a minimum the author, date, and message for this changeset.
100 Called after putfile() and delfile() calls. Note that the sink
101 repository is not told to update itself to a particular revision
102 (or even what that revision would be) before it receives the
103 file data."""
104 raise NotImplementedError()
105
106 def puttags(self, tags):
107 """Put tags into sink.
108 tags: {tagname: sink_rev_id, ...}"""
109 raise NotImplementedError()
110
111
112 # CVS conversion code inspired by hg-cvs-import and git-cvsimport
113 class convert_cvs(converter_source):
114 def __init__(self, ui, path):
115 self.path = path
116 self.ui = ui
117 cvs = os.path.join(path, "CVS")
118 if not os.path.exists(cvs):
119 raise NoRepo("couldn't open CVS repo %s" % path)
120
121 self.changeset = {}
122 self.files = {}
123 self.tags = {}
124 self.lastbranch = {}
125 self.parent = {}
126 self.socket = None
127 self.cvsroot = file(os.path.join(cvs, "Root")).read()[:-1]
128 self.cvsrepo = file(os.path.join(cvs, "Repository")).read()[:-1]
129 self.encoding = locale.getpreferredencoding()
130 self._parse()
131 self._connect()
132
133 def _parse(self):
134 if self.changeset:
135 return
136
137 d = os.getcwd()
138 try:
139 os.chdir(self.path)
140 id = None
141 state = 0
142 for l in os.popen("cvsps -A -u --cvs-direct -q"):
143 if state == 0: # header
144 if l.startswith("PatchSet"):
145 id = l[9:-2]
146 elif l.startswith("Date"):
147 date = util.parsedate(l[6:-1], ["%Y/%m/%d %H:%M:%S"])
148 date = util.datestr(date)
149 elif l.startswith("Branch"):
150 branch = l[8:-1]
151 self.parent[id] = self.lastbranch.get(branch, 'bad')
152 self.lastbranch[branch] = id
153 elif l.startswith("Ancestor branch"):
154 ancestor = l[17:-1]
155 self.parent[id] = self.lastbranch[ancestor]
156 elif l.startswith("Author"):
157 author = self.recode(l[8:-1])
158 elif l.startswith("Tag: "):
159 t = l[5:-1].rstrip()
160 if t != "(none)":
161 self.tags[t] = id
162 elif l.startswith("Log:"):
163 state = 1
164 log = ""
165 elif state == 1: # log
166 if l == "Members: \n":
167 files = {}
168 log = self.recode(log[:-1])
169 if log.isspace():
170 log = "*** empty log message ***\n"
171 state = 2
172 else:
173 log += l
174 elif state == 2:
175 if l == "\n": #
176 state = 0
177 p = [self.parent[id]]
178 if id == "1":
179 p = []
180 if branch == "HEAD":
181 branch = ""
182 c = commit(author=author, date=date, parents=p,
183 desc=log, branch=branch)
184 self.changeset[id] = c
185 self.files[id] = files
186 else:
187 colon = l.rfind(':')
188 file = l[1:colon]
189 rev = l[colon+1:-2]
190 rev = rev.split("->")[1]
191 files[file] = rev
192
193 self.heads = self.lastbranch.values()
194 finally:
195 os.chdir(d)
196
197 def _connect(self):
198 root = self.cvsroot
199 conntype = None
200 user, host = None, None
201 cmd = ['cvs', 'server']
202
203 self.ui.status("connecting to %s\n" % root)
204
205 if root.startswith(":pserver:"):
206 root = root[9:]
207 m = re.match(r'(?:(.*?)(?::(.*?))?@)?([^:\/]*)(?::(\d*))?(.*)',
208 root)
209 if m:
210 conntype = "pserver"
211 user, passw, serv, port, root = m.groups()
212 if not user:
213 user = "anonymous"
214 rr = ":pserver:" + user + "@" + serv + ":" + root
215 if port:
216 rr2, port = "-", int(port)
217 else:
218 rr2, port = rr, 2401
219 rr += str(port)
220
221 if not passw:
222 passw = "A"
223 pf = open(os.path.join(os.environ["HOME"], ".cvspass"))
224 for l in pf:
225 # :pserver:cvs@mea.tmt.tele.fi:/cvsroot/zmailer Ah<Z
226 m = re.match(r'(/\d+\s+/)?(.*)', l)
227 l = m.group(2)
228 w, p = l.split(' ', 1)
229 if w in [rr, rr2]:
230 passw = p
231 break
232 pf.close()
233
234 sck = socket.socket()
235 sck.connect((serv, port))
236 sck.send("\n".join(["BEGIN AUTH REQUEST", root, user, passw,
237 "END AUTH REQUEST", ""]))
238 if sck.recv(128) != "I LOVE YOU\n":
239 raise NoRepo("CVS pserver authentication failed")
240
241 self.writep = self.readp = sck.makefile('r+')
242
243 if not conntype and root.startswith(":local:"):
244 conntype = "local"
245 root = root[7:]
246
247 if not conntype:
248 # :ext:user@host/home/user/path/to/cvsroot
249 if root.startswith(":ext:"):
250 root = root[5:]
251 m = re.match(r'(?:([^@:/]+)@)?([^:/]+):?(.*)', root)
252 if not m:
253 conntype = "local"
254 else:
255 conntype = "rsh"
256 user, host, root = m.group(1), m.group(2), m.group(3)
257
258 if conntype != "pserver":
259 if conntype == "rsh":
260 rsh = os.environ.get("CVS_RSH" or "rsh")
261 if user:
262 cmd = [rsh, '-l', user, host] + cmd
263 else:
264 cmd = [rsh, host] + cmd
265
266 self.writep, self.readp = os.popen2(cmd)
267
268 self.realroot = root
269
270 self.writep.write("Root %s\n" % root)
271 self.writep.write("Valid-responses ok error Valid-requests Mode"
272 " M Mbinary E Checked-in Created Updated"
273 " Merged Removed\n")
274 self.writep.write("valid-requests\n")
275 self.writep.flush()
276 r = self.readp.readline()
277 if not r.startswith("Valid-requests"):
278 raise util.Abort("server sucks")
279 if "UseUnchanged" in r:
280 self.writep.write("UseUnchanged\n")
281 self.writep.flush()
282 r = self.readp.readline()
283
284 def getheads(self):
285 return self.heads
286
287 def _getfile(self, name, rev):
288 if rev.endswith("(DEAD)"):
289 raise IOError
290
291 args = ("-N -P -kk -r %s --" % rev).split()
292 args.append(os.path.join(self.cvsrepo, name))
293 for x in args:
294 self.writep.write("Argument %s\n" % x)
295 self.writep.write("Directory .\n%s\nco\n" % self.realroot)
296 self.writep.flush()
297
298 data = ""
299 while 1:
300 line = self.readp.readline()
301 if line.startswith("Created ") or line.startswith("Updated "):
302 self.readp.readline() # path
303 self.readp.readline() # entries
304 mode = self.readp.readline()[:-1]
305 count = int(self.readp.readline()[:-1])
306 data = self.readp.read(count)
307 elif line.startswith(" "):
308 data += line[1:]
309 elif line.startswith("M "):
310 pass
311 elif line.startswith("Mbinary "):
312 count = int(self.readp.readline()[:-1])
313 data = self.readp.read(count)
314 else:
315 if line == "ok\n":
316 return (data, "x" in mode and "x" or "")
317 elif line.startswith("E "):
318 self.ui.warn("cvs server: %s\n" % line[2:])
319 elif line.startswith("Remove"):
320 l = self.readp.readline()
321 l = self.readp.readline()
322 if l != "ok\n":
323 raise util.Abort("unknown CVS response: %s" % l)
324 else:
325 raise util.Abort("unknown CVS response: %s" % line)
326
327 def getfile(self, file, rev):
328 data, mode = self._getfile(file, rev)
329 self.modecache[(file, rev)] = mode
330 return data
331
332 def getmode(self, file, rev):
333 return self.modecache[(file, rev)]
334
335 def getchanges(self, rev):
336 self.modecache = {}
337 files = self.files[rev]
338 cl = files.items()
339 cl.sort()
340 return cl
341
342 def recode(self, text):
343 return text.decode(self.encoding, "replace").encode("utf-8")
344
345 def getcommit(self, rev):
346 return self.changeset[rev]
347
348 def gettags(self):
349 return self.tags
350
class convert_git(converter_source):
    """Conversion source reading a git repository through the git
    command line tools (git-rev-parse, git-cat-file, git-diff-tree,
    git-ls-remote), each run via os.popen."""

    def __init__(self, ui, path):
        """Open the git repository at path.

        If path contains a ".git" directory, that directory is used.
        Raises NoRepo if the resulting path has no "objects" entry.
        """
        if os.path.isdir(path + "/.git"):
            path += "/.git"
        self.path = path
        self.ui = ui
        # (filename, blob id) -> mode flag; refilled by getchanges().
        # Created here so getmode() before getchanges() fails with a
        # plain KeyError instead of an AttributeError.
        self.modecache = {}
        if not os.path.exists(path + "/objects"):
            raise NoRepo("couldn't open GIT repo %s" % path)

    def getheads(self):
        """Return a one-element list holding the commit id of HEAD."""
        # NOTE(review): self.path is interpolated into a shell command
        # unquoted here and in the methods below; paths with spaces or
        # shell metacharacters will break - confirm callers sanitize.
        fh = os.popen("GIT_DIR=%s git-rev-parse --verify HEAD" % self.path)
        # drop the trailing newline
        return [fh.read()[:-1]]

    def catfile(self, rev, type):
        """Return the output of git-cat-file for object rev of the given
        type ("blob", "commit", ...).

        Raises IOError for the all-zero id (40 "0" characters).
        """
        if rev == "0" * 40:
            raise IOError()
        fh = os.popen("GIT_DIR=%s git-cat-file %s %s 2>/dev/null"
                      % (self.path, type, rev))
        return fh.read()

    def getfile(self, name, rev):
        """Return the contents of blob rev; name is not used."""
        return self.catfile(rev, "blob")

    def getmode(self, name, rev):
        """Return the mode flag ('', 'x' or 'l') recorded for (name, rev)
        by the last getchanges() call; KeyError if it was not recorded."""
        return self.modecache[(name, rev)]

    def getchanges(self, version):
        """Return a list of (filename, blob id) tuples parsed from
        "git-diff-tree --root -m -r" output for commit version.

        Resets self.modecache and fills it with the mode flag of every
        returned entry.
        """
        self.modecache = {}
        fh = os.popen("GIT_DIR=%s git-diff-tree --root -m -r %s"
                      % (self.path, version))
        changes = []
        for l in fh:
            # lines without a tab carry no file entry
            if "\t" not in l:
                continue
            m, f = l[:-1].split("\t")
            m = m.split()
            # second field is the mode, fourth the blob id
            # (presumably the new-side values of git's raw diff format)
            h = m[3]
            p = (m[1] == "100755")  # executable
            s = (m[1] == "120000")  # symlink
            self.modecache[(f, h)] = (p and "x") or (s and "l") or ""
            changes.append((f, h))
        return changes

    def getcommit(self, version):
        """Parse git commit object version and return a commit object.

        The message is recoded to UTF-8 and gets a "committer: ..." line
        appended.  Because the committer header follows the author
        header, the date used is the committer's timestamp, expressed as
        "<unixtime> <offset>" where offset is seconds west of UTC.
        """
        c = self.catfile(version, "commit")  # raw commit object text
        end = c.find("\n\n")  # blank line separates headers from message
        message = c[end+2:]
        message = recode(message)
        l = c[:end].splitlines()
        parents = []
        # l[0] is skipped; the remaining lines are "name value" headers
        for e in l[1:]:
            n, v = e.split(" ", 1)
            if n == "author":
                p = v.split()
                tm, tz = p[-2:]
                author = " ".join(p[:-2])
                # only an "<email>" given: strip the angle brackets
                if author.startswith("<"):
                    author = author[1:-1]
                author = recode(author)
            if n == "committer":
                p = v.split()
                tm, tz = p[-2:]
                committer = " ".join(p[:-2])
                if committer.startswith("<"):
                    committer = committer[1:-1]
                committer = recode(committer)
                message += "\ncommitter: %s\n" % committer
            if n == "parent":
                parents.append(v)

        # tz looks like "+0530": sign, two hour digits, two minute digits.
        # Convert to seconds and negate the sign; minutes must be scaled
        # by 60 (previously they were added as raw seconds).
        tzs, tzh, tzm = tz[-5:-4] + "1", tz[-4:-2], tz[-2:]
        tz = -int(tzs) * (int(tzh) * 3600 + int(tzm) * 60)
        date = tm + " " + str(tz)

        c = commit(parents=parents, date=date, author=author, desc=message)
        return c

    def gettags(self):
        """Return a {tag name: commit id} dictionary built from
        "git-ls-remote --tags" output.

        Only lines ending in "^{}" under refs/tags/ are used.
        """
        tags = {}
        fh = os.popen('git-ls-remote --tags "%s" 2>/dev/null' % self.path)
        prefix = 'refs/tags/'
        for line in fh:
            line = line.strip()
            if not line.endswith("^{}"):
                continue
            node, tag = line.split(None, 1)
            if not tag.startswith(prefix):
                continue
            # strip the "refs/tags/" prefix and the "^{}" suffix
            tag = tag[len(prefix):-3]
            tags[tag] = node

        return tags
439
440 class convert_mercurial(converter_sink):
441 def __init__(self, ui, path):
442 self.path = path
443 self.ui = ui
444 try:
445 self.repo = hg.repository(self.ui, path)
446 except:
447 raise NoRepo("could open hg repo %s" % path)
448
449 def mapfile(self):
450 return os.path.join(self.path, ".hg", "shamap")
451
452 def getheads(self):
453 h = self.repo.changelog.heads()
454 return [ hg.hex(x) for x in h ]
455
456 def putfile(self, f, e, data):
457 self.repo.wwrite(f, data, e)
458 if self.repo.dirstate.state(f) == '?':
459 self.repo.dirstate.update([f], "a")
460
461 def delfile(self, f):
462 try:
463 os.unlink(self.repo.wjoin(f))
464 #self.repo.remove([f])
465 except:
466 pass
467
468 def putcommit(self, files, parents, commit):
469 seen = {}
470 pl = []
471 for p in parents:
472 if p not in seen:
473 pl.append(p)
474 seen[p] = 1
475 parents = pl
476
477 if len(parents) < 2: parents.append("0" * 40)
478 if len(parents) < 2: parents.append("0" * 40)
479 p2 = parents.pop(0)
480
481 text = commit.desc
482 extra = {}
483 try:
484 extra["branch"] = commit.branch
485 except AttributeError:
486 pass
487
488 while parents:
489 p1 = p2
490 p2 = parents.pop(0)
491 a = self.repo.rawcommit(files, text, commit.author, commit.date,
492 hg.bin(p1), hg.bin(p2), extra=extra)
493 text = "(octopus merge fixup)\n"
494 p2 = hg.hex(self.repo.changelog.tip())
495
496 return p2
497
498 def puttags(self, tags):
499 try:
500 old = self.repo.wfile(".hgtags").read()
501 oldlines = old.splitlines(1)
502 oldlines.sort()
503 except:
504 oldlines = []
505
506 k = tags.keys()
507 k.sort()
508 newlines = []
509 for tag in k:
510 newlines.append("%s %s\n" % (tags[tag], tag))
511
512 newlines.sort()
513
514 if newlines != oldlines:
515 self.ui.status("updating tags\n")
516 f = self.repo.wfile(".hgtags", "w")
517 f.write("".join(newlines))
518 f.close()
519 if not oldlines: self.repo.add([".hgtags"])
520 date = "%s 0" % int(time.mktime(time.gmtime()))
521 self.repo.rawcommit([".hgtags"], "update tags", "convert-repo",
522 date, self.repo.changelog.tip(), hg.nullid)
523 return hg.hex(self.repo.changelog.tip())
524
525 converters = [convert_cvs, convert_git, convert_mercurial]
526
527 def converter(ui, path):
528 if not os.path.isdir(path):
529 raise util.Abort("%s: not a directory" % path)
530 for c in converters:
531 try:
532 return c(ui, path)
533 except NoRepo:
534 pass
535 raise util.Abort("%s: unknown repository type" % path)
536
537 class convert(object):
538 def __init__(self, ui, source, dest, mapfile, opts):
539
540 self.source = source
541 self.dest = dest
542 self.ui = ui
543 self.mapfile = mapfile
544 self.opts = opts
545 self.commitcache = {}
546
547 self.map = {}
548 try:
549 for l in file(self.mapfile):
550 sv, dv = l[:-1].split()
551 self.map[sv] = dv
552 except IOError:
553 pass
554
555 def walktree(self, heads):
556 visit = heads
557 known = {}
558 parents = {}
559 while visit:
560 n = visit.pop(0)
561 if n in known or n in self.map: continue
562 known[n] = 1
563 self.commitcache[n] = self.source.getcommit(n)
564 cp = self.commitcache[n].parents
565 for p in cp:
566 parents.setdefault(n, []).append(p)
567 visit.append(p)
568
569 return parents
570
571 def toposort(self, parents):
572 visit = parents.keys()
573 seen = {}
574 children = {}
575
576 while visit:
577 n = visit.pop(0)
578 if n in seen: continue
579 seen[n] = 1
580 pc = 0
581 if n in parents:
582 for p in parents[n]:
583 if p not in self.map: pc += 1
584 visit.append(p)
585 children.setdefault(p, []).append(n)
586 if not pc: root = n
587
588 s = []
589 removed = {}
590 visit = children.keys()
591 while visit:
592 n = visit.pop(0)
593 if n in removed: continue
594 dep = 0
595 if n in parents:
596 for p in parents[n]:
597 if p in self.map: continue
598 if p not in removed:
599 # we're still dependent
600 visit.append(n)
601 dep = 1
602 break
603
604 if not dep:
605 # all n's parents are in the list
606 removed[n] = 1
607 if n not in self.map:
608 s.append(n)
609 if n in children:
610 for c in children[n]:
611 visit.insert(0, c)
612
613 if self.opts.get('datesort'):
614 depth = {}
615 for n in s:
616 depth[n] = 0
617 pl = [p for p in self.commitcache[n].parents
618 if p not in self.map]
619 if pl:
620 depth[n] = max([depth[p] for p in pl]) + 1
621
622 s = [(depth[n], self.commitcache[n].date, n) for n in s]
623 s.sort()
624 s = [e[2] for e in s]
625
626 return s
627
628 def copy(self, rev):
629 c = self.commitcache[rev]
630 files = self.source.getchanges(rev)
631
632 for f, v in files:
633 try:
634 data = self.source.getfile(f, v)
635 except IOError, inst:
636 self.dest.delfile(f)
637 else:
638 e = self.source.getmode(f, v)
639 self.dest.putfile(f, e, data)
640
641 r = [self.map[v] for v in c.parents]
642 f = [f for f, v in files]
643 self.map[rev] = self.dest.putcommit(f, r, c)
644 file(self.mapfile, "a").write("%s %s\n" % (rev, self.map[rev]))
645
646 def convert(self):
647 self.ui.status("scanning source...\n")
648 heads = self.source.getheads()
649 parents = self.walktree(heads)
650 self.ui.status("sorting...\n")
651 t = self.toposort(parents)
652 num = len(t)
653 c = None
654
655 self.ui.status("converting...\n")
656 for c in t:
657 num -= 1
658 desc = self.commitcache[c].desc
659 if "\n" in desc:
660 desc = desc.splitlines()[0]
661 self.ui.status("%d %s\n" % (num, desc))
662 self.copy(c)
663
664 tags = self.source.gettags()
665 ctags = {}
666 for k in tags:
667 v = tags[k]
668 if v in self.map:
669 ctags[k] = self.map[v]
670
671 if c and ctags:
672 nrev = self.dest.puttags(ctags)
673 # write another hash correspondence to override the previous
674 # one so we don't end up with extra tag heads
675 if nrev:
676 file(self.mapfile, "a").write("%s %s\n" % (c, nrev))
677
678 def _convert(ui, src, dest=None, mapfile=None, **opts):
679 '''Convert a foreign SCM repository to a Mercurial one.
680
681 Accepted source formats:
682 - GIT
683 - CVS
684
685 Accepted destination formats:
686 - Mercurial
687
688 If destination isn't given, a new Mercurial repo named <src>-hg will
689 be created. If <mapfile> isn't given, it will be put in a default
690 location (<dest>/.hg/shamap by default)
691
692 The <mapfile> is a simple text file that maps each source commit ID to
693 the destination ID for that revision, like so:
694
695 <source ID> <destination ID>
696
697 If the file doesn't exist, it's automatically created. It's updated
698 on each commit copied, so convert-repo can be interrupted and can
699 be run repeatedly to copy new commits.
700 '''
701
702 srcc = converter(ui, src)
703 if not hasattr(srcc, "getcommit"):
704 raise util.Abort("%s: can't read from this repo type" % src)
705
706 if not dest:
707 dest = src + "-hg"
708 ui.status("assuming destination %s\n" % dest)
709
710 # Try to be smart and initalize things when required
711 if os.path.isdir(dest):
712 if len(os.listdir(dest)) > 0:
713 try:
714 hg.repository(ui, dest)
715 ui.status("destination %s is a Mercurial repository\n" % dest)
716 except hg.RepoError:
717 raise util.Abort(
718 "destination directory %s is not empty.\n"
719 "Please specify an empty directory to be initialized\n"
720 "or an already initialized mercurial repository"
721 % dest)
722 else:
723 ui.status("initializing destination %s repository\n" % dest)
724 hg.repository(ui, dest, create=True)
725 elif os.path.exists(dest):
726 raise util.Abort("destination %s exists and is not a directory" % dest)
727 else:
728 ui.status("initializing destination %s repository\n" % dest)
729 hg.repository(ui, dest, create=True)
730
731 destc = converter(ui, dest)
732 if not hasattr(destc, "putcommit"):
733 raise util.Abort("%s: can't write to this repo type" % src)
734
735 if not mapfile:
736 try:
737 mapfile = destc.mapfile()
738 except:
739 mapfile = os.path.join(destc, "map")
740
741 c = convert(ui, srcc, destc, mapfile, opts)
742 c.convert()
743
744 cmdtable = {
745 "convert":
746 (_convert,
747 [('', 'datesort', None, 'try to sort changesets by date')],
748 'hg convert [OPTION]... SOURCE [DEST [MAPFILE]]'),
749 }
This diff has been collapsed as it changes many lines, (666 lines changed) Show them Hide them
@@ -1,749 +1,91 b''
1 # convert.py Foreign SCM converter
2 #
3 # Copyright 2005, 2006 Matt Mackall <mpm@selenic.com>
4 #
5 # This software may be used and distributed according to the terms
6 # of the GNU General Public License, incorporated herein by reference.
7
8 import sys, os, zlib, sha, time, re, locale, socket
9 from mercurial import hg, ui, util, commands
10
11 commands.norepo += " convert"
12
13 class NoRepo(Exception): pass
14
15 class commit(object):
16 def __init__(self, **parts):
17 for x in "author date desc parents".split():
18 if not x in parts:
19 raise util.Abort("commit missing field %s" % x)
20 self.__dict__.update(parts)
21
22 def recode(s):
23 try:
24 return s.decode("utf-8").encode("utf-8")
25 except:
26 try:
27 return s.decode("latin-1").encode("utf-8")
28 except:
29 return s.decode("utf-8", "replace").encode("utf-8")
30
31 class converter_source(object):
32 """Conversion source interface"""
33
34 def __init__(self, ui, path):
35 """Initialize conversion source (or raise NoRepo("message")
36 exception if path is not a valid repository)"""
37 raise NotImplementedError()
38
39 def getheads(self):
40 """Return a list of this repository's heads"""
41 raise NotImplementedError()
42
43 def getfile(self, name, rev):
44 """Return file contents as a string"""
45 raise NotImplementedError()
46
47 def getmode(self, name, rev):
48 """Return file mode, eg. '', 'x', or 'l'"""
49 raise NotImplementedError()
50
51 def getchanges(self, version):
52 """Return sorted list of (filename, id) tuples for all files changed in rev.
53
54 id just tells us which revision to return in getfile(), e.g. in
55 git it's an object hash."""
56 raise NotImplementedError()
57
58 def getcommit(self, version):
59 """Return the commit object for version"""
60 raise NotImplementedError()
61
62 def gettags(self):
63 """Return the tags as a dictionary of name: revision"""
64 raise NotImplementedError()
65
66 class converter_sink(object):
67 """Conversion sink (target) interface"""
68
69 def __init__(self, ui, path):
70 """Initialize conversion sink (or raise NoRepo("message")
71 exception if path is not a valid repository)"""
72 raise NotImplementedError()
73
74 def getheads(self):
75 """Return a list of this repository's heads"""
76 raise NotImplementedError()
77
78 def mapfile(self):
79 """Path to a file that will contain lines
80 source_rev_id sink_rev_id
81 mapping equivalent revision identifiers for each system."""
82 raise NotImplementedError()
83
84 def putfile(self, f, e, data):
85 """Put file for next putcommit().
86 f: path to file
87 e: '', 'x', or 'l' (regular file, executable, or symlink)
88 data: file contents"""
89 raise NotImplementedError()
90
91 def delfile(self, f):
92 """Delete file for next putcommit().
93 f: path to file"""
94 raise NotImplementedError()
95
96 def putcommit(self, files, parents, commit):
97 """Create a revision with all changed files listed in 'files'
98 and having listed parents. 'commit' is a commit object containing
99 at a minimum the author, date, and message for this changeset.
100 Called after putfile() and delfile() calls. Note that the sink
101 repository is not told to update itself to a particular revision
102 (or even what that revision would be) before it receives the
103 file data."""
104 raise NotImplementedError()
105
106 def puttags(self, tags):
107 """Put tags into sink.
108 tags: {tagname: sink_rev_id, ...}"""
109 raise NotImplementedError()
110
111
112 # CVS conversion code inspired by hg-cvs-import and git-cvsimport
113 class convert_cvs(converter_source):
114 def __init__(self, ui, path):
115 self.path = path
116 self.ui = ui
117 cvs = os.path.join(path, "CVS")
118 if not os.path.exists(cvs):
119 raise NoRepo("couldn't open CVS repo %s" % path)
120
121 self.changeset = {}
122 self.files = {}
123 self.tags = {}
124 self.lastbranch = {}
125 self.parent = {}
126 self.socket = None
127 self.cvsroot = file(os.path.join(cvs, "Root")).read()[:-1]
128 self.cvsrepo = file(os.path.join(cvs, "Repository")).read()[:-1]
129 self.encoding = locale.getpreferredencoding()
130 self._parse()
131 self._connect()
132
133 def _parse(self):
134 if self.changeset:
135 return
136
137 d = os.getcwd()
138 try:
139 os.chdir(self.path)
140 id = None
141 state = 0
142 for l in os.popen("cvsps -A -u --cvs-direct -q"):
143 if state == 0: # header
144 if l.startswith("PatchSet"):
145 id = l[9:-2]
146 elif l.startswith("Date"):
147 date = util.parsedate(l[6:-1], ["%Y/%m/%d %H:%M:%S"])
148 date = util.datestr(date)
149 elif l.startswith("Branch"):
150 branch = l[8:-1]
151 self.parent[id] = self.lastbranch.get(branch, 'bad')
152 self.lastbranch[branch] = id
153 elif l.startswith("Ancestor branch"):
154 ancestor = l[17:-1]
155 self.parent[id] = self.lastbranch[ancestor]
156 elif l.startswith("Author"):
157 author = self.recode(l[8:-1])
158 elif l.startswith("Tag: "):
159 t = l[5:-1].rstrip()
160 if t != "(none)":
161 self.tags[t] = id
162 elif l.startswith("Log:"):
163 state = 1
164 log = ""
165 elif state == 1: # log
166 if l == "Members: \n":
167 files = {}
168 log = self.recode(log[:-1])
169 if log.isspace():
170 log = "*** empty log message ***\n"
171 state = 2
172 else:
173 log += l
174 elif state == 2:
175 if l == "\n": #
176 state = 0
177 p = [self.parent[id]]
178 if id == "1":
179 p = []
180 if branch == "HEAD":
181 branch = ""
182 c = commit(author=author, date=date, parents=p,
183 desc=log, branch=branch)
184 self.changeset[id] = c
185 self.files[id] = files
186 else:
187 colon = l.rfind(':')
188 file = l[1:colon]
189 rev = l[colon+1:-2]
190 rev = rev.split("->")[1]
191 files[file] = rev
192
193 self.heads = self.lastbranch.values()
194 finally:
195 os.chdir(d)
196
197 def _connect(self):
198 root = self.cvsroot
199 conntype = None
200 user, host = None, None
201 cmd = ['cvs', 'server']
202
203 self.ui.status("connecting to %s\n" % root)
1 # hg backend for convert extension
204 2
205 if root.startswith(":pserver:"):
206 root = root[9:]
207 m = re.match(r'(?:(.*?)(?::(.*?))?@)?([^:\/]*)(?::(\d*))?(.*)',
208 root)
209 if m:
210 conntype = "pserver"
211 user, passw, serv, port, root = m.groups()
212 if not user:
213 user = "anonymous"
214 rr = ":pserver:" + user + "@" + serv + ":" + root
215 if port:
216 rr2, port = "-", int(port)
217 else:
218 rr2, port = rr, 2401
219 rr += str(port)
220
221 if not passw:
222 passw = "A"
223 pf = open(os.path.join(os.environ["HOME"], ".cvspass"))
224 for l in pf:
225 # :pserver:cvs@mea.tmt.tele.fi:/cvsroot/zmailer Ah<Z
226 m = re.match(r'(/\d+\s+/)?(.*)', l)
227 l = m.group(2)
228 w, p = l.split(' ', 1)
229 if w in [rr, rr2]:
230 passw = p
231 break
232 pf.close()
233
234 sck = socket.socket()
235 sck.connect((serv, port))
236 sck.send("\n".join(["BEGIN AUTH REQUEST", root, user, passw,
237 "END AUTH REQUEST", ""]))
238 if sck.recv(128) != "I LOVE YOU\n":
239 raise NoRepo("CVS pserver authentication failed")
240
241 self.writep = self.readp = sck.makefile('r+')
242
243 if not conntype and root.startswith(":local:"):
244 conntype = "local"
245 root = root[7:]
246
247 if not conntype:
248 # :ext:user@host/home/user/path/to/cvsroot
249 if root.startswith(":ext:"):
250 root = root[5:]
251 m = re.match(r'(?:([^@:/]+)@)?([^:/]+):?(.*)', root)
252 if not m:
253 conntype = "local"
254 else:
255 conntype = "rsh"
256 user, host, root = m.group(1), m.group(2), m.group(3)
257
258 if conntype != "pserver":
259 if conntype == "rsh":
260 rsh = os.environ.get("CVS_RSH" or "rsh")
261 if user:
262 cmd = [rsh, '-l', user, host] + cmd
263 else:
264 cmd = [rsh, host] + cmd
265
266 self.writep, self.readp = os.popen2(cmd)
267
268 self.realroot = root
269
270 self.writep.write("Root %s\n" % root)
271 self.writep.write("Valid-responses ok error Valid-requests Mode"
272 " M Mbinary E Checked-in Created Updated"
273 " Merged Removed\n")
274 self.writep.write("valid-requests\n")
275 self.writep.flush()
276 r = self.readp.readline()
277 if not r.startswith("Valid-requests"):
278 raise util.Abort("server sucks")
279 if "UseUnchanged" in r:
280 self.writep.write("UseUnchanged\n")
281 self.writep.flush()
282 r = self.readp.readline()
283
284 def getheads(self):
285 return self.heads
286
287 def _getfile(self, name, rev):
288 if rev.endswith("(DEAD)"):
289 raise IOError
290
291 args = ("-N -P -kk -r %s --" % rev).split()
292 args.append(os.path.join(self.cvsrepo, name))
293 for x in args:
294 self.writep.write("Argument %s\n" % x)
295 self.writep.write("Directory .\n%s\nco\n" % self.realroot)
296 self.writep.flush()
3 import os, time
4 from mercurial import hg
297 5
298 data = ""
299 while 1:
300 line = self.readp.readline()
301 if line.startswith("Created ") or line.startswith("Updated "):
302 self.readp.readline() # path
303 self.readp.readline() # entries
304 mode = self.readp.readline()[:-1]
305 count = int(self.readp.readline()[:-1])
306 data = self.readp.read(count)
307 elif line.startswith(" "):
308 data += line[1:]
309 elif line.startswith("M "):
310 pass
311 elif line.startswith("Mbinary "):
312 count = int(self.readp.readline()[:-1])
313 data = self.readp.read(count)
314 else:
315 if line == "ok\n":
316 return (data, "x" in mode and "x" or "")
317 elif line.startswith("E "):
318 self.ui.warn("cvs server: %s\n" % line[2:])
319 elif line.startswith("Remove"):
320 l = self.readp.readline()
321 l = self.readp.readline()
322 if l != "ok\n":
323 raise util.Abort("unknown CVS response: %s" % l)
324 else:
325 raise util.Abort("unknown CVS response: %s" % line)
326
327 def getfile(self, file, rev):
328 data, mode = self._getfile(file, rev)
329 self.modecache[(file, rev)] = mode
330 return data
331
332 def getmode(self, file, rev):
333 return self.modecache[(file, rev)]
334
335 def getchanges(self, rev):
336 self.modecache = {}
337 files = self.files[rev]
338 cl = files.items()
339 cl.sort()
340 return cl
341
342 def recode(self, text):
343 return text.decode(self.encoding, "replace").encode("utf-8")
344
345 def getcommit(self, rev):
346 return self.changeset[rev]
347
348 def gettags(self):
349 return self.tags
350
class convert_git(converter_source):
    """Conversion source that reads a git repository.

    All repository access goes through the git command-line tools, run
    with os.popen() and GIT_DIR pointing at the repository.
    """

    def __init__(self, ui, path):
        """Open the git repository at `path`.

        If `path` contains a ".git" directory, that directory is used.
        Raises NoRepo when no "objects" entry exists under the resulting
        path.
        """
        if os.path.isdir(path + "/.git"):
            path += "/.git"
        self.path = path
        self.ui = ui
        if not os.path.exists(path + "/objects"):
            raise NoRepo("couldn't open GIT repo %s" % path)

    def getheads(self):
        """Return a one-element list holding the id HEAD resolves to."""
        fh = os.popen("GIT_DIR=%s git-rev-parse --verify HEAD" % self.path)
        # drop the trailing newline of the command output
        return [fh.read()[:-1]]

    def catfile(self, rev, type):
        """Return the output of `git-cat-file <type> <rev>`.

        The all-zero id raises IOError; copy() in this file treats an
        IOError from getfile() as "file was deleted".
        """
        if rev == "0" * 40:
            raise IOError()
        fh = os.popen("GIT_DIR=%s git-cat-file %s %s 2>/dev/null"
                      % (self.path, type, rev))
        return fh.read()

    def getfile(self, name, rev):
        """Return the blob `rev`; `name` is not needed to look it up."""
        return self.catfile(rev, "blob")

    def getmode(self, name, rev):
        """Return the mode cached by getchanges() for (name, rev)."""
        return self.modecache[(name, rev)]

    def getchanges(self, version):
        """Return a list of (filename, id) pairs changed in `version`.

        Parses the output of `git-diff-tree --root -m -r`.  As a side
        effect the mode cache is rebuilt: "x" when the second field is
        100755, "l" when it is 120000, "" otherwise.
        """
        self.modecache = {}
        fh = os.popen("GIT_DIR=%s git-diff-tree --root -m -r %s"
                      % (self.path, version))
        changes = []
        for l in fh:
            # only lines carrying a tab-separated file name are entries
            if "\t" not in l:
                continue
            m, f = l[:-1].split("\t")
            m = m.split()
            h = m[3]
            p = (m[1] == "100755")
            s = (m[1] == "120000")
            self.modecache[(f, h)] = (p and "x") or (s and "l") or ""
            changes.append((f, h))
        return changes

    def getcommit(self, version):
        """Parse the commit object `version` into a commit instance.

        The header lines before the first blank line supply the parents,
        author and committer; the rest is the message.  The committer is
        appended to the message.  The date is "<timestamp> <offset>",
        where the offset is git's +-hhmm zone converted to seconds with
        the sign flipped.
        """
        c = self.catfile(version, "commit") # read the commit object
        end = c.find("\n\n")
        message = c[end+2:]
        message = recode(message)
        l = c[:end].splitlines()
        parents = []
        # l[0] is the first header line; the fields used here follow it
        for e in l[1:]:
            n, v = e.split(" ", 1)
            if n == "author":
                p = v.split()
                tm, tz = p[-2:]
                author = " ".join(p[:-2])
                if author[0] == "<":
                    author = author[1:-1]
                author = recode(author)
            if n == "committer":
                p = v.split()
                # a committer line overrides the author's time and zone
                tm, tz = p[-2:]
                committer = " ".join(p[:-2])
                if committer[0] == "<":
                    committer = committer[1:-1]
                committer = recode(committer)
                message += "\ncommitter: %s\n" % committer
            if n == "parent":
                parents.append(v)

        # tz looks like "+hhmm" / "-hhmm": sign, hours, minutes
        tzs, tzh, tzm = tz[-5:-4] + "1", tz[-4:-2], tz[-2:]
        # minutes must be scaled to seconds just like the hours are
        tz = -int(tzs) * (int(tzh) * 3600 + int(tzm) * 60)
        date = tm + " " + str(tz)

        return commit(parents=parents, date=date, author=author,
                      desc=message)

    def gettags(self):
        """Return a dict mapping tag names to ids.

        Reads `git-ls-remote --tags` and keeps only the lines whose ref
        ends in "^{}" and starts with "refs/tags/"; both decorations are
        stripped from the tag name.
        """
        tags = {}
        fh = os.popen('git-ls-remote --tags "%s" 2>/dev/null' % self.path)
        prefix = 'refs/tags/'
        for line in fh:
            line = line.strip()
            if not line.endswith("^{}"):
                continue
            node, tag = line.split(None, 1)
            if not tag.startswith(prefix):
                continue
            # cut "refs/tags/" from the front and "^{}" from the back
            tag = tag[len(prefix):-3]
            tags[tag] = node

        return tags
6 from common import NoRepo, converter_sink
439 7
class convert_mercurial(converter_sink):
    """Conversion sink that writes into a Mercurial repository."""

    def __init__(self, ui, path):
        """Open the Mercurial repository at `path`.

        Raises NoRepo when hg.repository() fails for any reason.
        """
        self.path = path
        self.ui = ui
        try:
            self.repo = hg.repository(self.ui, path)
        except:
            raise NoRepo("couldn't open hg repo %s" % path)

    def mapfile(self):
        """Return the default map file location, <path>/.hg/shamap."""
        return os.path.join(self.path, ".hg", "shamap")

    def getheads(self):
        """Return the hex ids of the changelog heads."""
        h = self.repo.changelog.heads()
        return [ hg.hex(x) for x in h ]

    def putfile(self, f, e, data):
        """Write `data` to working file `f` with flags `e`.

        Files the dirstate reports as '?' are marked as added.
        """
        self.repo.wwrite(f, data, e)
        if self.repo.dirstate.state(f) == '?':
            self.repo.dirstate.update([f], "a")

    def delfile(self, f):
        """Remove working file `f`; a failing unlink is ignored."""
        try:
            os.unlink(self.repo.wjoin(f))
            #self.repo.remove([f])
        except OSError:
            # best effort: the file may already be gone
            pass

    def putcommit(self, files, parents, commit):
        """Commit `files` with the given parents and return the new hex id.

        Duplicate parents are dropped and the list is padded with the
        null id up to two entries.  With more than two parents, one
        commit is made per extra parent, each on top of the previous
        result, with an "(octopus merge fixup)" message.
        """
        seen = {}
        pl = []
        for p in parents:
            if p not in seen:
                pl.append(p)
                seen[p] = 1
        parents = pl

        # rawcommit needs two parents; pad with the null id
        while len(parents) < 2:
            parents.append("0" * 40)
        p2 = parents.pop(0)

        text = commit.desc
        extra = {}
        try:
            extra["branch"] = commit.branch
        except AttributeError:
            # the commit carries no branch information
            pass

        while parents:
            p1 = p2
            p2 = parents.pop(0)
            self.repo.rawcommit(files, text, commit.author, commit.date,
                                hg.bin(p1), hg.bin(p2), extra=extra)
            text = "(octopus merge fixup)\n"
            p2 = hg.hex(self.repo.changelog.tip())

        return p2

    def puttags(self, tags):
        """Bring .hgtags in line with `tags` (a name -> id mapping).

        Returns the hex id of the tag commit when .hgtags had to be
        rewritten, and None when it already matched.
        """
        try:
            old = self.repo.wfile(".hgtags").read()
            oldlines = old.splitlines(1)
            oldlines.sort()
        except:
            # unreadable or missing .hgtags counts as "no tags yet"
            oldlines = []

        newlines = ["%s %s\n" % (tags[tag], tag) for tag in tags]
        newlines.sort()

        if newlines != oldlines:
            self.ui.status("updating tags\n")
            f = self.repo.wfile(".hgtags", "w")
            f.write("".join(newlines))
            f.close()
            if not oldlines:
                self.repo.add([".hgtags"])
            # time.time() is already seconds since the epoch in UTC, which
            # is what the "0" offset claims; mktime(gmtime()) would skew
            # the value by the local UTC offset
            date = "%s 0" % int(time.time())
            self.repo.rawcommit([".hgtags"], "update tags", "convert-repo",
                                date, self.repo.changelog.tip(), hg.nullid)
            return hg.hex(self.repo.changelog.tip())
524
# Converter classes, in the order converter() tries them.
converters = [
    convert_cvs,
    convert_git,
    convert_mercurial,
]
526
def converter(ui, path):
    """Return a converter object for the repository at `path`.

    Each class in `converters` is tried in turn; one that raises NoRepo
    is skipped.  Aborts when `path` is not a directory or when no class
    accepts it.
    """
    if not os.path.isdir(path):
        raise util.Abort("%s: not a directory" % path)
    for cls in converters:
        try:
            instance = cls(ui, path)
        except NoRepo:
            continue
        else:
            return instance
    raise util.Abort("%s: unknown repository type" % path)
536
537 class convert(object):
538 def __init__(self, ui, source, dest, mapfile, opts):
539
540 self.source = source
541 self.dest = dest
542 self.ui = ui
543 self.mapfile = mapfile
544 self.opts = opts
545 self.commitcache = {}
546
547 self.map = {}
548 try:
549 for l in file(self.mapfile):
550 sv, dv = l[:-1].split()
551 self.map[sv] = dv
552 except IOError:
553 pass
554
555 def walktree(self, heads):
556 visit = heads
557 known = {}
558 parents = {}
559 while visit:
560 n = visit.pop(0)
561 if n in known or n in self.map: continue
562 known[n] = 1
563 self.commitcache[n] = self.source.getcommit(n)
564 cp = self.commitcache[n].parents
565 for p in cp:
566 parents.setdefault(n, []).append(p)
567 visit.append(p)
568
569 return parents
570
571 def toposort(self, parents):
572 visit = parents.keys()
573 seen = {}
574 children = {}
575
576 while visit:
577 n = visit.pop(0)
578 if n in seen: continue
579 seen[n] = 1
580 pc = 0
581 if n in parents:
582 for p in parents[n]:
583 if p not in self.map: pc += 1
584 visit.append(p)
585 children.setdefault(p, []).append(n)
586 if not pc: root = n
587
588 s = []
589 removed = {}
590 visit = children.keys()
591 while visit:
592 n = visit.pop(0)
593 if n in removed: continue
594 dep = 0
595 if n in parents:
596 for p in parents[n]:
597 if p in self.map: continue
598 if p not in removed:
599 # we're still dependent
600 visit.append(n)
601 dep = 1
602 break
603
604 if not dep:
605 # all n's parents are in the list
606 removed[n] = 1
607 if n not in self.map:
608 s.append(n)
609 if n in children:
610 for c in children[n]:
611 visit.insert(0, c)
612
613 if self.opts.get('datesort'):
614 depth = {}
615 for n in s:
616 depth[n] = 0
617 pl = [p for p in self.commitcache[n].parents
618 if p not in self.map]
619 if pl:
620 depth[n] = max([depth[p] for p in pl]) + 1
621
622 s = [(depth[n], self.commitcache[n].date, n) for n in s]
623 s.sort()
624 s = [e[2] for e in s]
625
626 return s
627
628 def copy(self, rev):
629 c = self.commitcache[rev]
630 files = self.source.getchanges(rev)
631
632 for f, v in files:
633 try:
634 data = self.source.getfile(f, v)
635 except IOError, inst:
636 self.dest.delfile(f)
637 else:
638 e = self.source.getmode(f, v)
639 self.dest.putfile(f, e, data)
640
641 r = [self.map[v] for v in c.parents]
642 f = [f for f, v in files]
643 self.map[rev] = self.dest.putcommit(f, r, c)
644 file(self.mapfile, "a").write("%s %s\n" % (rev, self.map[rev]))
645
646 def convert(self):
647 self.ui.status("scanning source...\n")
648 heads = self.source.getheads()
649 parents = self.walktree(heads)
650 self.ui.status("sorting...\n")
651 t = self.toposort(parents)
652 num = len(t)
653 c = None
654
655 self.ui.status("converting...\n")
656 for c in t:
657 num -= 1
658 desc = self.commitcache[c].desc
659 if "\n" in desc:
660 desc = desc.splitlines()[0]
661 self.ui.status("%d %s\n" % (num, desc))
662 self.copy(c)
663
664 tags = self.source.gettags()
665 ctags = {}
666 for k in tags:
667 v = tags[k]
668 if v in self.map:
669 ctags[k] = self.map[v]
670
671 if c and ctags:
672 nrev = self.dest.puttags(ctags)
673 # write another hash correspondence to override the previous
674 # one so we don't end up with extra tag heads
675 if nrev:
676 file(self.mapfile, "a").write("%s %s\n" % (c, nrev))
677
def _convert(ui, src, dest=None, mapfile=None, **opts):
    '''Convert a foreign SCM repository to a Mercurial one.

    Accepted source formats:
    - GIT
    - CVS

    Accepted destination formats:
    - Mercurial

    If destination isn't given, a new Mercurial repo named <src>-hg will
    be created. If <mapfile> isn't given, it will be put in a default
    location (<dest>/.hg/shamap by default)

    The <mapfile> is a simple text file that maps each source commit ID to
    the destination ID for that revision, like so:

    <source ID> <destination ID>

    If the file doesn't exist, it's automatically created. It's updated
    on each commit copied, so convert-repo can be interrupted and can
    be run repeatedly to copy new commits.
    '''

    # a usable source must be able to hand out commits
    srcc = converter(ui, src)
    if not hasattr(srcc, "getcommit"):
        raise util.Abort("%s: can't read from this repo type" % src)

    if not dest:
        dest = src + "-hg"
        ui.status("assuming destination %s\n" % dest)

    # Try to be smart and initialize things when required
    if os.path.isdir(dest):
        if len(os.listdir(dest)) > 0:
            # a non-empty directory is only acceptable if it already is
            # a Mercurial repository
            try:
                hg.repository(ui, dest)
                ui.status("destination %s is a Mercurial repository\n" % dest)
            except hg.RepoError:
                raise util.Abort(
                    "destination directory %s is not empty.\n"
                    "Please specify an empty directory to be initialized\n"
                    "or an already initialized mercurial repository"
                    % dest)
        else:
            ui.status("initializing destination %s repository\n" % dest)
            hg.repository(ui, dest, create=True)
    elif os.path.exists(dest):
        raise util.Abort("destination %s exists and is not a directory" % dest)
    else:
        ui.status("initializing destination %s repository\n" % dest)
        hg.repository(ui, dest, create=True)

    # a usable destination must be able to accept commits; report the
    # destination path, since that is the repository being rejected
    destc = converter(ui, dest)
    if not hasattr(destc, "putcommit"):
        raise util.Abort("%s: can't write to this repo type" % dest)

    if not mapfile:
        try:
            mapfile = destc.mapfile()
        except:
            # the sink offers no default location: fall back to a file
            # inside the destination directory (the path, not the
            # converter object)
            mapfile = os.path.join(dest, "map")

    c = convert(ui, srcc, destc, mapfile, opts)
    c.convert()
743
# Command table: command name -> (function, option list, usage synopsis).
cmdtable = {
    "convert": (
        _convert,
        [('', 'datesort', None, 'try to sort changesets by date')],
        'hg convert [OPTION]... SOURCE [DEST [MAPFILE]]',
    ),
}
General Comments 0
You need to be logged in to leave comments. Login now