##// END OF EJS Templates
Split convert extension into common and repository type modules
Brendan Cully -
r4536:cc9b7921 default
parent child Browse files
Show More
This diff has been collapsed as it changes many lines, (519 lines changed) Show them Hide them
@@ -1,749 +1,242 b''
1 # convert.py Foreign SCM converter
1 # convert.py Foreign SCM converter
2 #
2 #
3 # Copyright 2005, 2006 Matt Mackall <mpm@selenic.com>
3 # Copyright 2005, 2006 Matt Mackall <mpm@selenic.com>
4 #
4 #
5 # This software may be used and distributed according to the terms
5 # This software may be used and distributed according to the terms
6 # of the GNU General Public License, incorporated herein by reference.
6 # of the GNU General Public License, incorporated herein by reference.
7
7
8 import sys, os, zlib, sha, time, re, locale, socket
8 from common import NoRepo
9 from cvs import convert_cvs
10 from git import convert_git
11 from hg import convert_mercurial
12
13 import os
9 from mercurial import hg, ui, util, commands
14 from mercurial import hg, ui, util, commands
10
15
11 commands.norepo += " convert"
16 commands.norepo += " convert"
12
17
class NoRepo(Exception):
    """Raised by a converter when a path is not a repository it handles."""
14
class commit(object):
    """Plain record describing one changeset.

    Every keyword argument becomes an attribute of the instance.  The
    keywords author, date, desc and parents are mandatory; anything else
    (for example branch) is optional.
    """

    def __init__(self, **parts):
        # NOTE(review): `util` must be available at module level for the
        # error path below -- confirm the enclosing module imports it.
        required = "author date desc parents".split()
        for field in required:
            if field in parts:
                continue
            raise util.Abort("commit missing field %s" % field)
        self.__dict__.update(parts)
21
def recode(s):
    """Return the byte string s as UTF-8 encoded bytes.

    The input is first tried as UTF-8, then as latin-1; as a last resort
    it is decoded as UTF-8 with undecodable bytes replaced.

    Only encoding errors trigger a fallback.  Any other exception (for
    example passing an object without a decode method) propagates instead
    of being hidden by a bare except.
    """
    try:
        return s.decode("utf-8").encode("utf-8")
    except UnicodeError:
        pass
    try:
        return s.decode("latin-1").encode("utf-8")
    except UnicodeError:
        return s.decode("utf-8", "replace").encode("utf-8")
30
class converter_source(object):
    """Interface every conversion source has to implement."""

    def __init__(self, ui, path):
        """Open the source repository at path.

        Implementations raise NoRepo("message") when path is not a valid
        repository of their type."""
        raise NotImplementedError

    def getheads(self):
        """Return the heads of this repository as a list."""
        raise NotImplementedError

    def getfile(self, name, rev):
        """Return the contents of file name at rev as a string."""
        raise NotImplementedError

    def getmode(self, name, rev):
        """Return the mode of file name at rev: '', 'x', or 'l'."""
        raise NotImplementedError

    def getchanges(self, version):
        """Return a sorted list of (filename, id) tuples, one per file
        changed in version.

        The id only tells getfile() which revision to fetch; in git,
        for instance, it is an object hash."""
        raise NotImplementedError

    def getcommit(self, version):
        """Return the commit object describing version."""
        raise NotImplementedError

    def gettags(self):
        """Return the tags as a {name: revision} dictionary."""
        raise NotImplementedError
65
class converter_sink(object):
    """Interface every conversion sink (target) has to implement."""

    def __init__(self, ui, path):
        """Open the sink repository at path.

        Implementations raise NoRepo("message") when path is not a valid
        repository of their type."""
        raise NotImplementedError

    def getheads(self):
        """Return the heads of this repository as a list."""
        raise NotImplementedError

    def mapfile(self):
        """Return the path of the revision map file.

        That file holds lines of the form
            source_rev_id sink_rev_id
        pairing equivalent revision identifiers of the two systems."""
        raise NotImplementedError

    def putfile(self, f, e, data):
        """Stage a file for the next putcommit().

        f: path to file
        e: '', 'x', or 'l' (regular file, executable, or symlink)
        data: file contents"""
        raise NotImplementedError

    def delfile(self, f):
        """Stage the deletion of a file for the next putcommit().

        f: path to file"""
        raise NotImplementedError

    def putcommit(self, files, parents, commit):
        """Create a revision containing every changed file named in
        'files', with the given parents.

        'commit' is a commit object carrying at least the author, date
        and message of the changeset.  This is called after the putfile()
        and delfile() calls.  Note that the sink repository is not told
        to update itself to a particular revision (or even what that
        revision would be) before it receives the file data."""
        raise NotImplementedError

    def puttags(self, tags):
        """Store tags in the sink.

        tags: {tagname: sink_rev_id, ...}"""
        raise NotImplementedError
110
111
# CVS conversion code inspired by hg-cvs-import and git-cvsimport
class convert_cvs(converter_source):
    """Conversion source reading a CVS checkout.

    Changeset metadata comes from the output of cvsps, run inside the
    checkout; file contents are fetched by talking the CVS client/server
    protocol to the root named in CVS/Root.
    """

    def __init__(self, ui, path):
        """Open the CVS working directory at path.

        Raises NoRepo when path has no CVS subdirectory.  Parses the
        cvsps output and opens the server connection right away."""
        self.path = path
        self.ui = ui
        cvs = os.path.join(path, "CVS")
        if not os.path.exists(cvs):
            raise NoRepo("couldn't open CVS repo %s" % path)

        self.changeset = {}   # patchset id -> commit object
        self.files = {}       # patchset id -> {filename: revision}
        self.tags = {}        # tag name -> patchset id
        self.lastbranch = {}  # branch name -> latest patchset id seen
        self.parent = {}      # patchset id -> parent patchset id
        self.socket = None
        # both files are read with their final character dropped
        self.cvsroot = open(os.path.join(cvs, "Root")).read()[:-1]
        self.cvsrepo = open(os.path.join(cvs, "Repository")).read()[:-1]
        self.encoding = locale.getpreferredencoding()
        self._parse()
        self._connect()

    def _parse(self):
        """Fill changeset, files, tags, parent and heads from cvsps.

        Does nothing if changesets were already parsed.  The working
        directory is switched to self.path while cvsps runs and is
        restored afterwards, even on error."""
        if self.changeset:
            return

        d = os.getcwd()
        try:
            os.chdir(self.path)
            id = None
            # state 0: patchset header, 1: log message, 2: member list
            state = 0
            for l in os.popen("cvsps -A -u --cvs-direct -q"):
                if state == 0: # header
                    if l.startswith("PatchSet"):
                        id = l[9:-2]
                    elif l.startswith("Date"):
                        date = util.parsedate(l[6:-1], ["%Y/%m/%d %H:%M:%S"])
                        date = util.datestr(date)
                    elif l.startswith("Branch"):
                        branch = l[8:-1]
                        # the parent is the previous patchset on this branch
                        self.parent[id] = self.lastbranch.get(branch, 'bad')
                        self.lastbranch[branch] = id
                    elif l.startswith("Ancestor branch"):
                        ancestor = l[17:-1]
                        self.parent[id] = self.lastbranch[ancestor]
                    elif l.startswith("Author"):
                        author = self.recode(l[8:-1])
                    elif l.startswith("Tag: "):
                        t = l[5:-1].rstrip()
                        if t != "(none)":
                            self.tags[t] = id
                    elif l.startswith("Log:"):
                        state = 1
                        log = ""
                elif state == 1: # log
                    if l == "Members: \n":
                        files = {}
                        log = self.recode(log[:-1])
                        if log.isspace():
                            log = "*** empty log message ***\n"
                        state = 2
                    else:
                        log += l
                elif state == 2:
                    if l == "\n": # blank line ends the member list
                        state = 0
                        p = [self.parent[id]]
                        if id == "1":
                            # patchset "1" gets no parent
                            p = []
                        if branch == "HEAD":
                            branch = ""
                        c = commit(author=author, date=date, parents=p,
                                   desc=log, branch=branch)
                        self.changeset[id] = c
                        self.files[id] = files
                    else:
                        # member line: keep the revision after "->"
                        colon = l.rfind(':')
                        fname = l[1:colon]
                        rev = l[colon+1:-2]
                        rev = rev.split("->")[1]
                        files[fname] = rev

            self.heads = self.lastbranch.values()
        finally:
            os.chdir(d)

    def _connect(self):
        """Open the connection to the CVS server named by self.cvsroot.

        Handles :pserver: roots (socket connection with authentication),
        :local: roots, and :ext: or user@host roots (via an rsh-style
        command), then performs the initial Root / Valid-responses /
        valid-requests exchange.  Sets self.writep, self.readp and
        self.realroot.

        Raises NoRepo if pserver authentication fails and util.Abort if
        the server does not answer with Valid-requests."""
        root = self.cvsroot
        conntype = None
        user, host = None, None
        cmd = ['cvs', 'server']

        self.ui.status("connecting to %s\n" % root)

        if root.startswith(":pserver:"):
            root = root[9:]
            m = re.match(r'(?:(.*?)(?::(.*?))?@)?([^:\/]*)(?::(\d*))?(.*)',
                         root)
            if m:
                conntype = "pserver"
                user, passw, serv, port, root = m.groups()
                if not user:
                    user = "anonymous"
                rr = ":pserver:" + user + "@" + serv + ":" + root
                if port:
                    rr2, port = "-", int(port)
                else:
                    rr2, port = rr, 2401
                # NOTE(review): this appends the port after the root path;
                # verify that this matches the key format used in .cvspass.
                rr += str(port)

                if not passw:
                    passw = "A"
                    pf = open(os.path.join(os.environ["HOME"], ".cvspass"))
                    try:
                        for l in pf:
                            # :pserver:cvs@mea.tmt.tele.fi:/cvsroot/zmailer Ah<Z
                            m = re.match(r'(/\d+\s+/)?(.*)', l)
                            l = m.group(2)
                            w, p = l.split(' ', 1)
                            if w in [rr, rr2]:
                                passw = p
                                break
                    finally:
                        # close the handle even when a line fails to parse
                        pf.close()

                sck = socket.socket()
                sck.connect((serv, port))
                sck.send("\n".join(["BEGIN AUTH REQUEST", root, user, passw,
                                    "END AUTH REQUEST", ""]))
                if sck.recv(128) != "I LOVE YOU\n":
                    raise NoRepo("CVS pserver authentication failed")

                self.writep = self.readp = sck.makefile('r+')

        if not conntype and root.startswith(":local:"):
            conntype = "local"
            root = root[7:]

        if not conntype:
            # :ext:user@host/home/user/path/to/cvsroot
            if root.startswith(":ext:"):
                root = root[5:]
            m = re.match(r'(?:([^@:/]+)@)?([^:/]+):?(.*)', root)
            if not m:
                conntype = "local"
            else:
                conntype = "rsh"
                user, host, root = m.group(1), m.group(2), m.group(3)

        if conntype != "pserver":
            if conntype == "rsh":
                # "CVS_RSH" or "rsh" always evaluated to "CVS_RSH", so the
                # "rsh" default was never applied when the variable was unset
                rsh = os.environ.get("CVS_RSH") or "rsh"
                if user:
                    cmd = [rsh, '-l', user, host] + cmd
                else:
                    cmd = [rsh, host] + cmd

            self.writep, self.readp = os.popen2(cmd)

        self.realroot = root

        self.writep.write("Root %s\n" % root)
        self.writep.write("Valid-responses ok error Valid-requests Mode"
                          " M Mbinary E Checked-in Created Updated"
                          " Merged Removed\n")
        self.writep.write("valid-requests\n")
        self.writep.flush()
        r = self.readp.readline()
        if not r.startswith("Valid-requests"):
            raise util.Abort("server sucks")
        if "UseUnchanged" in r:
            self.writep.write("UseUnchanged\n")
            self.writep.flush()
            r = self.readp.readline()

    def getheads(self):
        """Return the latest patchset id of every branch seen by _parse."""
        return self.heads

    def _getfile(self, name, rev):
        """Check out name at rev from the server.

        Returns (data, mode) where mode is "x" if the mode line sent by
        the server contains an "x" and "" otherwise.  Raises IOError for
        revisions ending in "(DEAD)" and util.Abort on a response that
        is not understood."""
        if rev.endswith("(DEAD)"):
            raise IOError

        args = ("-N -P -kk -r %s --" % rev).split()
        args.append(os.path.join(self.cvsrepo, name))
        for x in args:
            self.writep.write("Argument %s\n" % x)
        self.writep.write("Directory .\n%s\nco\n" % self.realroot)
        self.writep.flush()

        data = ""
        # stays empty when the server never sends a Created/Updated block,
        # so the final "ok" no longer hits an unbound local
        mode = ""
        while 1:
            line = self.readp.readline()
            if line.startswith("Created ") or line.startswith("Updated "):
                self.readp.readline() # path
                self.readp.readline() # entries
                mode = self.readp.readline()[:-1]
                count = int(self.readp.readline()[:-1])
                data = self.readp.read(count)
            elif line.startswith(" "):
                data += line[1:]
            elif line.startswith("M "):
                pass
            elif line.startswith("Mbinary "):
                count = int(self.readp.readline()[:-1])
                data = self.readp.read(count)
            else:
                if line == "ok\n":
                    return (data, "x" in mode and "x" or "")
                elif line.startswith("E "):
                    self.ui.warn("cvs server: %s\n" % line[2:])
                elif line.startswith("Remove"):
                    # skip one line, then expect the terminating "ok"
                    l = self.readp.readline()
                    l = self.readp.readline()
                    if l != "ok\n":
                        raise util.Abort("unknown CVS response: %s" % l)
                else:
                    raise util.Abort("unknown CVS response: %s" % line)

    def getfile(self, file, rev):
        """Return the contents of file at rev, remembering its mode so a
        following getmode() call can return it."""
        data, mode = self._getfile(file, rev)
        self.modecache[(file, rev)] = mode
        return data

    def getmode(self, file, rev):
        """Return the mode cached by the preceding getfile() call."""
        return self.modecache[(file, rev)]

    def getchanges(self, rev):
        """Return the sorted (filename, revision) pairs of patchset rev.

        Also resets the mode cache."""
        self.modecache = {}
        files = self.files[rev]
        cl = files.items()
        cl.sort()
        return cl

    def recode(self, text):
        """Convert text from the preferred locale encoding to UTF-8,
        replacing undecodable bytes."""
        return text.decode(self.encoding, "replace").encode("utf-8")

    def getcommit(self, rev):
        """Return the commit object parsed for patchset rev."""
        return self.changeset[rev]

    def gettags(self):
        """Return the {tag name: patchset id} mapping built by _parse."""
        return self.tags
350
class convert_git(converter_source):
    """Conversion source reading a git repository through the git
    command-line tools."""

    def __init__(self, ui, path):
        """Open the git repository at path.

        If path contains a .git directory, that directory is used.
        Raises NoRepo when no objects directory is found."""
        if os.path.isdir(path + "/.git"):
            path += "/.git"
        self.path = path
        self.ui = ui
        if not os.path.exists(path + "/objects"):
            raise NoRepo("couldn't open GIT repo %s" % path)

    def getheads(self):
        """Return a one-element list holding the revision of HEAD."""
        fh = os.popen("GIT_DIR=%s git-rev-parse --verify HEAD" % self.path)
        return [fh.read()[:-1]]

    def catfile(self, rev, type):
        """Return the output of git-cat-file for object rev of the given
        type.  Raises IOError for the all-zero revision."""
        if rev == "0" * 40: raise IOError()
        fh = os.popen("GIT_DIR=%s git-cat-file %s %s 2>/dev/null"
                      % (self.path, type, rev))
        return fh.read()

    def getfile(self, name, rev):
        """Return the blob rev; name is not used."""
        return self.catfile(rev, "blob")

    def getmode(self, name, rev):
        """Return the mode recorded by the last getchanges() call."""
        return self.modecache[(name, rev)]

    def getchanges(self, version):
        """Return (filename, hash) pairs from git-diff-tree for version,
        in the order git reports them, and rebuild the mode cache."""
        self.modecache = {}
        fh = os.popen("GIT_DIR=%s git-diff-tree --root -m -r %s"
                      % (self.path, version))
        changes = []
        for l in fh:
            # only lines with a tab carry metadata followed by a filename
            if "\t" not in l: continue
            m, f = l[:-1].split("\t")
            m = m.split()
            h = m[3]
            p = (m[1] == "100755")
            s = (m[1] == "120000")
            self.modecache[(f, h)] = (p and "x") or (s and "l") or ""
            changes.append((f, h))
        return changes

    def getcommit(self, version):
        """Parse commit object version and return it as a commit.

        The committer is appended to the message as a trailing
        "committer: ..." line.  The date is "<timestamp> <offset>", the
        offset being the negated timezone expressed in seconds."""
        c = self.catfile(version, "commit") # read the commit hash
        end = c.find("\n\n")
        message = c[end+2:]
        message = recode(message)
        l = c[:end].splitlines()
        parents = []
        # the first header line is skipped; the rest are "name value"
        for e in l[1:]:
            n, v = e.split(" ", 1)
            if n == "author":
                p = v.split()
                tm, tz = p[-2:]
                author = " ".join(p[:-2])
                if author[0] == "<": author = author[1:-1]
                author = recode(author)
            if n == "committer":
                p = v.split()
                tm, tz = p[-2:]
                committer = " ".join(p[:-2])
                if committer[0] == "<": committer = committer[1:-1]
                committer = recode(committer)
                message += "\ncommitter: %s\n" % committer
            if n == "parent": parents.append(v)

        # tz looks like +HHMM / -HHMM: sign, two hour digits, two minute
        # digits.  Minutes must be scaled to seconds like the hours are.
        tzs, tzh, tzm = tz[-5:-4] + "1", tz[-4:-2], tz[-2:]
        tz = -int(tzs) * (int(tzh) * 3600 + int(tzm) * 60)
        date = tm + " " + str(tz)

        c = commit(parents=parents, date=date, author=author, desc=message)
        return c

    def gettags(self):
        """Return {tag: node} for every peeled tag ("^{}" suffix) listed
        under refs/tags/ by git-ls-remote."""
        tags = {}
        fh = os.popen('git-ls-remote --tags "%s" 2>/dev/null' % self.path)
        prefix = 'refs/tags/'
        for line in fh:
            line = line.strip()
            if not line.endswith("^{}"):
                continue
            node, tag = line.split(None, 1)
            if not tag.startswith(prefix):
                continue
            # strip the prefix and the trailing "^{}"
            tag = tag[len(prefix):-3]
            tags[tag] = node

        return tags
439
class convert_mercurial(converter_sink):
    """Conversion sink writing into a Mercurial repository."""

    def __init__(self, ui, path):
        """Open the Mercurial repository at path.

        Raises NoRepo when the repository cannot be opened."""
        self.path = path
        self.ui = ui
        try:
            self.repo = hg.repository(self.ui, path)
        except hg.RepoError:
            raise NoRepo("couldn't open hg repo %s" % path)

    def mapfile(self):
        """Return the default map file path, <path>/.hg/shamap."""
        return os.path.join(self.path, ".hg", "shamap")

    def getheads(self):
        """Return the changelog heads as hex strings."""
        h = self.repo.changelog.heads()
        return [ hg.hex(x) for x in h ]

    def putfile(self, f, e, data):
        """Write data to working-directory file f with mode e and mark
        the file as added if the dirstate reports it as unknown."""
        self.repo.wwrite(f, data, e)
        if self.repo.dirstate.state(f) == '?':
            self.repo.dirstate.update([f], "a")

    def delfile(self, f):
        """Remove f from the working directory; a file that cannot be
        unlinked (e.g. already missing) is silently ignored."""
        try:
            os.unlink(self.repo.wjoin(f))
            #self.repo.remove([f])
        except OSError:
            pass

    def putcommit(self, files, parents, commit):
        """Commit files with the given parents and return the hex id of
        the resulting tip.

        Duplicate parents are dropped.  More than two parents are
        committed as a chain of merges whose follow-up commits carry the
        message "(octopus merge fixup)".  commit.branch, when present,
        is stored in the changeset's extra data."""
        seen = {}
        pl = []
        for p in parents:
            if p not in seen:
                pl.append(p)
                seen[p] = 1
        parents = pl

        # pad with the all-zero id so there are always two parents
        while len(parents) < 2:
            parents.append("0" * 40)
        p2 = parents.pop(0)

        text = commit.desc
        extra = {}
        try:
            extra["branch"] = commit.branch
        except AttributeError:
            pass

        while parents:
            p1 = p2
            p2 = parents.pop(0)
            self.repo.rawcommit(files, text, commit.author, commit.date,
                                hg.bin(p1), hg.bin(p2), extra=extra)
            text = "(octopus merge fixup)\n"
            p2 = hg.hex(self.repo.changelog.tip())

        return p2

    def puttags(self, tags):
        """Rewrite .hgtags from tags ({tagname: sink_rev_id}).

        When the file content changes, commit it and return the hex id
        of the new tip; otherwise return None."""
        try:
            old = self.repo.wfile(".hgtags").read()
            oldlines = old.splitlines(1)
            oldlines.sort()
        except IOError:
            # no readable .hgtags yet
            oldlines = []

        k = tags.keys()
        k.sort()
        newlines = []
        for tag in k:
            newlines.append("%s %s\n" % (tags[tag], tag))

        newlines.sort()

        if newlines != oldlines:
            self.ui.status("updating tags\n")
            f = self.repo.wfile(".hgtags", "w")
            f.write("".join(newlines))
            f.close()
            if not oldlines: self.repo.add([".hgtags"])
            # time.mktime() treats its argument as local time, so feeding
            # it gmtime() skewed the timestamp by the local UTC offset
            date = "%s 0" % int(time.time())
            self.repo.rawcommit([".hgtags"], "update tags", "convert-repo",
                                date, self.repo.changelog.tip(), hg.nullid)
            return hg.hex(self.repo.changelog.tip())
524
# Converter classes, in the order converter() tries them.
converters = [
    convert_cvs,
    convert_git,
    convert_mercurial,
]
526
19
def converter(ui, path):
    """Return a converter instance for the repository at path.

    Each class in converters is tried in order; the first one that does
    not raise NoRepo wins.  Aborts when path is not a directory or when
    no class accepts it."""
    if not os.path.isdir(path):
        raise util.Abort("%s: not a directory" % path)

    for factory in converters:
        try:
            instance = factory(ui, path)
        except NoRepo:
            continue
        return instance

    raise util.Abort("%s: unknown repository type" % path)
536
29
class convert(object):
    """Drive the conversion of revisions from a source to a sink.

    self.map maps source revision ids to sink revision ids.  It is
    loaded from the map file on construction and appended to after
    every converted revision.
    """

    def __init__(self, ui, source, dest, mapfile, opts):
        """Remember the collaborators and load mapfile if it is readable.

        ui: used for status output
        source: conversion source
        dest: conversion sink
        mapfile: path of the "source_id sink_id" map file
        opts: option dictionary ('datesort' is consulted)"""
        self.source = source
        self.dest = dest
        self.ui = ui
        self.mapfile = mapfile
        self.opts = opts
        self.commitcache = {}

        self.map = {}
        try:
            fp = open(self.mapfile)
            try:
                for l in fp:
                    # split() already discards the line ending, so a last
                    # line without a newline no longer loses a character
                    sv, dv = l.split()
                    self.map[sv] = dv
            finally:
                fp.close()
        except IOError:
            # a missing map file simply means nothing is converted yet
            pass

    def _appendmap(self, src, dst):
        """Append one "src dst" line to the map file and close it."""
        fp = open(self.mapfile, "a")
        try:
            fp.write("%s %s\n" % (src, dst))
        finally:
            fp.close()

    def walktree(self, heads):
        """Walk from heads towards the roots and return {rev: [parents]}
        for every revision that is not in the map yet.

        Commits fetched on the way are kept in self.commitcache.  The
        heads list itself is left untouched."""
        # work on a copy: the source may hand out its own internal list
        visit = list(heads)
        known = {}
        parents = {}
        while visit:
            n = visit.pop(0)
            if n in known or n in self.map: continue
            known[n] = 1
            self.commitcache[n] = self.source.getcommit(n)
            cp = self.commitcache[n].parents
            for p in cp:
                parents.setdefault(n, []).append(p)
                visit.append(p)

        return parents

    def toposort(self, parents):
        """Return the unconverted revisions reachable through parents,
        ordered so that each one comes after its unconverted parents.

        With the 'datesort' option the result is re-sorted by
        (depth, date), depth being the length of the longest chain of
        unconverted ancestors."""
        visit = list(parents)
        seen = {}
        children = {}

        # first pass: invert the parent relation
        while visit:
            n = visit.pop(0)
            if n in seen: continue
            seen[n] = 1
            if n in parents:
                for p in parents[n]:
                    visit.append(p)
                    children.setdefault(p, []).append(n)

        # second pass: emit a node once all its unconverted parents are out
        s = []
        removed = {}
        visit = list(children)
        while visit:
            n = visit.pop(0)
            if n in removed: continue
            dep = 0
            if n in parents:
                for p in parents[n]:
                    if p in self.map: continue
                    if p not in removed:
                        # we're still dependent
                        visit.append(n)
                        dep = 1
                        break

            if not dep:
                # all n's parents are in the list
                removed[n] = 1
                if n not in self.map:
                    s.append(n)
                if n in children:
                    for c in children[n]:
                        visit.insert(0, c)

        if self.opts.get('datesort'):
            depth = {}
            for n in s:
                depth[n] = 0
                pl = [p for p in self.commitcache[n].parents
                      if p not in self.map]
                if pl:
                    depth[n] = max([depth[p] for p in pl]) + 1

            s = [(depth[n], self.commitcache[n].date, n) for n in s]
            s.sort()
            s = [e[2] for e in s]

        return s

    def copy(self, rev):
        """Copy source revision rev into the sink and record the mapping.

        A file whose getfile() raises IOError is deleted in the sink;
        every other changed file is written with its mode."""
        c = self.commitcache[rev]
        files = self.source.getchanges(rev)

        for f, v in files:
            try:
                data = self.source.getfile(f, v)
            except IOError:
                self.dest.delfile(f)
            else:
                e = self.source.getmode(f, v)
                self.dest.putfile(f, e, data)

        r = [self.map[v] for v in c.parents]
        f = [f for f, v in files]
        self.map[rev] = self.dest.putcommit(f, r, c)
        self._appendmap(rev, self.map[rev])

    def convert(self):
        """Convert every unconverted revision, then transfer the tags."""
        self.ui.status("scanning source...\n")
        heads = self.source.getheads()
        parents = self.walktree(heads)
        self.ui.status("sorting...\n")
        t = self.toposort(parents)
        num = len(t)
        c = None

        self.ui.status("converting...\n")
        for c in t:
            num -= 1
            desc = self.commitcache[c].desc
            if "\n" in desc:
                desc = desc.splitlines()[0]
            self.ui.status("%d %s\n" % (num, desc))
            self.copy(c)

        # only tags pointing at converted revisions can be transferred
        tags = self.source.gettags()
        ctags = {}
        for k in tags:
            v = tags[k]
            if v in self.map:
                ctags[k] = self.map[v]

        if c and ctags:
            nrev = self.dest.puttags(ctags)
            # write another hash correspondence to override the previous
            # one so we don't end up with extra tag heads
            if nrev:
                self._appendmap(c, nrev)
677
170
def _convert(ui, src, dest=None, mapfile=None, **opts):
    '''Convert a foreign SCM repository to a Mercurial one.

    Accepted source formats:
    - GIT
    - CVS

    Accepted destination formats:
    - Mercurial

    If destination isn't given, a new Mercurial repo named <src>-hg will
    be created. If <mapfile> isn't given, it will be put in a default
    location (<dest>/.hg/shamap by default)

    The <mapfile> is a simple text file that maps each source commit ID to
    the destination ID for that revision, like so:

    <source ID> <destination ID>

    If the file doesn't exist, it's automatically created. It's updated
    on each commit copied, so convert-repo can be interrupted and can
    be run repeatedly to copy new commits.
    '''

    srcc = converter(ui, src)
    if not hasattr(srcc, "getcommit"):
        raise util.Abort("%s: can't read from this repo type" % src)

    if not dest:
        dest = src + "-hg"
        ui.status("assuming destination %s\n" % dest)

    # Try to be smart and initialize things when required
    if os.path.isdir(dest):
        if len(os.listdir(dest)) > 0:
            # a non-empty directory must already be a Mercurial repository
            try:
                hg.repository(ui, dest)
                ui.status("destination %s is a Mercurial repository\n" % dest)
            except hg.RepoError:
                raise util.Abort(
                    "destination directory %s is not empty.\n"
                    "Please specify an empty directory to be initialized\n"
                    "or an already initialized mercurial repository"
                    % dest)
        else:
            ui.status("initializing destination %s repository\n" % dest)
            hg.repository(ui, dest, create=True)
    elif os.path.exists(dest):
        raise util.Abort("destination %s exists and is not a directory" % dest)
    else:
        ui.status("initializing destination %s repository\n" % dest)
        hg.repository(ui, dest, create=True)

    destc = converter(ui, dest)
    if not hasattr(destc, "putcommit"):
        # report the destination: that is the repository we cannot write
        raise util.Abort("%s: can't write to this repo type" % dest)

    if not mapfile:
        try:
            mapfile = destc.mapfile()
        except (AttributeError, NotImplementedError):
            # sink offers no map file location: keep the map inside the
            # destination directory (dest is the path, destc the object)
            mapfile = os.path.join(dest, "map")

    c = convert(ui, srcc, destc, mapfile, opts)
    c.convert()
743
236
# Command table: command name -> (function, option list, usage synopsis).
cmdtable = {
    "convert": (
        _convert,
        [('', 'datesort', None, 'try to sort changesets by date')],
        'hg convert [OPTION]... SOURCE [DEST [MAPFILE]]',
    ),
}
This diff has been collapsed as it changes many lines, (661 lines changed) Show them Hide them
@@ -1,749 +1,90 b''
1 # convert.py Foreign SCM converter
1 # common code for the convert extension
2 #
3 # Copyright 2005, 2006 Matt Mackall <mpm@selenic.com>
4 #
5 # This software may be used and distributed according to the terms
6 # of the GNU General Public License, incorporated herein by reference.
7
8 import sys, os, zlib, sha, time, re, locale, socket
9 from mercurial import hg, ui, util, commands
10
11 commands.norepo += " convert"
12
2
13 class NoRepo(Exception): pass
3 class NoRepo(Exception): pass
14
4
15 class commit(object):
5 class commit(object):
16 def __init__(self, **parts):
6 def __init__(self, **parts):
17 for x in "author date desc parents".split():
7 for x in "author date desc parents".split():
18 if not x in parts:
8 if not x in parts:
19 raise util.Abort("commit missing field %s" % x)
9 raise util.Abort("commit missing field %s" % x)
20 self.__dict__.update(parts)
10 self.__dict__.update(parts)
21
11
22 def recode(s):
23 try:
24 return s.decode("utf-8").encode("utf-8")
25 except:
26 try:
27 return s.decode("latin-1").encode("utf-8")
28 except:
29 return s.decode("utf-8", "replace").encode("utf-8")
30
31 class converter_source(object):
12 class converter_source(object):
32 """Conversion source interface"""
13 """Conversion source interface"""
33
14
34 def __init__(self, ui, path):
15 def __init__(self, ui, path):
35 """Initialize conversion source (or raise NoRepo("message")
16 """Initialize conversion source (or raise NoRepo("message")
36 exception if path is not a valid repository)"""
17 exception if path is not a valid repository)"""
37 raise NotImplementedError()
18 raise NotImplementedError()
38
19
39 def getheads(self):
20 def getheads(self):
40 """Return a list of this repository's heads"""
21 """Return a list of this repository's heads"""
41 raise NotImplementedError()
22 raise NotImplementedError()
42
23
43 def getfile(self, name, rev):
24 def getfile(self, name, rev):
44 """Return file contents as a string"""
25 """Return file contents as a string"""
45 raise NotImplementedError()
26 raise NotImplementedError()
46
27
47 def getmode(self, name, rev):
28 def getmode(self, name, rev):
48 """Return file mode, eg. '', 'x', or 'l'"""
29 """Return file mode, eg. '', 'x', or 'l'"""
49 raise NotImplementedError()
30 raise NotImplementedError()
50
31
51 def getchanges(self, version):
32 def getchanges(self, version):
52 """Return sorted list of (filename, id) tuples for all files changed in rev.
33 """Return sorted list of (filename, id) tuples for all files changed in rev.
53
34
54 id just tells us which revision to return in getfile(), e.g. in
35 id just tells us which revision to return in getfile(), e.g. in
55 git it's an object hash."""
36 git it's an object hash."""
56 raise NotImplementedError()
37 raise NotImplementedError()
57
38
58 def getcommit(self, version):
39 def getcommit(self, version):
59 """Return the commit object for version"""
40 """Return the commit object for version"""
60 raise NotImplementedError()
41 raise NotImplementedError()
61
42
62 def gettags(self):
43 def gettags(self):
63 """Return the tags as a dictionary of name: revision"""
44 """Return the tags as a dictionary of name: revision"""
64 raise NotImplementedError()
45 raise NotImplementedError()
65
46
66 class converter_sink(object):
47 class converter_sink(object):
67 """Conversion sink (target) interface"""
48 """Conversion sink (target) interface"""
68
49
69 def __init__(self, ui, path):
50 def __init__(self, ui, path):
70 """Initialize conversion sink (or raise NoRepo("message")
51 """Initialize conversion sink (or raise NoRepo("message")
71 exception if path is not a valid repository)"""
52 exception if path is not a valid repository)"""
72 raise NotImplementedError()
53 raise NotImplementedError()
73
54
74 def getheads(self):
55 def getheads(self):
75 """Return a list of this repository's heads"""
56 """Return a list of this repository's heads"""
76 raise NotImplementedError()
57 raise NotImplementedError()
77
58
78 def mapfile(self):
59 def mapfile(self):
79 """Path to a file that will contain lines
60 """Path to a file that will contain lines
80 source_rev_id sink_rev_id
61 source_rev_id sink_rev_id
81 mapping equivalent revision identifiers for each system."""
62 mapping equivalent revision identifiers for each system."""
82 raise NotImplementedError()
63 raise NotImplementedError()
83
64
84 def putfile(self, f, e, data):
65 def putfile(self, f, e, data):
85 """Put file for next putcommit().
66 """Put file for next putcommit().
86 f: path to file
67 f: path to file
87 e: '', 'x', or 'l' (regular file, executable, or symlink)
68 e: '', 'x', or 'l' (regular file, executable, or symlink)
88 data: file contents"""
69 data: file contents"""
89 raise NotImplementedError()
70 raise NotImplementedError()
90
71
91 def delfile(self, f):
72 def delfile(self, f):
92 """Delete file for next putcommit().
73 """Delete file for next putcommit().
93 f: path to file"""
74 f: path to file"""
94 raise NotImplementedError()
75 raise NotImplementedError()
95
76
96 def putcommit(self, files, parents, commit):
77 def putcommit(self, files, parents, commit):
97 """Create a revision with all changed files listed in 'files'
78 """Create a revision with all changed files listed in 'files'
98 and having listed parents. 'commit' is a commit object containing
79 and having listed parents. 'commit' is a commit object containing
99 at a minimum the author, date, and message for this changeset.
80 at a minimum the author, date, and message for this changeset.
100 Called after putfile() and delfile() calls. Note that the sink
81 Called after putfile() and delfile() calls. Note that the sink
101 repository is not told to update itself to a particular revision
82 repository is not told to update itself to a particular revision
102 (or even what that revision would be) before it receives the
83 (or even what that revision would be) before it receives the
103 file data."""
84 file data."""
104 raise NotImplementedError()
85 raise NotImplementedError()
105
86
106 def puttags(self, tags):
87 def puttags(self, tags):
107 """Put tags into sink.
88 """Put tags into sink.
108 tags: {tagname: sink_rev_id, ...}"""
89 tags: {tagname: sink_rev_id, ...}"""
109 raise NotImplementedError()
90 raise NotImplementedError()
110
111
112 # CVS conversion code inspired by hg-cvs-import and git-cvsimport
113 class convert_cvs(converter_source):
114 def __init__(self, ui, path):
115 self.path = path
116 self.ui = ui
117 cvs = os.path.join(path, "CVS")
118 if not os.path.exists(cvs):
119 raise NoRepo("couldn't open CVS repo %s" % path)
120
121 self.changeset = {}
122 self.files = {}
123 self.tags = {}
124 self.lastbranch = {}
125 self.parent = {}
126 self.socket = None
127 self.cvsroot = file(os.path.join(cvs, "Root")).read()[:-1]
128 self.cvsrepo = file(os.path.join(cvs, "Repository")).read()[:-1]
129 self.encoding = locale.getpreferredencoding()
130 self._parse()
131 self._connect()
132
133 def _parse(self):
134 if self.changeset:
135 return
136
137 d = os.getcwd()
138 try:
139 os.chdir(self.path)
140 id = None
141 state = 0
142 for l in os.popen("cvsps -A -u --cvs-direct -q"):
143 if state == 0: # header
144 if l.startswith("PatchSet"):
145 id = l[9:-2]
146 elif l.startswith("Date"):
147 date = util.parsedate(l[6:-1], ["%Y/%m/%d %H:%M:%S"])
148 date = util.datestr(date)
149 elif l.startswith("Branch"):
150 branch = l[8:-1]
151 self.parent[id] = self.lastbranch.get(branch, 'bad')
152 self.lastbranch[branch] = id
153 elif l.startswith("Ancestor branch"):
154 ancestor = l[17:-1]
155 self.parent[id] = self.lastbranch[ancestor]
156 elif l.startswith("Author"):
157 author = self.recode(l[8:-1])
158 elif l.startswith("Tag: "):
159 t = l[5:-1].rstrip()
160 if t != "(none)":
161 self.tags[t] = id
162 elif l.startswith("Log:"):
163 state = 1
164 log = ""
165 elif state == 1: # log
166 if l == "Members: \n":
167 files = {}
168 log = self.recode(log[:-1])
169 if log.isspace():
170 log = "*** empty log message ***\n"
171 state = 2
172 else:
173 log += l
174 elif state == 2:
175 if l == "\n": #
176 state = 0
177 p = [self.parent[id]]
178 if id == "1":
179 p = []
180 if branch == "HEAD":
181 branch = ""
182 c = commit(author=author, date=date, parents=p,
183 desc=log, branch=branch)
184 self.changeset[id] = c
185 self.files[id] = files
186 else:
187 colon = l.rfind(':')
188 file = l[1:colon]
189 rev = l[colon+1:-2]
190 rev = rev.split("->")[1]
191 files[file] = rev
192
193 self.heads = self.lastbranch.values()
194 finally:
195 os.chdir(d)
196
197 def _connect(self):
198 root = self.cvsroot
199 conntype = None
200 user, host = None, None
201 cmd = ['cvs', 'server']
202
203 self.ui.status("connecting to %s\n" % root)
204
205 if root.startswith(":pserver:"):
206 root = root[9:]
207 m = re.match(r'(?:(.*?)(?::(.*?))?@)?([^:\/]*)(?::(\d*))?(.*)',
208 root)
209 if m:
210 conntype = "pserver"
211 user, passw, serv, port, root = m.groups()
212 if not user:
213 user = "anonymous"
214 rr = ":pserver:" + user + "@" + serv + ":" + root
215 if port:
216 rr2, port = "-", int(port)
217 else:
218 rr2, port = rr, 2401
219 rr += str(port)
220
221 if not passw:
222 passw = "A"
223 pf = open(os.path.join(os.environ["HOME"], ".cvspass"))
224 for l in pf:
225 # :pserver:cvs@mea.tmt.tele.fi:/cvsroot/zmailer Ah<Z
226 m = re.match(r'(/\d+\s+/)?(.*)', l)
227 l = m.group(2)
228 w, p = l.split(' ', 1)
229 if w in [rr, rr2]:
230 passw = p
231 break
232 pf.close()
233
234 sck = socket.socket()
235 sck.connect((serv, port))
236 sck.send("\n".join(["BEGIN AUTH REQUEST", root, user, passw,
237 "END AUTH REQUEST", ""]))
238 if sck.recv(128) != "I LOVE YOU\n":
239 raise NoRepo("CVS pserver authentication failed")
240
241 self.writep = self.readp = sck.makefile('r+')
242
243 if not conntype and root.startswith(":local:"):
244 conntype = "local"
245 root = root[7:]
246
247 if not conntype:
248 # :ext:user@host/home/user/path/to/cvsroot
249 if root.startswith(":ext:"):
250 root = root[5:]
251 m = re.match(r'(?:([^@:/]+)@)?([^:/]+):?(.*)', root)
252 if not m:
253 conntype = "local"
254 else:
255 conntype = "rsh"
256 user, host, root = m.group(1), m.group(2), m.group(3)
257
258 if conntype != "pserver":
259 if conntype == "rsh":
260 rsh = os.environ.get("CVS_RSH" or "rsh")
261 if user:
262 cmd = [rsh, '-l', user, host] + cmd
263 else:
264 cmd = [rsh, host] + cmd
265
266 self.writep, self.readp = os.popen2(cmd)
267
268 self.realroot = root
269
270 self.writep.write("Root %s\n" % root)
271 self.writep.write("Valid-responses ok error Valid-requests Mode"
272 " M Mbinary E Checked-in Created Updated"
273 " Merged Removed\n")
274 self.writep.write("valid-requests\n")
275 self.writep.flush()
276 r = self.readp.readline()
277 if not r.startswith("Valid-requests"):
278 raise util.Abort("server sucks")
279 if "UseUnchanged" in r:
280 self.writep.write("UseUnchanged\n")
281 self.writep.flush()
282 r = self.readp.readline()
283
284 def getheads(self):
285 return self.heads
286
287 def _getfile(self, name, rev):
288 if rev.endswith("(DEAD)"):
289 raise IOError
290
291 args = ("-N -P -kk -r %s --" % rev).split()
292 args.append(os.path.join(self.cvsrepo, name))
293 for x in args:
294 self.writep.write("Argument %s\n" % x)
295 self.writep.write("Directory .\n%s\nco\n" % self.realroot)
296 self.writep.flush()
297
298 data = ""
299 while 1:
300 line = self.readp.readline()
301 if line.startswith("Created ") or line.startswith("Updated "):
302 self.readp.readline() # path
303 self.readp.readline() # entries
304 mode = self.readp.readline()[:-1]
305 count = int(self.readp.readline()[:-1])
306 data = self.readp.read(count)
307 elif line.startswith(" "):
308 data += line[1:]
309 elif line.startswith("M "):
310 pass
311 elif line.startswith("Mbinary "):
312 count = int(self.readp.readline()[:-1])
313 data = self.readp.read(count)
314 else:
315 if line == "ok\n":
316 return (data, "x" in mode and "x" or "")
317 elif line.startswith("E "):
318 self.ui.warn("cvs server: %s\n" % line[2:])
319 elif line.startswith("Remove"):
320 l = self.readp.readline()
321 l = self.readp.readline()
322 if l != "ok\n":
323 raise util.Abort("unknown CVS response: %s" % l)
324 else:
325 raise util.Abort("unknown CVS response: %s" % line)
326
327 def getfile(self, file, rev):
328 data, mode = self._getfile(file, rev)
329 self.modecache[(file, rev)] = mode
330 return data
331
332 def getmode(self, file, rev):
333 return self.modecache[(file, rev)]
334
335 def getchanges(self, rev):
336 self.modecache = {}
337 files = self.files[rev]
338 cl = files.items()
339 cl.sort()
340 return cl
341
342 def recode(self, text):
343 return text.decode(self.encoding, "replace").encode("utf-8")
344
345 def getcommit(self, rev):
346 return self.changeset[rev]
347
348 def gettags(self):
349 return self.tags
350
351 class convert_git(converter_source):
352 def __init__(self, ui, path):
353 if os.path.isdir(path + "/.git"):
354 path += "/.git"
355 self.path = path
356 self.ui = ui
357 if not os.path.exists(path + "/objects"):
358 raise NoRepo("couldn't open GIT repo %s" % path)
359
360 def getheads(self):
361 fh = os.popen("GIT_DIR=%s git-rev-parse --verify HEAD" % self.path)
362 return [fh.read()[:-1]]
363
364 def catfile(self, rev, type):
365 if rev == "0" * 40: raise IOError()
366 fh = os.popen("GIT_DIR=%s git-cat-file %s %s 2>/dev/null"
367 % (self.path, type, rev))
368 return fh.read()
369
370 def getfile(self, name, rev):
371 return self.catfile(rev, "blob")
372
373 def getmode(self, name, rev):
374 return self.modecache[(name, rev)]
375
376 def getchanges(self, version):
377 self.modecache = {}
378 fh = os.popen("GIT_DIR=%s git-diff-tree --root -m -r %s"
379 % (self.path, version))
380 changes = []
381 for l in fh:
382 if "\t" not in l: continue
383 m, f = l[:-1].split("\t")
384 m = m.split()
385 h = m[3]
386 p = (m[1] == "100755")
387 s = (m[1] == "120000")
388 self.modecache[(f, h)] = (p and "x") or (s and "l") or ""
389 changes.append((f, h))
390 return changes
391
392 def getcommit(self, version):
393 c = self.catfile(version, "commit") # read the commit hash
394 end = c.find("\n\n")
395 message = c[end+2:]
396 message = recode(message)
397 l = c[:end].splitlines()
398 manifest = l[0].split()[1]
399 parents = []
400 for e in l[1:]:
401 n, v = e.split(" ", 1)
402 if n == "author":
403 p = v.split()
404 tm, tz = p[-2:]
405 author = " ".join(p[:-2])
406 if author[0] == "<": author = author[1:-1]
407 author = recode(author)
408 if n == "committer":
409 p = v.split()
410 tm, tz = p[-2:]
411 committer = " ".join(p[:-2])
412 if committer[0] == "<": committer = committer[1:-1]
413 committer = recode(committer)
414 message += "\ncommitter: %s\n" % committer
415 if n == "parent": parents.append(v)
416
417 tzs, tzh, tzm = tz[-5:-4] + "1", tz[-4:-2], tz[-2:]
418 tz = -int(tzs) * (int(tzh) * 3600 + int(tzm))
419 date = tm + " " + str(tz)
420
421 c = commit(parents=parents, date=date, author=author, desc=message)
422 return c
423
424 def gettags(self):
425 tags = {}
426 fh = os.popen('git-ls-remote --tags "%s" 2>/dev/null' % self.path)
427 prefix = 'refs/tags/'
428 for line in fh:
429 line = line.strip()
430 if not line.endswith("^{}"):
431 continue
432 node, tag = line.split(None, 1)
433 if not tag.startswith(prefix):
434 continue
435 tag = tag[len(prefix):-3]
436 tags[tag] = node
437
438 return tags
439
440 class convert_mercurial(converter_sink):
441 def __init__(self, ui, path):
442 self.path = path
443 self.ui = ui
444 try:
445 self.repo = hg.repository(self.ui, path)
446 except:
447 raise NoRepo("could open hg repo %s" % path)
448
449 def mapfile(self):
450 return os.path.join(self.path, ".hg", "shamap")
451
452 def getheads(self):
453 h = self.repo.changelog.heads()
454 return [ hg.hex(x) for x in h ]
455
456 def putfile(self, f, e, data):
457 self.repo.wwrite(f, data, e)
458 if self.repo.dirstate.state(f) == '?':
459 self.repo.dirstate.update([f], "a")
460
461 def delfile(self, f):
462 try:
463 os.unlink(self.repo.wjoin(f))
464 #self.repo.remove([f])
465 except:
466 pass
467
468 def putcommit(self, files, parents, commit):
469 seen = {}
470 pl = []
471 for p in parents:
472 if p not in seen:
473 pl.append(p)
474 seen[p] = 1
475 parents = pl
476
477 if len(parents) < 2: parents.append("0" * 40)
478 if len(parents) < 2: parents.append("0" * 40)
479 p2 = parents.pop(0)
480
481 text = commit.desc
482 extra = {}
483 try:
484 extra["branch"] = commit.branch
485 except AttributeError:
486 pass
487
488 while parents:
489 p1 = p2
490 p2 = parents.pop(0)
491 a = self.repo.rawcommit(files, text, commit.author, commit.date,
492 hg.bin(p1), hg.bin(p2), extra=extra)
493 text = "(octopus merge fixup)\n"
494 p2 = hg.hex(self.repo.changelog.tip())
495
496 return p2
497
498 def puttags(self, tags):
499 try:
500 old = self.repo.wfile(".hgtags").read()
501 oldlines = old.splitlines(1)
502 oldlines.sort()
503 except:
504 oldlines = []
505
506 k = tags.keys()
507 k.sort()
508 newlines = []
509 for tag in k:
510 newlines.append("%s %s\n" % (tags[tag], tag))
511
512 newlines.sort()
513
514 if newlines != oldlines:
515 self.ui.status("updating tags\n")
516 f = self.repo.wfile(".hgtags", "w")
517 f.write("".join(newlines))
518 f.close()
519 if not oldlines: self.repo.add([".hgtags"])
520 date = "%s 0" % int(time.mktime(time.gmtime()))
521 self.repo.rawcommit([".hgtags"], "update tags", "convert-repo",
522 date, self.repo.changelog.tip(), hg.nullid)
523 return hg.hex(self.repo.changelog.tip())
524
525 converters = [convert_cvs, convert_git, convert_mercurial]
526
527 def converter(ui, path):
528 if not os.path.isdir(path):
529 raise util.Abort("%s: not a directory" % path)
530 for c in converters:
531 try:
532 return c(ui, path)
533 except NoRepo:
534 pass
535 raise util.Abort("%s: unknown repository type" % path)
536
537 class convert(object):
538 def __init__(self, ui, source, dest, mapfile, opts):
539
540 self.source = source
541 self.dest = dest
542 self.ui = ui
543 self.mapfile = mapfile
544 self.opts = opts
545 self.commitcache = {}
546
547 self.map = {}
548 try:
549 for l in file(self.mapfile):
550 sv, dv = l[:-1].split()
551 self.map[sv] = dv
552 except IOError:
553 pass
554
555 def walktree(self, heads):
556 visit = heads
557 known = {}
558 parents = {}
559 while visit:
560 n = visit.pop(0)
561 if n in known or n in self.map: continue
562 known[n] = 1
563 self.commitcache[n] = self.source.getcommit(n)
564 cp = self.commitcache[n].parents
565 for p in cp:
566 parents.setdefault(n, []).append(p)
567 visit.append(p)
568
569 return parents
570
571 def toposort(self, parents):
572 visit = parents.keys()
573 seen = {}
574 children = {}
575
576 while visit:
577 n = visit.pop(0)
578 if n in seen: continue
579 seen[n] = 1
580 pc = 0
581 if n in parents:
582 for p in parents[n]:
583 if p not in self.map: pc += 1
584 visit.append(p)
585 children.setdefault(p, []).append(n)
586 if not pc: root = n
587
588 s = []
589 removed = {}
590 visit = children.keys()
591 while visit:
592 n = visit.pop(0)
593 if n in removed: continue
594 dep = 0
595 if n in parents:
596 for p in parents[n]:
597 if p in self.map: continue
598 if p not in removed:
599 # we're still dependent
600 visit.append(n)
601 dep = 1
602 break
603
604 if not dep:
605 # all n's parents are in the list
606 removed[n] = 1
607 if n not in self.map:
608 s.append(n)
609 if n in children:
610 for c in children[n]:
611 visit.insert(0, c)
612
613 if self.opts.get('datesort'):
614 depth = {}
615 for n in s:
616 depth[n] = 0
617 pl = [p for p in self.commitcache[n].parents
618 if p not in self.map]
619 if pl:
620 depth[n] = max([depth[p] for p in pl]) + 1
621
622 s = [(depth[n], self.commitcache[n].date, n) for n in s]
623 s.sort()
624 s = [e[2] for e in s]
625
626 return s
627
628 def copy(self, rev):
629 c = self.commitcache[rev]
630 files = self.source.getchanges(rev)
631
632 for f, v in files:
633 try:
634 data = self.source.getfile(f, v)
635 except IOError, inst:
636 self.dest.delfile(f)
637 else:
638 e = self.source.getmode(f, v)
639 self.dest.putfile(f, e, data)
640
641 r = [self.map[v] for v in c.parents]
642 f = [f for f, v in files]
643 self.map[rev] = self.dest.putcommit(f, r, c)
644 file(self.mapfile, "a").write("%s %s\n" % (rev, self.map[rev]))
645
646 def convert(self):
647 self.ui.status("scanning source...\n")
648 heads = self.source.getheads()
649 parents = self.walktree(heads)
650 self.ui.status("sorting...\n")
651 t = self.toposort(parents)
652 num = len(t)
653 c = None
654
655 self.ui.status("converting...\n")
656 for c in t:
657 num -= 1
658 desc = self.commitcache[c].desc
659 if "\n" in desc:
660 desc = desc.splitlines()[0]
661 self.ui.status("%d %s\n" % (num, desc))
662 self.copy(c)
663
664 tags = self.source.gettags()
665 ctags = {}
666 for k in tags:
667 v = tags[k]
668 if v in self.map:
669 ctags[k] = self.map[v]
670
671 if c and ctags:
672 nrev = self.dest.puttags(ctags)
673 # write another hash correspondence to override the previous
674 # one so we don't end up with extra tag heads
675 if nrev:
676 file(self.mapfile, "a").write("%s %s\n" % (c, nrev))
677
678 def _convert(ui, src, dest=None, mapfile=None, **opts):
679 '''Convert a foreign SCM repository to a Mercurial one.
680
681 Accepted source formats:
682 - GIT
683 - CVS
684
685 Accepted destination formats:
686 - Mercurial
687
688 If destination isn't given, a new Mercurial repo named <src>-hg will
689 be created. If <mapfile> isn't given, it will be put in a default
690 location (<dest>/.hg/shamap by default)
691
692 The <mapfile> is a simple text file that maps each source commit ID to
693 the destination ID for that revision, like so:
694
695 <source ID> <destination ID>
696
697 If the file doesn't exist, it's automatically created. It's updated
698 on each commit copied, so convert-repo can be interrupted and can
699 be run repeatedly to copy new commits.
700 '''
701
702 srcc = converter(ui, src)
703 if not hasattr(srcc, "getcommit"):
704 raise util.Abort("%s: can't read from this repo type" % src)
705
706 if not dest:
707 dest = src + "-hg"
708 ui.status("assuming destination %s\n" % dest)
709
710 # Try to be smart and initalize things when required
711 if os.path.isdir(dest):
712 if len(os.listdir(dest)) > 0:
713 try:
714 hg.repository(ui, dest)
715 ui.status("destination %s is a Mercurial repository\n" % dest)
716 except hg.RepoError:
717 raise util.Abort(
718 "destination directory %s is not empty.\n"
719 "Please specify an empty directory to be initialized\n"
720 "or an already initialized mercurial repository"
721 % dest)
722 else:
723 ui.status("initializing destination %s repository\n" % dest)
724 hg.repository(ui, dest, create=True)
725 elif os.path.exists(dest):
726 raise util.Abort("destination %s exists and is not a directory" % dest)
727 else:
728 ui.status("initializing destination %s repository\n" % dest)
729 hg.repository(ui, dest, create=True)
730
731 destc = converter(ui, dest)
732 if not hasattr(destc, "putcommit"):
733 raise util.Abort("%s: can't write to this repo type" % src)
734
735 if not mapfile:
736 try:
737 mapfile = destc.mapfile()
738 except:
739 mapfile = os.path.join(destc, "map")
740
741 c = convert(ui, srcc, destc, mapfile, opts)
742 c.convert()
743
744 cmdtable = {
745 "convert":
746 (_convert,
747 [('', 'datesort', None, 'try to sort changesets by date')],
748 'hg convert [OPTION]... SOURCE [DEST [MAPFILE]]'),
749 }
This diff has been collapsed as it changes many lines, (513 lines changed) Show them Hide them
@@ -1,749 +1,244 b''
1 # convert.py Foreign SCM converter
1 # CVS conversion code inspired by hg-cvs-import and git-cvsimport
2 #
3 # Copyright 2005, 2006 Matt Mackall <mpm@selenic.com>
4 #
5 # This software may be used and distributed according to the terms
6 # of the GNU General Public License, incorporated herein by reference.
7
8 import sys, os, zlib, sha, time, re, locale, socket
9 from mercurial import hg, ui, util, commands
10
11 commands.norepo += " convert"
12
13 class NoRepo(Exception): pass
14
15 class commit(object):
16 def __init__(self, **parts):
17 for x in "author date desc parents".split():
18 if not x in parts:
19 raise util.Abort("commit missing field %s" % x)
20 self.__dict__.update(parts)
21
22 def recode(s):
23 try:
24 return s.decode("utf-8").encode("utf-8")
25 except:
26 try:
27 return s.decode("latin-1").encode("utf-8")
28 except:
29 return s.decode("utf-8", "replace").encode("utf-8")
30
31 class converter_source(object):
32 """Conversion source interface"""
33
34 def __init__(self, ui, path):
35 """Initialize conversion source (or raise NoRepo("message")
36 exception if path is not a valid repository)"""
37 raise NotImplementedError()
38
39 def getheads(self):
40 """Return a list of this repository's heads"""
41 raise NotImplementedError()
42
43 def getfile(self, name, rev):
44 """Return file contents as a string"""
45 raise NotImplementedError()
46
47 def getmode(self, name, rev):
48 """Return file mode, eg. '', 'x', or 'l'"""
49 raise NotImplementedError()
50
51 def getchanges(self, version):
52 """Return sorted list of (filename, id) tuples for all files changed in rev.
53
2
54 id just tells us which revision to return in getfile(), e.g. in
3 import os, locale, re, socket
55 git it's an object hash."""
4 from mercurial import util
56 raise NotImplementedError()
57
58 def getcommit(self, version):
59 """Return the commit object for version"""
60 raise NotImplementedError()
61
62 def gettags(self):
63 """Return the tags as a dictionary of name: revision"""
64 raise NotImplementedError()
65
66 class converter_sink(object):
67 """Conversion sink (target) interface"""
68
69 def __init__(self, ui, path):
70 """Initialize conversion sink (or raise NoRepo("message")
71 exception if path is not a valid repository)"""
72 raise NotImplementedError()
73
74 def getheads(self):
75 """Return a list of this repository's heads"""
76 raise NotImplementedError()
77
78 def mapfile(self):
79 """Path to a file that will contain lines
80 source_rev_id sink_rev_id
81 mapping equivalent revision identifiers for each system."""
82 raise NotImplementedError()
83
5
84 def putfile(self, f, e, data):
6 from common import NoRepo, commit, converter_source
85 """Put file for next putcommit().
86 f: path to file
87 e: '', 'x', or 'l' (regular file, executable, or symlink)
88 data: file contents"""
89 raise NotImplementedError()
90
91 def delfile(self, f):
92 """Delete file for next putcommit().
93 f: path to file"""
94 raise NotImplementedError()
95
7
96 def putcommit(self, files, parents, commit):
97 """Create a revision with all changed files listed in 'files'
98 and having listed parents. 'commit' is a commit object containing
99 at a minimum the author, date, and message for this changeset.
100 Called after putfile() and delfile() calls. Note that the sink
101 repository is not told to update itself to a particular revision
102 (or even what that revision would be) before it receives the
103 file data."""
104 raise NotImplementedError()
105
106 def puttags(self, tags):
107 """Put tags into sink.
108 tags: {tagname: sink_rev_id, ...}"""
109 raise NotImplementedError()
110
111
112 # CVS conversion code inspired by hg-cvs-import and git-cvsimport
113 class convert_cvs(converter_source):
8 class convert_cvs(converter_source):
114 def __init__(self, ui, path):
9 def __init__(self, ui, path):
115 self.path = path
10 self.path = path
116 self.ui = ui
11 self.ui = ui
117 cvs = os.path.join(path, "CVS")
12 cvs = os.path.join(path, "CVS")
118 if not os.path.exists(cvs):
13 if not os.path.exists(cvs):
119 raise NoRepo("couldn't open CVS repo %s" % path)
14 raise NoRepo("couldn't open CVS repo %s" % path)
120
15
121 self.changeset = {}
16 self.changeset = {}
122 self.files = {}
17 self.files = {}
123 self.tags = {}
18 self.tags = {}
124 self.lastbranch = {}
19 self.lastbranch = {}
125 self.parent = {}
20 self.parent = {}
126 self.socket = None
21 self.socket = None
127 self.cvsroot = file(os.path.join(cvs, "Root")).read()[:-1]
22 self.cvsroot = file(os.path.join(cvs, "Root")).read()[:-1]
128 self.cvsrepo = file(os.path.join(cvs, "Repository")).read()[:-1]
23 self.cvsrepo = file(os.path.join(cvs, "Repository")).read()[:-1]
129 self.encoding = locale.getpreferredencoding()
24 self.encoding = locale.getpreferredencoding()
130 self._parse()
25 self._parse()
131 self._connect()
26 self._connect()
132
27
133 def _parse(self):
28 def _parse(self):
134 if self.changeset:
29 if self.changeset:
135 return
30 return
136
31
137 d = os.getcwd()
32 d = os.getcwd()
138 try:
33 try:
139 os.chdir(self.path)
34 os.chdir(self.path)
140 id = None
35 id = None
141 state = 0
36 state = 0
142 for l in os.popen("cvsps -A -u --cvs-direct -q"):
37 for l in os.popen("cvsps -A -u --cvs-direct -q"):
143 if state == 0: # header
38 if state == 0: # header
144 if l.startswith("PatchSet"):
39 if l.startswith("PatchSet"):
145 id = l[9:-2]
40 id = l[9:-2]
146 elif l.startswith("Date"):
41 elif l.startswith("Date"):
147 date = util.parsedate(l[6:-1], ["%Y/%m/%d %H:%M:%S"])
42 date = util.parsedate(l[6:-1], ["%Y/%m/%d %H:%M:%S"])
148 date = util.datestr(date)
43 date = util.datestr(date)
149 elif l.startswith("Branch"):
44 elif l.startswith("Branch"):
150 branch = l[8:-1]
45 branch = l[8:-1]
151 self.parent[id] = self.lastbranch.get(branch, 'bad')
46 self.parent[id] = self.lastbranch.get(branch, 'bad')
152 self.lastbranch[branch] = id
47 self.lastbranch[branch] = id
153 elif l.startswith("Ancestor branch"):
48 elif l.startswith("Ancestor branch"):
154 ancestor = l[17:-1]
49 ancestor = l[17:-1]
155 self.parent[id] = self.lastbranch[ancestor]
50 self.parent[id] = self.lastbranch[ancestor]
156 elif l.startswith("Author"):
51 elif l.startswith("Author"):
157 author = self.recode(l[8:-1])
52 author = self.recode(l[8:-1])
158 elif l.startswith("Tag: "):
53 elif l.startswith("Tag: "):
159 t = l[5:-1].rstrip()
54 t = l[5:-1].rstrip()
160 if t != "(none)":
55 if t != "(none)":
161 self.tags[t] = id
56 self.tags[t] = id
162 elif l.startswith("Log:"):
57 elif l.startswith("Log:"):
163 state = 1
58 state = 1
164 log = ""
59 log = ""
165 elif state == 1: # log
60 elif state == 1: # log
166 if l == "Members: \n":
61 if l == "Members: \n":
167 files = {}
62 files = {}
168 log = self.recode(log[:-1])
63 log = self.recode(log[:-1])
169 if log.isspace():
64 if log.isspace():
170 log = "*** empty log message ***\n"
65 log = "*** empty log message ***\n"
171 state = 2
66 state = 2
172 else:
67 else:
173 log += l
68 log += l
174 elif state == 2:
69 elif state == 2:
175 if l == "\n": #
70 if l == "\n": #
176 state = 0
71 state = 0
177 p = [self.parent[id]]
72 p = [self.parent[id]]
178 if id == "1":
73 if id == "1":
179 p = []
74 p = []
180 if branch == "HEAD":
75 if branch == "HEAD":
181 branch = ""
76 branch = ""
182 c = commit(author=author, date=date, parents=p,
77 c = commit(author=author, date=date, parents=p,
183 desc=log, branch=branch)
78 desc=log, branch=branch)
184 self.changeset[id] = c
79 self.changeset[id] = c
185 self.files[id] = files
80 self.files[id] = files
186 else:
81 else:
187 colon = l.rfind(':')
82 colon = l.rfind(':')
188 file = l[1:colon]
83 file = l[1:colon]
189 rev = l[colon+1:-2]
84 rev = l[colon+1:-2]
190 rev = rev.split("->")[1]
85 rev = rev.split("->")[1]
191 files[file] = rev
86 files[file] = rev
192
87
193 self.heads = self.lastbranch.values()
88 self.heads = self.lastbranch.values()
194 finally:
89 finally:
195 os.chdir(d)
90 os.chdir(d)
196
91
197 def _connect(self):
92 def _connect(self):
198 root = self.cvsroot
93 root = self.cvsroot
199 conntype = None
94 conntype = None
200 user, host = None, None
95 user, host = None, None
201 cmd = ['cvs', 'server']
96 cmd = ['cvs', 'server']
202
97
203 self.ui.status("connecting to %s\n" % root)
98 self.ui.status("connecting to %s\n" % root)
204
99
205 if root.startswith(":pserver:"):
100 if root.startswith(":pserver:"):
206 root = root[9:]
101 root = root[9:]
207 m = re.match(r'(?:(.*?)(?::(.*?))?@)?([^:\/]*)(?::(\d*))?(.*)',
102 m = re.match(r'(?:(.*?)(?::(.*?))?@)?([^:\/]*)(?::(\d*))?(.*)',
208 root)
103 root)
209 if m:
104 if m:
210 conntype = "pserver"
105 conntype = "pserver"
211 user, passw, serv, port, root = m.groups()
106 user, passw, serv, port, root = m.groups()
212 if not user:
107 if not user:
213 user = "anonymous"
108 user = "anonymous"
214 rr = ":pserver:" + user + "@" + serv + ":" + root
109 rr = ":pserver:" + user + "@" + serv + ":" + root
215 if port:
110 if port:
216 rr2, port = "-", int(port)
111 rr2, port = "-", int(port)
217 else:
112 else:
218 rr2, port = rr, 2401
113 rr2, port = rr, 2401
219 rr += str(port)
114 rr += str(port)
220
115
221 if not passw:
116 if not passw:
222 passw = "A"
117 passw = "A"
223 pf = open(os.path.join(os.environ["HOME"], ".cvspass"))
118 pf = open(os.path.join(os.environ["HOME"], ".cvspass"))
224 for l in pf:
119 for l in pf:
225 # :pserver:cvs@mea.tmt.tele.fi:/cvsroot/zmailer Ah<Z
120 # :pserver:cvs@mea.tmt.tele.fi:/cvsroot/zmailer Ah<Z
226 m = re.match(r'(/\d+\s+/)?(.*)', l)
121 m = re.match(r'(/\d+\s+/)?(.*)', l)
227 l = m.group(2)
122 l = m.group(2)
228 w, p = l.split(' ', 1)
123 w, p = l.split(' ', 1)
229 if w in [rr, rr2]:
124 if w in [rr, rr2]:
230 passw = p
125 passw = p
231 break
126 break
232 pf.close()
127 pf.close()
233
128
234 sck = socket.socket()
129 sck = socket.socket()
235 sck.connect((serv, port))
130 sck.connect((serv, port))
236 sck.send("\n".join(["BEGIN AUTH REQUEST", root, user, passw,
131 sck.send("\n".join(["BEGIN AUTH REQUEST", root, user, passw,
237 "END AUTH REQUEST", ""]))
132 "END AUTH REQUEST", ""]))
238 if sck.recv(128) != "I LOVE YOU\n":
133 if sck.recv(128) != "I LOVE YOU\n":
239 raise NoRepo("CVS pserver authentication failed")
134 raise NoRepo("CVS pserver authentication failed")
240
135
241 self.writep = self.readp = sck.makefile('r+')
136 self.writep = self.readp = sck.makefile('r+')
242
137
243 if not conntype and root.startswith(":local:"):
138 if not conntype and root.startswith(":local:"):
244 conntype = "local"
139 conntype = "local"
245 root = root[7:]
140 root = root[7:]
246
141
247 if not conntype:
142 if not conntype:
248 # :ext:user@host/home/user/path/to/cvsroot
143 # :ext:user@host/home/user/path/to/cvsroot
249 if root.startswith(":ext:"):
144 if root.startswith(":ext:"):
250 root = root[5:]
145 root = root[5:]
251 m = re.match(r'(?:([^@:/]+)@)?([^:/]+):?(.*)', root)
146 m = re.match(r'(?:([^@:/]+)@)?([^:/]+):?(.*)', root)
252 if not m:
147 if not m:
253 conntype = "local"
148 conntype = "local"
254 else:
149 else:
255 conntype = "rsh"
150 conntype = "rsh"
256 user, host, root = m.group(1), m.group(2), m.group(3)
151 user, host, root = m.group(1), m.group(2), m.group(3)
257
152
258 if conntype != "pserver":
153 if conntype != "pserver":
259 if conntype == "rsh":
154 if conntype == "rsh":
260 rsh = os.environ.get("CVS_RSH" or "rsh")
155 rsh = os.environ.get("CVS_RSH" or "rsh")
261 if user:
156 if user:
262 cmd = [rsh, '-l', user, host] + cmd
157 cmd = [rsh, '-l', user, host] + cmd
263 else:
158 else:
264 cmd = [rsh, host] + cmd
159 cmd = [rsh, host] + cmd
265
160
266 self.writep, self.readp = os.popen2(cmd)
161 self.writep, self.readp = os.popen2(cmd)
267
162
268 self.realroot = root
163 self.realroot = root
269
164
270 self.writep.write("Root %s\n" % root)
165 self.writep.write("Root %s\n" % root)
271 self.writep.write("Valid-responses ok error Valid-requests Mode"
166 self.writep.write("Valid-responses ok error Valid-requests Mode"
272 " M Mbinary E Checked-in Created Updated"
167 " M Mbinary E Checked-in Created Updated"
273 " Merged Removed\n")
168 " Merged Removed\n")
274 self.writep.write("valid-requests\n")
169 self.writep.write("valid-requests\n")
275 self.writep.flush()
170 self.writep.flush()
276 r = self.readp.readline()
171 r = self.readp.readline()
277 if not r.startswith("Valid-requests"):
172 if not r.startswith("Valid-requests"):
278 raise util.Abort("server sucks")
173 raise util.Abort("server sucks")
279 if "UseUnchanged" in r:
174 if "UseUnchanged" in r:
280 self.writep.write("UseUnchanged\n")
175 self.writep.write("UseUnchanged\n")
281 self.writep.flush()
176 self.writep.flush()
282 r = self.readp.readline()
177 r = self.readp.readline()
283
178
284 def getheads(self):
179 def getheads(self):
285 return self.heads
180 return self.heads
286
181
287 def _getfile(self, name, rev):
182 def _getfile(self, name, rev):
288 if rev.endswith("(DEAD)"):
183 if rev.endswith("(DEAD)"):
289 raise IOError
184 raise IOError
290
185
291 args = ("-N -P -kk -r %s --" % rev).split()
186 args = ("-N -P -kk -r %s --" % rev).split()
292 args.append(os.path.join(self.cvsrepo, name))
187 args.append(os.path.join(self.cvsrepo, name))
293 for x in args:
188 for x in args:
294 self.writep.write("Argument %s\n" % x)
189 self.writep.write("Argument %s\n" % x)
295 self.writep.write("Directory .\n%s\nco\n" % self.realroot)
190 self.writep.write("Directory .\n%s\nco\n" % self.realroot)
296 self.writep.flush()
191 self.writep.flush()
297
192
298 data = ""
193 data = ""
299 while 1:
194 while 1:
300 line = self.readp.readline()
195 line = self.readp.readline()
301 if line.startswith("Created ") or line.startswith("Updated "):
196 if line.startswith("Created ") or line.startswith("Updated "):
302 self.readp.readline() # path
197 self.readp.readline() # path
303 self.readp.readline() # entries
198 self.readp.readline() # entries
304 mode = self.readp.readline()[:-1]
199 mode = self.readp.readline()[:-1]
305 count = int(self.readp.readline()[:-1])
200 count = int(self.readp.readline()[:-1])
306 data = self.readp.read(count)
201 data = self.readp.read(count)
307 elif line.startswith(" "):
202 elif line.startswith(" "):
308 data += line[1:]
203 data += line[1:]
309 elif line.startswith("M "):
204 elif line.startswith("M "):
310 pass
205 pass
311 elif line.startswith("Mbinary "):
206 elif line.startswith("Mbinary "):
312 count = int(self.readp.readline()[:-1])
207 count = int(self.readp.readline()[:-1])
313 data = self.readp.read(count)
208 data = self.readp.read(count)
314 else:
209 else:
315 if line == "ok\n":
210 if line == "ok\n":
316 return (data, "x" in mode and "x" or "")
211 return (data, "x" in mode and "x" or "")
317 elif line.startswith("E "):
212 elif line.startswith("E "):
318 self.ui.warn("cvs server: %s\n" % line[2:])
213 self.ui.warn("cvs server: %s\n" % line[2:])
319 elif line.startswith("Remove"):
214 elif line.startswith("Remove"):
320 l = self.readp.readline()
215 l = self.readp.readline()
321 l = self.readp.readline()
216 l = self.readp.readline()
322 if l != "ok\n":
217 if l != "ok\n":
323 raise util.Abort("unknown CVS response: %s" % l)
218 raise util.Abort("unknown CVS response: %s" % l)
324 else:
219 else:
325 raise util.Abort("unknown CVS response: %s" % line)
220 raise util.Abort("unknown CVS response: %s" % line)
326
221
327 def getfile(self, file, rev):
222 def getfile(self, file, rev):
328 data, mode = self._getfile(file, rev)
223 data, mode = self._getfile(file, rev)
329 self.modecache[(file, rev)] = mode
224 self.modecache[(file, rev)] = mode
330 return data
225 return data
331
226
332 def getmode(self, file, rev):
227 def getmode(self, file, rev):
333 return self.modecache[(file, rev)]
228 return self.modecache[(file, rev)]
334
229
335 def getchanges(self, rev):
230 def getchanges(self, rev):
336 self.modecache = {}
231 self.modecache = {}
337 files = self.files[rev]
232 files = self.files[rev]
338 cl = files.items()
233 cl = files.items()
339 cl.sort()
234 cl.sort()
340 return cl
235 return cl
341
236
342 def recode(self, text):
237 def recode(self, text):
343 return text.decode(self.encoding, "replace").encode("utf-8")
238 return text.decode(self.encoding, "replace").encode("utf-8")
344
239
345 def getcommit(self, rev):
240 def getcommit(self, rev):
346 return self.changeset[rev]
241 return self.changeset[rev]
347
242
348 def gettags(self):
243 def gettags(self):
349 return self.tags
244 return self.tags
350
351 class convert_git(converter_source):
352 def __init__(self, ui, path):
353 if os.path.isdir(path + "/.git"):
354 path += "/.git"
355 self.path = path
356 self.ui = ui
357 if not os.path.exists(path + "/objects"):
358 raise NoRepo("couldn't open GIT repo %s" % path)
359
360 def getheads(self):
361 fh = os.popen("GIT_DIR=%s git-rev-parse --verify HEAD" % self.path)
362 return [fh.read()[:-1]]
363
364 def catfile(self, rev, type):
365 if rev == "0" * 40: raise IOError()
366 fh = os.popen("GIT_DIR=%s git-cat-file %s %s 2>/dev/null"
367 % (self.path, type, rev))
368 return fh.read()
369
370 def getfile(self, name, rev):
371 return self.catfile(rev, "blob")
372
373 def getmode(self, name, rev):
374 return self.modecache[(name, rev)]
375
376 def getchanges(self, version):
377 self.modecache = {}
378 fh = os.popen("GIT_DIR=%s git-diff-tree --root -m -r %s"
379 % (self.path, version))
380 changes = []
381 for l in fh:
382 if "\t" not in l: continue
383 m, f = l[:-1].split("\t")
384 m = m.split()
385 h = m[3]
386 p = (m[1] == "100755")
387 s = (m[1] == "120000")
388 self.modecache[(f, h)] = (p and "x") or (s and "l") or ""
389 changes.append((f, h))
390 return changes
391
392 def getcommit(self, version):
393 c = self.catfile(version, "commit") # read the commit hash
394 end = c.find("\n\n")
395 message = c[end+2:]
396 message = recode(message)
397 l = c[:end].splitlines()
398 manifest = l[0].split()[1]
399 parents = []
400 for e in l[1:]:
401 n, v = e.split(" ", 1)
402 if n == "author":
403 p = v.split()
404 tm, tz = p[-2:]
405 author = " ".join(p[:-2])
406 if author[0] == "<": author = author[1:-1]
407 author = recode(author)
408 if n == "committer":
409 p = v.split()
410 tm, tz = p[-2:]
411 committer = " ".join(p[:-2])
412 if committer[0] == "<": committer = committer[1:-1]
413 committer = recode(committer)
414 message += "\ncommitter: %s\n" % committer
415 if n == "parent": parents.append(v)
416
417 tzs, tzh, tzm = tz[-5:-4] + "1", tz[-4:-2], tz[-2:]
418 tz = -int(tzs) * (int(tzh) * 3600 + int(tzm))
419 date = tm + " " + str(tz)
420
421 c = commit(parents=parents, date=date, author=author, desc=message)
422 return c
423
424 def gettags(self):
425 tags = {}
426 fh = os.popen('git-ls-remote --tags "%s" 2>/dev/null' % self.path)
427 prefix = 'refs/tags/'
428 for line in fh:
429 line = line.strip()
430 if not line.endswith("^{}"):
431 continue
432 node, tag = line.split(None, 1)
433 if not tag.startswith(prefix):
434 continue
435 tag = tag[len(prefix):-3]
436 tags[tag] = node
437
438 return tags
439
440 class convert_mercurial(converter_sink):
441 def __init__(self, ui, path):
442 self.path = path
443 self.ui = ui
444 try:
445 self.repo = hg.repository(self.ui, path)
446 except:
447 raise NoRepo("could open hg repo %s" % path)
448
449 def mapfile(self):
450 return os.path.join(self.path, ".hg", "shamap")
451
452 def getheads(self):
453 h = self.repo.changelog.heads()
454 return [ hg.hex(x) for x in h ]
455
456 def putfile(self, f, e, data):
457 self.repo.wwrite(f, data, e)
458 if self.repo.dirstate.state(f) == '?':
459 self.repo.dirstate.update([f], "a")
460
461 def delfile(self, f):
462 try:
463 os.unlink(self.repo.wjoin(f))
464 #self.repo.remove([f])
465 except:
466 pass
467
468 def putcommit(self, files, parents, commit):
469 seen = {}
470 pl = []
471 for p in parents:
472 if p not in seen:
473 pl.append(p)
474 seen[p] = 1
475 parents = pl
476
477 if len(parents) < 2: parents.append("0" * 40)
478 if len(parents) < 2: parents.append("0" * 40)
479 p2 = parents.pop(0)
480
481 text = commit.desc
482 extra = {}
483 try:
484 extra["branch"] = commit.branch
485 except AttributeError:
486 pass
487
488 while parents:
489 p1 = p2
490 p2 = parents.pop(0)
491 a = self.repo.rawcommit(files, text, commit.author, commit.date,
492 hg.bin(p1), hg.bin(p2), extra=extra)
493 text = "(octopus merge fixup)\n"
494 p2 = hg.hex(self.repo.changelog.tip())
495
496 return p2
497
498 def puttags(self, tags):
499 try:
500 old = self.repo.wfile(".hgtags").read()
501 oldlines = old.splitlines(1)
502 oldlines.sort()
503 except:
504 oldlines = []
505
506 k = tags.keys()
507 k.sort()
508 newlines = []
509 for tag in k:
510 newlines.append("%s %s\n" % (tags[tag], tag))
511
512 newlines.sort()
513
514 if newlines != oldlines:
515 self.ui.status("updating tags\n")
516 f = self.repo.wfile(".hgtags", "w")
517 f.write("".join(newlines))
518 f.close()
519 if not oldlines: self.repo.add([".hgtags"])
520 date = "%s 0" % int(time.mktime(time.gmtime()))
521 self.repo.rawcommit([".hgtags"], "update tags", "convert-repo",
522 date, self.repo.changelog.tip(), hg.nullid)
523 return hg.hex(self.repo.changelog.tip())
524
525 converters = [convert_cvs, convert_git, convert_mercurial]
526
527 def converter(ui, path):
528 if not os.path.isdir(path):
529 raise util.Abort("%s: not a directory" % path)
530 for c in converters:
531 try:
532 return c(ui, path)
533 except NoRepo:
534 pass
535 raise util.Abort("%s: unknown repository type" % path)
536
537 class convert(object):
538 def __init__(self, ui, source, dest, mapfile, opts):
539
540 self.source = source
541 self.dest = dest
542 self.ui = ui
543 self.mapfile = mapfile
544 self.opts = opts
545 self.commitcache = {}
546
547 self.map = {}
548 try:
549 for l in file(self.mapfile):
550 sv, dv = l[:-1].split()
551 self.map[sv] = dv
552 except IOError:
553 pass
554
555 def walktree(self, heads):
556 visit = heads
557 known = {}
558 parents = {}
559 while visit:
560 n = visit.pop(0)
561 if n in known or n in self.map: continue
562 known[n] = 1
563 self.commitcache[n] = self.source.getcommit(n)
564 cp = self.commitcache[n].parents
565 for p in cp:
566 parents.setdefault(n, []).append(p)
567 visit.append(p)
568
569 return parents
570
571 def toposort(self, parents):
572 visit = parents.keys()
573 seen = {}
574 children = {}
575
576 while visit:
577 n = visit.pop(0)
578 if n in seen: continue
579 seen[n] = 1
580 pc = 0
581 if n in parents:
582 for p in parents[n]:
583 if p not in self.map: pc += 1
584 visit.append(p)
585 children.setdefault(p, []).append(n)
586 if not pc: root = n
587
588 s = []
589 removed = {}
590 visit = children.keys()
591 while visit:
592 n = visit.pop(0)
593 if n in removed: continue
594 dep = 0
595 if n in parents:
596 for p in parents[n]:
597 if p in self.map: continue
598 if p not in removed:
599 # we're still dependent
600 visit.append(n)
601 dep = 1
602 break
603
604 if not dep:
605 # all n's parents are in the list
606 removed[n] = 1
607 if n not in self.map:
608 s.append(n)
609 if n in children:
610 for c in children[n]:
611 visit.insert(0, c)
612
613 if self.opts.get('datesort'):
614 depth = {}
615 for n in s:
616 depth[n] = 0
617 pl = [p for p in self.commitcache[n].parents
618 if p not in self.map]
619 if pl:
620 depth[n] = max([depth[p] for p in pl]) + 1
621
622 s = [(depth[n], self.commitcache[n].date, n) for n in s]
623 s.sort()
624 s = [e[2] for e in s]
625
626 return s
627
628 def copy(self, rev):
629 c = self.commitcache[rev]
630 files = self.source.getchanges(rev)
631
632 for f, v in files:
633 try:
634 data = self.source.getfile(f, v)
635 except IOError, inst:
636 self.dest.delfile(f)
637 else:
638 e = self.source.getmode(f, v)
639 self.dest.putfile(f, e, data)
640
641 r = [self.map[v] for v in c.parents]
642 f = [f for f, v in files]
643 self.map[rev] = self.dest.putcommit(f, r, c)
644 file(self.mapfile, "a").write("%s %s\n" % (rev, self.map[rev]))
645
646 def convert(self):
647 self.ui.status("scanning source...\n")
648 heads = self.source.getheads()
649 parents = self.walktree(heads)
650 self.ui.status("sorting...\n")
651 t = self.toposort(parents)
652 num = len(t)
653 c = None
654
655 self.ui.status("converting...\n")
656 for c in t:
657 num -= 1
658 desc = self.commitcache[c].desc
659 if "\n" in desc:
660 desc = desc.splitlines()[0]
661 self.ui.status("%d %s\n" % (num, desc))
662 self.copy(c)
663
664 tags = self.source.gettags()
665 ctags = {}
666 for k in tags:
667 v = tags[k]
668 if v in self.map:
669 ctags[k] = self.map[v]
670
671 if c and ctags:
672 nrev = self.dest.puttags(ctags)
673 # write another hash correspondence to override the previous
674 # one so we don't end up with extra tag heads
675 if nrev:
676 file(self.mapfile, "a").write("%s %s\n" % (c, nrev))
677
678 def _convert(ui, src, dest=None, mapfile=None, **opts):
679 '''Convert a foreign SCM repository to a Mercurial one.
680
681 Accepted source formats:
682 - GIT
683 - CVS
684
685 Accepted destination formats:
686 - Mercurial
687
688 If destination isn't given, a new Mercurial repo named <src>-hg will
689 be created. If <mapfile> isn't given, it will be put in a default
690 location (<dest>/.hg/shamap by default)
691
692 The <mapfile> is a simple text file that maps each source commit ID to
693 the destination ID for that revision, like so:
694
695 <source ID> <destination ID>
696
697 If the file doesn't exist, it's automatically created. It's updated
698 on each commit copied, so convert-repo can be interrupted and can
699 be run repeatedly to copy new commits.
700 '''
701
702 srcc = converter(ui, src)
703 if not hasattr(srcc, "getcommit"):
704 raise util.Abort("%s: can't read from this repo type" % src)
705
706 if not dest:
707 dest = src + "-hg"
708 ui.status("assuming destination %s\n" % dest)
709
710 # Try to be smart and initalize things when required
711 if os.path.isdir(dest):
712 if len(os.listdir(dest)) > 0:
713 try:
714 hg.repository(ui, dest)
715 ui.status("destination %s is a Mercurial repository\n" % dest)
716 except hg.RepoError:
717 raise util.Abort(
718 "destination directory %s is not empty.\n"
719 "Please specify an empty directory to be initialized\n"
720 "or an already initialized mercurial repository"
721 % dest)
722 else:
723 ui.status("initializing destination %s repository\n" % dest)
724 hg.repository(ui, dest, create=True)
725 elif os.path.exists(dest):
726 raise util.Abort("destination %s exists and is not a directory" % dest)
727 else:
728 ui.status("initializing destination %s repository\n" % dest)
729 hg.repository(ui, dest, create=True)
730
731 destc = converter(ui, dest)
732 if not hasattr(destc, "putcommit"):
733 raise util.Abort("%s: can't write to this repo type" % src)
734
735 if not mapfile:
736 try:
737 mapfile = destc.mapfile()
738 except:
739 mapfile = os.path.join(destc, "map")
740
741 c = convert(ui, srcc, destc, mapfile, opts)
742 c.convert()
743
744 cmdtable = {
745 "convert":
746 (_convert,
747 [('', 'datesort', None, 'try to sort changesets by date')],
748 'hg convert [OPTION]... SOURCE [DEST [MAPFILE]]'),
749 }
This diff has been collapsed as it changes many lines, (652 lines changed) Show them Hide them
@@ -1,749 +1,103 b''
1 # convert.py Foreign SCM converter
1 # git support for the convert extension
2 #
3 # Copyright 2005, 2006 Matt Mackall <mpm@selenic.com>
4 #
5 # This software may be used and distributed according to the terms
6 # of the GNU General Public License, incorporated herein by reference.
7
8 import sys, os, zlib, sha, time, re, locale, socket
9 from mercurial import hg, ui, util, commands
10
2
11 commands.norepo += " convert"
3 import os
12
13 class NoRepo(Exception): pass
14
4
15 class commit(object):
5 from common import NoRepo, commit, converter_source
16 def __init__(self, **parts):
17 for x in "author date desc parents".split():
18 if not x in parts:
19 raise util.Abort("commit missing field %s" % x)
20 self.__dict__.update(parts)
21
6
22 def recode(s):
7 def recode(s):
23 try:
8 try:
24 return s.decode("utf-8").encode("utf-8")
9 return s.decode("utf-8").encode("utf-8")
25 except:
10 except:
26 try:
11 try:
27 return s.decode("latin-1").encode("utf-8")
12 return s.decode("latin-1").encode("utf-8")
28 except:
13 except:
29 return s.decode("utf-8", "replace").encode("utf-8")
14 return s.decode("utf-8", "replace").encode("utf-8")
30
15
31 class converter_source(object):
32 """Conversion source interface"""
33
34 def __init__(self, ui, path):
35 """Initialize conversion source (or raise NoRepo("message")
36 exception if path is not a valid repository)"""
37 raise NotImplementedError()
38
39 def getheads(self):
40 """Return a list of this repository's heads"""
41 raise NotImplementedError()
42
43 def getfile(self, name, rev):
44 """Return file contents as a string"""
45 raise NotImplementedError()
46
47 def getmode(self, name, rev):
48 """Return file mode, eg. '', 'x', or 'l'"""
49 raise NotImplementedError()
50
51 def getchanges(self, version):
52 """Return sorted list of (filename, id) tuples for all files changed in rev.
53
54 id just tells us which revision to return in getfile(), e.g. in
55 git it's an object hash."""
56 raise NotImplementedError()
57
58 def getcommit(self, version):
59 """Return the commit object for version"""
60 raise NotImplementedError()
61
62 def gettags(self):
63 """Return the tags as a dictionary of name: revision"""
64 raise NotImplementedError()
65
66 class converter_sink(object):
67 """Conversion sink (target) interface"""
68
69 def __init__(self, ui, path):
70 """Initialize conversion sink (or raise NoRepo("message")
71 exception if path is not a valid repository)"""
72 raise NotImplementedError()
73
74 def getheads(self):
75 """Return a list of this repository's heads"""
76 raise NotImplementedError()
77
78 def mapfile(self):
79 """Path to a file that will contain lines
80 source_rev_id sink_rev_id
81 mapping equivalent revision identifiers for each system."""
82 raise NotImplementedError()
83
84 def putfile(self, f, e, data):
85 """Put file for next putcommit().
86 f: path to file
87 e: '', 'x', or 'l' (regular file, executable, or symlink)
88 data: file contents"""
89 raise NotImplementedError()
90
91 def delfile(self, f):
92 """Delete file for next putcommit().
93 f: path to file"""
94 raise NotImplementedError()
95
96 def putcommit(self, files, parents, commit):
97 """Create a revision with all changed files listed in 'files'
98 and having listed parents. 'commit' is a commit object containing
99 at a minimum the author, date, and message for this changeset.
100 Called after putfile() and delfile() calls. Note that the sink
101 repository is not told to update itself to a particular revision
102 (or even what that revision would be) before it receives the
103 file data."""
104 raise NotImplementedError()
105
106 def puttags(self, tags):
107 """Put tags into sink.
108 tags: {tagname: sink_rev_id, ...}"""
109 raise NotImplementedError()
110
111
112 # CVS conversion code inspired by hg-cvs-import and git-cvsimport
113 class convert_cvs(converter_source):
114 def __init__(self, ui, path):
115 self.path = path
116 self.ui = ui
117 cvs = os.path.join(path, "CVS")
118 if not os.path.exists(cvs):
119 raise NoRepo("couldn't open CVS repo %s" % path)
120
121 self.changeset = {}
122 self.files = {}
123 self.tags = {}
124 self.lastbranch = {}
125 self.parent = {}
126 self.socket = None
127 self.cvsroot = file(os.path.join(cvs, "Root")).read()[:-1]
128 self.cvsrepo = file(os.path.join(cvs, "Repository")).read()[:-1]
129 self.encoding = locale.getpreferredencoding()
130 self._parse()
131 self._connect()
132
133 def _parse(self):
134 if self.changeset:
135 return
136
137 d = os.getcwd()
138 try:
139 os.chdir(self.path)
140 id = None
141 state = 0
142 for l in os.popen("cvsps -A -u --cvs-direct -q"):
143 if state == 0: # header
144 if l.startswith("PatchSet"):
145 id = l[9:-2]
146 elif l.startswith("Date"):
147 date = util.parsedate(l[6:-1], ["%Y/%m/%d %H:%M:%S"])
148 date = util.datestr(date)
149 elif l.startswith("Branch"):
150 branch = l[8:-1]
151 self.parent[id] = self.lastbranch.get(branch, 'bad')
152 self.lastbranch[branch] = id
153 elif l.startswith("Ancestor branch"):
154 ancestor = l[17:-1]
155 self.parent[id] = self.lastbranch[ancestor]
156 elif l.startswith("Author"):
157 author = self.recode(l[8:-1])
158 elif l.startswith("Tag: "):
159 t = l[5:-1].rstrip()
160 if t != "(none)":
161 self.tags[t] = id
162 elif l.startswith("Log:"):
163 state = 1
164 log = ""
165 elif state == 1: # log
166 if l == "Members: \n":
167 files = {}
168 log = self.recode(log[:-1])
169 if log.isspace():
170 log = "*** empty log message ***\n"
171 state = 2
172 else:
173 log += l
174 elif state == 2:
175 if l == "\n": #
176 state = 0
177 p = [self.parent[id]]
178 if id == "1":
179 p = []
180 if branch == "HEAD":
181 branch = ""
182 c = commit(author=author, date=date, parents=p,
183 desc=log, branch=branch)
184 self.changeset[id] = c
185 self.files[id] = files
186 else:
187 colon = l.rfind(':')
188 file = l[1:colon]
189 rev = l[colon+1:-2]
190 rev = rev.split("->")[1]
191 files[file] = rev
192
193 self.heads = self.lastbranch.values()
194 finally:
195 os.chdir(d)
196
197 def _connect(self):
198 root = self.cvsroot
199 conntype = None
200 user, host = None, None
201 cmd = ['cvs', 'server']
202
203 self.ui.status("connecting to %s\n" % root)
204
205 if root.startswith(":pserver:"):
206 root = root[9:]
207 m = re.match(r'(?:(.*?)(?::(.*?))?@)?([^:\/]*)(?::(\d*))?(.*)',
208 root)
209 if m:
210 conntype = "pserver"
211 user, passw, serv, port, root = m.groups()
212 if not user:
213 user = "anonymous"
214 rr = ":pserver:" + user + "@" + serv + ":" + root
215 if port:
216 rr2, port = "-", int(port)
217 else:
218 rr2, port = rr, 2401
219 rr += str(port)
220
221 if not passw:
222 passw = "A"
223 pf = open(os.path.join(os.environ["HOME"], ".cvspass"))
224 for l in pf:
225 # :pserver:cvs@mea.tmt.tele.fi:/cvsroot/zmailer Ah<Z
226 m = re.match(r'(/\d+\s+/)?(.*)', l)
227 l = m.group(2)
228 w, p = l.split(' ', 1)
229 if w in [rr, rr2]:
230 passw = p
231 break
232 pf.close()
233
234 sck = socket.socket()
235 sck.connect((serv, port))
236 sck.send("\n".join(["BEGIN AUTH REQUEST", root, user, passw,
237 "END AUTH REQUEST", ""]))
238 if sck.recv(128) != "I LOVE YOU\n":
239 raise NoRepo("CVS pserver authentication failed")
240
241 self.writep = self.readp = sck.makefile('r+')
242
243 if not conntype and root.startswith(":local:"):
244 conntype = "local"
245 root = root[7:]
246
247 if not conntype:
248 # :ext:user@host/home/user/path/to/cvsroot
249 if root.startswith(":ext:"):
250 root = root[5:]
251 m = re.match(r'(?:([^@:/]+)@)?([^:/]+):?(.*)', root)
252 if not m:
253 conntype = "local"
254 else:
255 conntype = "rsh"
256 user, host, root = m.group(1), m.group(2), m.group(3)
257
258 if conntype != "pserver":
259 if conntype == "rsh":
260 rsh = os.environ.get("CVS_RSH" or "rsh")
261 if user:
262 cmd = [rsh, '-l', user, host] + cmd
263 else:
264 cmd = [rsh, host] + cmd
265
266 self.writep, self.readp = os.popen2(cmd)
267
268 self.realroot = root
269
270 self.writep.write("Root %s\n" % root)
271 self.writep.write("Valid-responses ok error Valid-requests Mode"
272 " M Mbinary E Checked-in Created Updated"
273 " Merged Removed\n")
274 self.writep.write("valid-requests\n")
275 self.writep.flush()
276 r = self.readp.readline()
277 if not r.startswith("Valid-requests"):
278 raise util.Abort("server sucks")
279 if "UseUnchanged" in r:
280 self.writep.write("UseUnchanged\n")
281 self.writep.flush()
282 r = self.readp.readline()
283
284 def getheads(self):
285 return self.heads
286
287 def _getfile(self, name, rev):
288 if rev.endswith("(DEAD)"):
289 raise IOError
290
291 args = ("-N -P -kk -r %s --" % rev).split()
292 args.append(os.path.join(self.cvsrepo, name))
293 for x in args:
294 self.writep.write("Argument %s\n" % x)
295 self.writep.write("Directory .\n%s\nco\n" % self.realroot)
296 self.writep.flush()
297
298 data = ""
299 while 1:
300 line = self.readp.readline()
301 if line.startswith("Created ") or line.startswith("Updated "):
302 self.readp.readline() # path
303 self.readp.readline() # entries
304 mode = self.readp.readline()[:-1]
305 count = int(self.readp.readline()[:-1])
306 data = self.readp.read(count)
307 elif line.startswith(" "):
308 data += line[1:]
309 elif line.startswith("M "):
310 pass
311 elif line.startswith("Mbinary "):
312 count = int(self.readp.readline()[:-1])
313 data = self.readp.read(count)
314 else:
315 if line == "ok\n":
316 return (data, "x" in mode and "x" or "")
317 elif line.startswith("E "):
318 self.ui.warn("cvs server: %s\n" % line[2:])
319 elif line.startswith("Remove"):
320 l = self.readp.readline()
321 l = self.readp.readline()
322 if l != "ok\n":
323 raise util.Abort("unknown CVS response: %s" % l)
324 else:
325 raise util.Abort("unknown CVS response: %s" % line)
326
327 def getfile(self, file, rev):
328 data, mode = self._getfile(file, rev)
329 self.modecache[(file, rev)] = mode
330 return data
331
332 def getmode(self, file, rev):
333 return self.modecache[(file, rev)]
334
335 def getchanges(self, rev):
336 self.modecache = {}
337 files = self.files[rev]
338 cl = files.items()
339 cl.sort()
340 return cl
341
342 def recode(self, text):
343 return text.decode(self.encoding, "replace").encode("utf-8")
344
345 def getcommit(self, rev):
346 return self.changeset[rev]
347
348 def gettags(self):
349 return self.tags
350
351 class convert_git(converter_source):
16 class convert_git(converter_source):
352 def __init__(self, ui, path):
17 def __init__(self, ui, path):
353 if os.path.isdir(path + "/.git"):
18 if os.path.isdir(path + "/.git"):
354 path += "/.git"
19 path += "/.git"
355 self.path = path
20 self.path = path
356 self.ui = ui
21 self.ui = ui
357 if not os.path.exists(path + "/objects"):
22 if not os.path.exists(path + "/objects"):
358 raise NoRepo("couldn't open GIT repo %s" % path)
23 raise NoRepo("couldn't open GIT repo %s" % path)
359
24
360 def getheads(self):
25 def getheads(self):
361 fh = os.popen("GIT_DIR=%s git-rev-parse --verify HEAD" % self.path)
26 fh = os.popen("GIT_DIR=%s git-rev-parse --verify HEAD" % self.path)
362 return [fh.read()[:-1]]
27 return [fh.read()[:-1]]
363
28
364 def catfile(self, rev, type):
29 def catfile(self, rev, type):
365 if rev == "0" * 40: raise IOError()
30 if rev == "0" * 40: raise IOError()
366 fh = os.popen("GIT_DIR=%s git-cat-file %s %s 2>/dev/null"
31 fh = os.popen("GIT_DIR=%s git-cat-file %s %s 2>/dev/null"
367 % (self.path, type, rev))
32 % (self.path, type, rev))
368 return fh.read()
33 return fh.read()
369
34
370 def getfile(self, name, rev):
35 def getfile(self, name, rev):
371 return self.catfile(rev, "blob")
36 return self.catfile(rev, "blob")
372
37
373 def getmode(self, name, rev):
38 def getmode(self, name, rev):
374 return self.modecache[(name, rev)]
39 return self.modecache[(name, rev)]
375
40
376 def getchanges(self, version):
41 def getchanges(self, version):
377 self.modecache = {}
42 self.modecache = {}
378 fh = os.popen("GIT_DIR=%s git-diff-tree --root -m -r %s"
43 fh = os.popen("GIT_DIR=%s git-diff-tree --root -m -r %s"
379 % (self.path, version))
44 % (self.path, version))
380 changes = []
45 changes = []
381 for l in fh:
46 for l in fh:
382 if "\t" not in l: continue
47 if "\t" not in l: continue
383 m, f = l[:-1].split("\t")
48 m, f = l[:-1].split("\t")
384 m = m.split()
49 m = m.split()
385 h = m[3]
50 h = m[3]
386 p = (m[1] == "100755")
51 p = (m[1] == "100755")
387 s = (m[1] == "120000")
52 s = (m[1] == "120000")
388 self.modecache[(f, h)] = (p and "x") or (s and "l") or ""
53 self.modecache[(f, h)] = (p and "x") or (s and "l") or ""
389 changes.append((f, h))
54 changes.append((f, h))
390 return changes
55 return changes
391
56
392 def getcommit(self, version):
57 def getcommit(self, version):
393 c = self.catfile(version, "commit") # read the commit hash
58 c = self.catfile(version, "commit") # read the commit hash
394 end = c.find("\n\n")
59 end = c.find("\n\n")
395 message = c[end+2:]
60 message = c[end+2:]
396 message = recode(message)
61 message = recode(message)
397 l = c[:end].splitlines()
62 l = c[:end].splitlines()
398 manifest = l[0].split()[1]
63 manifest = l[0].split()[1]
399 parents = []
64 parents = []
400 for e in l[1:]:
65 for e in l[1:]:
401 n, v = e.split(" ", 1)
66 n, v = e.split(" ", 1)
402 if n == "author":
67 if n == "author":
403 p = v.split()
68 p = v.split()
404 tm, tz = p[-2:]
69 tm, tz = p[-2:]
405 author = " ".join(p[:-2])
70 author = " ".join(p[:-2])
406 if author[0] == "<": author = author[1:-1]
71 if author[0] == "<": author = author[1:-1]
407 author = recode(author)
72 author = recode(author)
408 if n == "committer":
73 if n == "committer":
409 p = v.split()
74 p = v.split()
410 tm, tz = p[-2:]
75 tm, tz = p[-2:]
411 committer = " ".join(p[:-2])
76 committer = " ".join(p[:-2])
412 if committer[0] == "<": committer = committer[1:-1]
77 if committer[0] == "<": committer = committer[1:-1]
413 committer = recode(committer)
78 committer = recode(committer)
414 message += "\ncommitter: %s\n" % committer
79 message += "\ncommitter: %s\n" % committer
415 if n == "parent": parents.append(v)
80 if n == "parent": parents.append(v)
416
81
417 tzs, tzh, tzm = tz[-5:-4] + "1", tz[-4:-2], tz[-2:]
82 tzs, tzh, tzm = tz[-5:-4] + "1", tz[-4:-2], tz[-2:]
418 tz = -int(tzs) * (int(tzh) * 3600 + int(tzm))
83 tz = -int(tzs) * (int(tzh) * 3600 + int(tzm))
419 date = tm + " " + str(tz)
84 date = tm + " " + str(tz)
420
85
421 c = commit(parents=parents, date=date, author=author, desc=message)
86 c = commit(parents=parents, date=date, author=author, desc=message)
422 return c
87 return c
423
88
424 def gettags(self):
89 def gettags(self):
425 tags = {}
90 tags = {}
426 fh = os.popen('git-ls-remote --tags "%s" 2>/dev/null' % self.path)
91 fh = os.popen('git-ls-remote --tags "%s" 2>/dev/null' % self.path)
427 prefix = 'refs/tags/'
92 prefix = 'refs/tags/'
428 for line in fh:
93 for line in fh:
429 line = line.strip()
94 line = line.strip()
430 if not line.endswith("^{}"):
95 if not line.endswith("^{}"):
431 continue
96 continue
432 node, tag = line.split(None, 1)
97 node, tag = line.split(None, 1)
433 if not tag.startswith(prefix):
98 if not tag.startswith(prefix):
434 continue
99 continue
435 tag = tag[len(prefix):-3]
100 tag = tag[len(prefix):-3]
436 tags[tag] = node
101 tags[tag] = node
437
102
438 return tags
103 return tags
439
440 class convert_mercurial(converter_sink):
441 def __init__(self, ui, path):
442 self.path = path
443 self.ui = ui
444 try:
445 self.repo = hg.repository(self.ui, path)
446 except:
447 raise NoRepo("could open hg repo %s" % path)
448
449 def mapfile(self):
450 return os.path.join(self.path, ".hg", "shamap")
451
452 def getheads(self):
453 h = self.repo.changelog.heads()
454 return [ hg.hex(x) for x in h ]
455
456 def putfile(self, f, e, data):
457 self.repo.wwrite(f, data, e)
458 if self.repo.dirstate.state(f) == '?':
459 self.repo.dirstate.update([f], "a")
460
461 def delfile(self, f):
462 try:
463 os.unlink(self.repo.wjoin(f))
464 #self.repo.remove([f])
465 except:
466 pass
467
468 def putcommit(self, files, parents, commit):
469 seen = {}
470 pl = []
471 for p in parents:
472 if p not in seen:
473 pl.append(p)
474 seen[p] = 1
475 parents = pl
476
477 if len(parents) < 2: parents.append("0" * 40)
478 if len(parents) < 2: parents.append("0" * 40)
479 p2 = parents.pop(0)
480
481 text = commit.desc
482 extra = {}
483 try:
484 extra["branch"] = commit.branch
485 except AttributeError:
486 pass
487
488 while parents:
489 p1 = p2
490 p2 = parents.pop(0)
491 a = self.repo.rawcommit(files, text, commit.author, commit.date,
492 hg.bin(p1), hg.bin(p2), extra=extra)
493 text = "(octopus merge fixup)\n"
494 p2 = hg.hex(self.repo.changelog.tip())
495
496 return p2
497
498 def puttags(self, tags):
499 try:
500 old = self.repo.wfile(".hgtags").read()
501 oldlines = old.splitlines(1)
502 oldlines.sort()
503 except:
504 oldlines = []
505
506 k = tags.keys()
507 k.sort()
508 newlines = []
509 for tag in k:
510 newlines.append("%s %s\n" % (tags[tag], tag))
511
512 newlines.sort()
513
514 if newlines != oldlines:
515 self.ui.status("updating tags\n")
516 f = self.repo.wfile(".hgtags", "w")
517 f.write("".join(newlines))
518 f.close()
519 if not oldlines: self.repo.add([".hgtags"])
520 date = "%s 0" % int(time.mktime(time.gmtime()))
521 self.repo.rawcommit([".hgtags"], "update tags", "convert-repo",
522 date, self.repo.changelog.tip(), hg.nullid)
523 return hg.hex(self.repo.changelog.tip())
524
525 converters = [convert_cvs, convert_git, convert_mercurial]
526
527 def converter(ui, path):
528 if not os.path.isdir(path):
529 raise util.Abort("%s: not a directory" % path)
530 for c in converters:
531 try:
532 return c(ui, path)
533 except NoRepo:
534 pass
535 raise util.Abort("%s: unknown repository type" % path)
536
537 class convert(object):
538 def __init__(self, ui, source, dest, mapfile, opts):
539
540 self.source = source
541 self.dest = dest
542 self.ui = ui
543 self.mapfile = mapfile
544 self.opts = opts
545 self.commitcache = {}
546
547 self.map = {}
548 try:
549 for l in file(self.mapfile):
550 sv, dv = l[:-1].split()
551 self.map[sv] = dv
552 except IOError:
553 pass
554
555 def walktree(self, heads):
556 visit = heads
557 known = {}
558 parents = {}
559 while visit:
560 n = visit.pop(0)
561 if n in known or n in self.map: continue
562 known[n] = 1
563 self.commitcache[n] = self.source.getcommit(n)
564 cp = self.commitcache[n].parents
565 for p in cp:
566 parents.setdefault(n, []).append(p)
567 visit.append(p)
568
569 return parents
570
571 def toposort(self, parents):
572 visit = parents.keys()
573 seen = {}
574 children = {}
575
576 while visit:
577 n = visit.pop(0)
578 if n in seen: continue
579 seen[n] = 1
580 pc = 0
581 if n in parents:
582 for p in parents[n]:
583 if p not in self.map: pc += 1
584 visit.append(p)
585 children.setdefault(p, []).append(n)
586 if not pc: root = n
587
588 s = []
589 removed = {}
590 visit = children.keys()
591 while visit:
592 n = visit.pop(0)
593 if n in removed: continue
594 dep = 0
595 if n in parents:
596 for p in parents[n]:
597 if p in self.map: continue
598 if p not in removed:
599 # we're still dependent
600 visit.append(n)
601 dep = 1
602 break
603
604 if not dep:
605 # all n's parents are in the list
606 removed[n] = 1
607 if n not in self.map:
608 s.append(n)
609 if n in children:
610 for c in children[n]:
611 visit.insert(0, c)
612
613 if self.opts.get('datesort'):
614 depth = {}
615 for n in s:
616 depth[n] = 0
617 pl = [p for p in self.commitcache[n].parents
618 if p not in self.map]
619 if pl:
620 depth[n] = max([depth[p] for p in pl]) + 1
621
622 s = [(depth[n], self.commitcache[n].date, n) for n in s]
623 s.sort()
624 s = [e[2] for e in s]
625
626 return s
627
628 def copy(self, rev):
629 c = self.commitcache[rev]
630 files = self.source.getchanges(rev)
631
632 for f, v in files:
633 try:
634 data = self.source.getfile(f, v)
635 except IOError, inst:
636 self.dest.delfile(f)
637 else:
638 e = self.source.getmode(f, v)
639 self.dest.putfile(f, e, data)
640
641 r = [self.map[v] for v in c.parents]
642 f = [f for f, v in files]
643 self.map[rev] = self.dest.putcommit(f, r, c)
644 file(self.mapfile, "a").write("%s %s\n" % (rev, self.map[rev]))
645
646 def convert(self):
647 self.ui.status("scanning source...\n")
648 heads = self.source.getheads()
649 parents = self.walktree(heads)
650 self.ui.status("sorting...\n")
651 t = self.toposort(parents)
652 num = len(t)
653 c = None
654
655 self.ui.status("converting...\n")
656 for c in t:
657 num -= 1
658 desc = self.commitcache[c].desc
659 if "\n" in desc:
660 desc = desc.splitlines()[0]
661 self.ui.status("%d %s\n" % (num, desc))
662 self.copy(c)
663
664 tags = self.source.gettags()
665 ctags = {}
666 for k in tags:
667 v = tags[k]
668 if v in self.map:
669 ctags[k] = self.map[v]
670
671 if c and ctags:
672 nrev = self.dest.puttags(ctags)
673 # write another hash correspondence to override the previous
674 # one so we don't end up with extra tag heads
675 if nrev:
676 file(self.mapfile, "a").write("%s %s\n" % (c, nrev))
677
678 def _convert(ui, src, dest=None, mapfile=None, **opts):
679 '''Convert a foreign SCM repository to a Mercurial one.
680
681 Accepted source formats:
682 - GIT
683 - CVS
684
685 Accepted destination formats:
686 - Mercurial
687
688 If destination isn't given, a new Mercurial repo named <src>-hg will
689 be created. If <mapfile> isn't given, it will be put in a default
690 location (<dest>/.hg/shamap by default)
691
692 The <mapfile> is a simple text file that maps each source commit ID to
693 the destination ID for that revision, like so:
694
695 <source ID> <destination ID>
696
697 If the file doesn't exist, it's automatically created. It's updated
698 on each commit copied, so convert-repo can be interrupted and can
699 be run repeatedly to copy new commits.
700 '''
701
702 srcc = converter(ui, src)
703 if not hasattr(srcc, "getcommit"):
704 raise util.Abort("%s: can't read from this repo type" % src)
705
706 if not dest:
707 dest = src + "-hg"
708 ui.status("assuming destination %s\n" % dest)
709
710 # Try to be smart and initalize things when required
711 if os.path.isdir(dest):
712 if len(os.listdir(dest)) > 0:
713 try:
714 hg.repository(ui, dest)
715 ui.status("destination %s is a Mercurial repository\n" % dest)
716 except hg.RepoError:
717 raise util.Abort(
718 "destination directory %s is not empty.\n"
719 "Please specify an empty directory to be initialized\n"
720 "or an already initialized mercurial repository"
721 % dest)
722 else:
723 ui.status("initializing destination %s repository\n" % dest)
724 hg.repository(ui, dest, create=True)
725 elif os.path.exists(dest):
726 raise util.Abort("destination %s exists and is not a directory" % dest)
727 else:
728 ui.status("initializing destination %s repository\n" % dest)
729 hg.repository(ui, dest, create=True)
730
731 destc = converter(ui, dest)
732 if not hasattr(destc, "putcommit"):
733 raise util.Abort("%s: can't write to this repo type" % src)
734
735 if not mapfile:
736 try:
737 mapfile = destc.mapfile()
738 except:
739 mapfile = os.path.join(destc, "map")
740
741 c = convert(ui, srcc, destc, mapfile, opts)
742 c.convert()
743
744 cmdtable = {
745 "convert":
746 (_convert,
747 [('', 'datesort', None, 'try to sort changesets by date')],
748 'hg convert [OPTION]... SOURCE [DEST [MAPFILE]]'),
749 }
This diff has been collapsed as it changes many lines, (666 lines changed) Show them Hide them
@@ -1,749 +1,91 b''
1 # convert.py Foreign SCM converter
1 # hg backend for convert extension
2 #
3 # Copyright 2005, 2006 Matt Mackall <mpm@selenic.com>
4 #
5 # This software may be used and distributed according to the terms
6 # of the GNU General Public License, incorporated herein by reference.
7
8 import sys, os, zlib, sha, time, re, locale, socket
9 from mercurial import hg, ui, util, commands
10
11 commands.norepo += " convert"
12
13 class NoRepo(Exception): pass
14
15 class commit(object):
16 def __init__(self, **parts):
17 for x in "author date desc parents".split():
18 if not x in parts:
19 raise util.Abort("commit missing field %s" % x)
20 self.__dict__.update(parts)
21
22 def recode(s):
23 try:
24 return s.decode("utf-8").encode("utf-8")
25 except:
26 try:
27 return s.decode("latin-1").encode("utf-8")
28 except:
29 return s.decode("utf-8", "replace").encode("utf-8")
30
31 class converter_source(object):
32 """Conversion source interface"""
33
34 def __init__(self, ui, path):
35 """Initialize conversion source (or raise NoRepo("message")
36 exception if path is not a valid repository)"""
37 raise NotImplementedError()
38
39 def getheads(self):
40 """Return a list of this repository's heads"""
41 raise NotImplementedError()
42
43 def getfile(self, name, rev):
44 """Return file contents as a string"""
45 raise NotImplementedError()
46
47 def getmode(self, name, rev):
48 """Return file mode, eg. '', 'x', or 'l'"""
49 raise NotImplementedError()
50
51 def getchanges(self, version):
52 """Return sorted list of (filename, id) tuples for all files changed in rev.
53
54 id just tells us which revision to return in getfile(), e.g. in
55 git it's an object hash."""
56 raise NotImplementedError()
57
58 def getcommit(self, version):
59 """Return the commit object for version"""
60 raise NotImplementedError()
61
62 def gettags(self):
63 """Return the tags as a dictionary of name: revision"""
64 raise NotImplementedError()
65
66 class converter_sink(object):
67 """Conversion sink (target) interface"""
68
69 def __init__(self, ui, path):
70 """Initialize conversion sink (or raise NoRepo("message")
71 exception if path is not a valid repository)"""
72 raise NotImplementedError()
73
74 def getheads(self):
75 """Return a list of this repository's heads"""
76 raise NotImplementedError()
77
78 def mapfile(self):
79 """Path to a file that will contain lines
80 source_rev_id sink_rev_id
81 mapping equivalent revision identifiers for each system."""
82 raise NotImplementedError()
83
84 def putfile(self, f, e, data):
85 """Put file for next putcommit().
86 f: path to file
87 e: '', 'x', or 'l' (regular file, executable, or symlink)
88 data: file contents"""
89 raise NotImplementedError()
90
91 def delfile(self, f):
92 """Delete file for next putcommit().
93 f: path to file"""
94 raise NotImplementedError()
95
96 def putcommit(self, files, parents, commit):
97 """Create a revision with all changed files listed in 'files'
98 and having listed parents. 'commit' is a commit object containing
99 at a minimum the author, date, and message for this changeset.
100 Called after putfile() and delfile() calls. Note that the sink
101 repository is not told to update itself to a particular revision
102 (or even what that revision would be) before it receives the
103 file data."""
104 raise NotImplementedError()
105
106 def puttags(self, tags):
107 """Put tags into sink.
108 tags: {tagname: sink_rev_id, ...}"""
109 raise NotImplementedError()
110
111
112 # CVS conversion code inspired by hg-cvs-import and git-cvsimport
113 class convert_cvs(converter_source):
114 def __init__(self, ui, path):
115 self.path = path
116 self.ui = ui
117 cvs = os.path.join(path, "CVS")
118 if not os.path.exists(cvs):
119 raise NoRepo("couldn't open CVS repo %s" % path)
120
121 self.changeset = {}
122 self.files = {}
123 self.tags = {}
124 self.lastbranch = {}
125 self.parent = {}
126 self.socket = None
127 self.cvsroot = file(os.path.join(cvs, "Root")).read()[:-1]
128 self.cvsrepo = file(os.path.join(cvs, "Repository")).read()[:-1]
129 self.encoding = locale.getpreferredencoding()
130 self._parse()
131 self._connect()
132
133 def _parse(self):
134 if self.changeset:
135 return
136
137 d = os.getcwd()
138 try:
139 os.chdir(self.path)
140 id = None
141 state = 0
142 for l in os.popen("cvsps -A -u --cvs-direct -q"):
143 if state == 0: # header
144 if l.startswith("PatchSet"):
145 id = l[9:-2]
146 elif l.startswith("Date"):
147 date = util.parsedate(l[6:-1], ["%Y/%m/%d %H:%M:%S"])
148 date = util.datestr(date)
149 elif l.startswith("Branch"):
150 branch = l[8:-1]
151 self.parent[id] = self.lastbranch.get(branch, 'bad')
152 self.lastbranch[branch] = id
153 elif l.startswith("Ancestor branch"):
154 ancestor = l[17:-1]
155 self.parent[id] = self.lastbranch[ancestor]
156 elif l.startswith("Author"):
157 author = self.recode(l[8:-1])
158 elif l.startswith("Tag: "):
159 t = l[5:-1].rstrip()
160 if t != "(none)":
161 self.tags[t] = id
162 elif l.startswith("Log:"):
163 state = 1
164 log = ""
165 elif state == 1: # log
166 if l == "Members: \n":
167 files = {}
168 log = self.recode(log[:-1])
169 if log.isspace():
170 log = "*** empty log message ***\n"
171 state = 2
172 else:
173 log += l
174 elif state == 2:
175 if l == "\n": #
176 state = 0
177 p = [self.parent[id]]
178 if id == "1":
179 p = []
180 if branch == "HEAD":
181 branch = ""
182 c = commit(author=author, date=date, parents=p,
183 desc=log, branch=branch)
184 self.changeset[id] = c
185 self.files[id] = files
186 else:
187 colon = l.rfind(':')
188 file = l[1:colon]
189 rev = l[colon+1:-2]
190 rev = rev.split("->")[1]
191 files[file] = rev
192
193 self.heads = self.lastbranch.values()
194 finally:
195 os.chdir(d)
196
197 def _connect(self):
198 root = self.cvsroot
199 conntype = None
200 user, host = None, None
201 cmd = ['cvs', 'server']
202
203 self.ui.status("connecting to %s\n" % root)
204
2
205 if root.startswith(":pserver:"):
3 import os, time
206 root = root[9:]
4 from mercurial import hg
207 m = re.match(r'(?:(.*?)(?::(.*?))?@)?([^:\/]*)(?::(\d*))?(.*)',
208 root)
209 if m:
210 conntype = "pserver"
211 user, passw, serv, port, root = m.groups()
212 if not user:
213 user = "anonymous"
214 rr = ":pserver:" + user + "@" + serv + ":" + root
215 if port:
216 rr2, port = "-", int(port)
217 else:
218 rr2, port = rr, 2401
219 rr += str(port)
220
221 if not passw:
222 passw = "A"
223 pf = open(os.path.join(os.environ["HOME"], ".cvspass"))
224 for l in pf:
225 # :pserver:cvs@mea.tmt.tele.fi:/cvsroot/zmailer Ah<Z
226 m = re.match(r'(/\d+\s+/)?(.*)', l)
227 l = m.group(2)
228 w, p = l.split(' ', 1)
229 if w in [rr, rr2]:
230 passw = p
231 break
232 pf.close()
233
234 sck = socket.socket()
235 sck.connect((serv, port))
236 sck.send("\n".join(["BEGIN AUTH REQUEST", root, user, passw,
237 "END AUTH REQUEST", ""]))
238 if sck.recv(128) != "I LOVE YOU\n":
239 raise NoRepo("CVS pserver authentication failed")
240
241 self.writep = self.readp = sck.makefile('r+')
242
243 if not conntype and root.startswith(":local:"):
244 conntype = "local"
245 root = root[7:]
246
247 if not conntype:
248 # :ext:user@host/home/user/path/to/cvsroot
249 if root.startswith(":ext:"):
250 root = root[5:]
251 m = re.match(r'(?:([^@:/]+)@)?([^:/]+):?(.*)', root)
252 if not m:
253 conntype = "local"
254 else:
255 conntype = "rsh"
256 user, host, root = m.group(1), m.group(2), m.group(3)
257
258 if conntype != "pserver":
259 if conntype == "rsh":
260 rsh = os.environ.get("CVS_RSH" or "rsh")
261 if user:
262 cmd = [rsh, '-l', user, host] + cmd
263 else:
264 cmd = [rsh, host] + cmd
265
266 self.writep, self.readp = os.popen2(cmd)
267
268 self.realroot = root
269
270 self.writep.write("Root %s\n" % root)
271 self.writep.write("Valid-responses ok error Valid-requests Mode"
272 " M Mbinary E Checked-in Created Updated"
273 " Merged Removed\n")
274 self.writep.write("valid-requests\n")
275 self.writep.flush()
276 r = self.readp.readline()
277 if not r.startswith("Valid-requests"):
278 raise util.Abort("server sucks")
279 if "UseUnchanged" in r:
280 self.writep.write("UseUnchanged\n")
281 self.writep.flush()
282 r = self.readp.readline()
283
284 def getheads(self):
285 return self.heads
286
287 def _getfile(self, name, rev):
288 if rev.endswith("(DEAD)"):
289 raise IOError
290
291 args = ("-N -P -kk -r %s --" % rev).split()
292 args.append(os.path.join(self.cvsrepo, name))
293 for x in args:
294 self.writep.write("Argument %s\n" % x)
295 self.writep.write("Directory .\n%s\nco\n" % self.realroot)
296 self.writep.flush()
297
5
298 data = ""
6 from common import NoRepo, converter_sink
299 while 1:
300 line = self.readp.readline()
301 if line.startswith("Created ") or line.startswith("Updated "):
302 self.readp.readline() # path
303 self.readp.readline() # entries
304 mode = self.readp.readline()[:-1]
305 count = int(self.readp.readline()[:-1])
306 data = self.readp.read(count)
307 elif line.startswith(" "):
308 data += line[1:]
309 elif line.startswith("M "):
310 pass
311 elif line.startswith("Mbinary "):
312 count = int(self.readp.readline()[:-1])
313 data = self.readp.read(count)
314 else:
315 if line == "ok\n":
316 return (data, "x" in mode and "x" or "")
317 elif line.startswith("E "):
318 self.ui.warn("cvs server: %s\n" % line[2:])
319 elif line.startswith("Remove"):
320 l = self.readp.readline()
321 l = self.readp.readline()
322 if l != "ok\n":
323 raise util.Abort("unknown CVS response: %s" % l)
324 else:
325 raise util.Abort("unknown CVS response: %s" % line)
326
327 def getfile(self, file, rev):
328 data, mode = self._getfile(file, rev)
329 self.modecache[(file, rev)] = mode
330 return data
331
332 def getmode(self, file, rev):
333 return self.modecache[(file, rev)]
334
335 def getchanges(self, rev):
336 self.modecache = {}
337 files = self.files[rev]
338 cl = files.items()
339 cl.sort()
340 return cl
341
342 def recode(self, text):
343 return text.decode(self.encoding, "replace").encode("utf-8")
344
345 def getcommit(self, rev):
346 return self.changeset[rev]
347
348 def gettags(self):
349 return self.tags
350
351 class convert_git(converter_source):
352 def __init__(self, ui, path):
353 if os.path.isdir(path + "/.git"):
354 path += "/.git"
355 self.path = path
356 self.ui = ui
357 if not os.path.exists(path + "/objects"):
358 raise NoRepo("couldn't open GIT repo %s" % path)
359
360 def getheads(self):
361 fh = os.popen("GIT_DIR=%s git-rev-parse --verify HEAD" % self.path)
362 return [fh.read()[:-1]]
363
364 def catfile(self, rev, type):
365 if rev == "0" * 40: raise IOError()
366 fh = os.popen("GIT_DIR=%s git-cat-file %s %s 2>/dev/null"
367 % (self.path, type, rev))
368 return fh.read()
369
370 def getfile(self, name, rev):
371 return self.catfile(rev, "blob")
372
373 def getmode(self, name, rev):
374 return self.modecache[(name, rev)]
375
376 def getchanges(self, version):
377 self.modecache = {}
378 fh = os.popen("GIT_DIR=%s git-diff-tree --root -m -r %s"
379 % (self.path, version))
380 changes = []
381 for l in fh:
382 if "\t" not in l: continue
383 m, f = l[:-1].split("\t")
384 m = m.split()
385 h = m[3]
386 p = (m[1] == "100755")
387 s = (m[1] == "120000")
388 self.modecache[(f, h)] = (p and "x") or (s and "l") or ""
389 changes.append((f, h))
390 return changes
391
392 def getcommit(self, version):
393 c = self.catfile(version, "commit") # read the commit hash
394 end = c.find("\n\n")
395 message = c[end+2:]
396 message = recode(message)
397 l = c[:end].splitlines()
398 manifest = l[0].split()[1]
399 parents = []
400 for e in l[1:]:
401 n, v = e.split(" ", 1)
402 if n == "author":
403 p = v.split()
404 tm, tz = p[-2:]
405 author = " ".join(p[:-2])
406 if author[0] == "<": author = author[1:-1]
407 author = recode(author)
408 if n == "committer":
409 p = v.split()
410 tm, tz = p[-2:]
411 committer = " ".join(p[:-2])
412 if committer[0] == "<": committer = committer[1:-1]
413 committer = recode(committer)
414 message += "\ncommitter: %s\n" % committer
415 if n == "parent": parents.append(v)
416
417 tzs, tzh, tzm = tz[-5:-4] + "1", tz[-4:-2], tz[-2:]
418 tz = -int(tzs) * (int(tzh) * 3600 + int(tzm))
419 date = tm + " " + str(tz)
420
421 c = commit(parents=parents, date=date, author=author, desc=message)
422 return c
423
424 def gettags(self):
425 tags = {}
426 fh = os.popen('git-ls-remote --tags "%s" 2>/dev/null' % self.path)
427 prefix = 'refs/tags/'
428 for line in fh:
429 line = line.strip()
430 if not line.endswith("^{}"):
431 continue
432 node, tag = line.split(None, 1)
433 if not tag.startswith(prefix):
434 continue
435 tag = tag[len(prefix):-3]
436 tags[tag] = node
437
438 return tags
439
7
440 class convert_mercurial(converter_sink):
8 class convert_mercurial(converter_sink):
441 def __init__(self, ui, path):
9 def __init__(self, ui, path):
442 self.path = path
10 self.path = path
443 self.ui = ui
11 self.ui = ui
444 try:
12 try:
445 self.repo = hg.repository(self.ui, path)
13 self.repo = hg.repository(self.ui, path)
446 except:
14 except:
447 raise NoRepo("could open hg repo %s" % path)
15 raise NoRepo("could open hg repo %s" % path)
448
16
449 def mapfile(self):
17 def mapfile(self):
450 return os.path.join(self.path, ".hg", "shamap")
18 return os.path.join(self.path, ".hg", "shamap")
451
19
452 def getheads(self):
20 def getheads(self):
453 h = self.repo.changelog.heads()
21 h = self.repo.changelog.heads()
454 return [ hg.hex(x) for x in h ]
22 return [ hg.hex(x) for x in h ]
455
23
456 def putfile(self, f, e, data):
24 def putfile(self, f, e, data):
457 self.repo.wwrite(f, data, e)
25 self.repo.wwrite(f, data, e)
458 if self.repo.dirstate.state(f) == '?':
26 if self.repo.dirstate.state(f) == '?':
459 self.repo.dirstate.update([f], "a")
27 self.repo.dirstate.update([f], "a")
460
28
461 def delfile(self, f):
29 def delfile(self, f):
462 try:
30 try:
463 os.unlink(self.repo.wjoin(f))
31 os.unlink(self.repo.wjoin(f))
464 #self.repo.remove([f])
32 #self.repo.remove([f])
465 except:
33 except:
466 pass
34 pass
467
35
468 def putcommit(self, files, parents, commit):
36 def putcommit(self, files, parents, commit):
469 seen = {}
37 seen = {}
470 pl = []
38 pl = []
471 for p in parents:
39 for p in parents:
472 if p not in seen:
40 if p not in seen:
473 pl.append(p)
41 pl.append(p)
474 seen[p] = 1
42 seen[p] = 1
475 parents = pl
43 parents = pl
476
44
477 if len(parents) < 2: parents.append("0" * 40)
45 if len(parents) < 2: parents.append("0" * 40)
478 if len(parents) < 2: parents.append("0" * 40)
46 if len(parents) < 2: parents.append("0" * 40)
479 p2 = parents.pop(0)
47 p2 = parents.pop(0)
480
48
481 text = commit.desc
49 text = commit.desc
482 extra = {}
50 extra = {}
483 try:
51 try:
484 extra["branch"] = commit.branch
52 extra["branch"] = commit.branch
485 except AttributeError:
53 except AttributeError:
486 pass
54 pass
487
55
488 while parents:
56 while parents:
489 p1 = p2
57 p1 = p2
490 p2 = parents.pop(0)
58 p2 = parents.pop(0)
491 a = self.repo.rawcommit(files, text, commit.author, commit.date,
59 a = self.repo.rawcommit(files, text, commit.author, commit.date,
492 hg.bin(p1), hg.bin(p2), extra=extra)
60 hg.bin(p1), hg.bin(p2), extra=extra)
493 text = "(octopus merge fixup)\n"
61 text = "(octopus merge fixup)\n"
494 p2 = hg.hex(self.repo.changelog.tip())
62 p2 = hg.hex(self.repo.changelog.tip())
495
63
496 return p2
64 return p2
497
65
498 def puttags(self, tags):
66 def puttags(self, tags):
499 try:
67 try:
500 old = self.repo.wfile(".hgtags").read()
68 old = self.repo.wfile(".hgtags").read()
501 oldlines = old.splitlines(1)
69 oldlines = old.splitlines(1)
502 oldlines.sort()
70 oldlines.sort()
503 except:
71 except:
504 oldlines = []
72 oldlines = []
505
73
506 k = tags.keys()
74 k = tags.keys()
507 k.sort()
75 k.sort()
508 newlines = []
76 newlines = []
509 for tag in k:
77 for tag in k:
510 newlines.append("%s %s\n" % (tags[tag], tag))
78 newlines.append("%s %s\n" % (tags[tag], tag))
511
79
512 newlines.sort()
80 newlines.sort()
513
81
514 if newlines != oldlines:
82 if newlines != oldlines:
515 self.ui.status("updating tags\n")
83 self.ui.status("updating tags\n")
516 f = self.repo.wfile(".hgtags", "w")
84 f = self.repo.wfile(".hgtags", "w")
517 f.write("".join(newlines))
85 f.write("".join(newlines))
518 f.close()
86 f.close()
519 if not oldlines: self.repo.add([".hgtags"])
87 if not oldlines: self.repo.add([".hgtags"])
520 date = "%s 0" % int(time.mktime(time.gmtime()))
88 date = "%s 0" % int(time.mktime(time.gmtime()))
521 self.repo.rawcommit([".hgtags"], "update tags", "convert-repo",
89 self.repo.rawcommit([".hgtags"], "update tags", "convert-repo",
522 date, self.repo.changelog.tip(), hg.nullid)
90 date, self.repo.changelog.tip(), hg.nullid)
523 return hg.hex(self.repo.changelog.tip())
91 return hg.hex(self.repo.changelog.tip())
524
525 converters = [convert_cvs, convert_git, convert_mercurial]
526
527 def converter(ui, path):
528 if not os.path.isdir(path):
529 raise util.Abort("%s: not a directory" % path)
530 for c in converters:
531 try:
532 return c(ui, path)
533 except NoRepo:
534 pass
535 raise util.Abort("%s: unknown repository type" % path)
536
537 class convert(object):
538 def __init__(self, ui, source, dest, mapfile, opts):
539
540 self.source = source
541 self.dest = dest
542 self.ui = ui
543 self.mapfile = mapfile
544 self.opts = opts
545 self.commitcache = {}
546
547 self.map = {}
548 try:
549 for l in file(self.mapfile):
550 sv, dv = l[:-1].split()
551 self.map[sv] = dv
552 except IOError:
553 pass
554
555 def walktree(self, heads):
556 visit = heads
557 known = {}
558 parents = {}
559 while visit:
560 n = visit.pop(0)
561 if n in known or n in self.map: continue
562 known[n] = 1
563 self.commitcache[n] = self.source.getcommit(n)
564 cp = self.commitcache[n].parents
565 for p in cp:
566 parents.setdefault(n, []).append(p)
567 visit.append(p)
568
569 return parents
570
571 def toposort(self, parents):
572 visit = parents.keys()
573 seen = {}
574 children = {}
575
576 while visit:
577 n = visit.pop(0)
578 if n in seen: continue
579 seen[n] = 1
580 pc = 0
581 if n in parents:
582 for p in parents[n]:
583 if p not in self.map: pc += 1
584 visit.append(p)
585 children.setdefault(p, []).append(n)
586 if not pc: root = n
587
588 s = []
589 removed = {}
590 visit = children.keys()
591 while visit:
592 n = visit.pop(0)
593 if n in removed: continue
594 dep = 0
595 if n in parents:
596 for p in parents[n]:
597 if p in self.map: continue
598 if p not in removed:
599 # we're still dependent
600 visit.append(n)
601 dep = 1
602 break
603
604 if not dep:
605 # all n's parents are in the list
606 removed[n] = 1
607 if n not in self.map:
608 s.append(n)
609 if n in children:
610 for c in children[n]:
611 visit.insert(0, c)
612
613 if self.opts.get('datesort'):
614 depth = {}
615 for n in s:
616 depth[n] = 0
617 pl = [p for p in self.commitcache[n].parents
618 if p not in self.map]
619 if pl:
620 depth[n] = max([depth[p] for p in pl]) + 1
621
622 s = [(depth[n], self.commitcache[n].date, n) for n in s]
623 s.sort()
624 s = [e[2] for e in s]
625
626 return s
627
628 def copy(self, rev):
629 c = self.commitcache[rev]
630 files = self.source.getchanges(rev)
631
632 for f, v in files:
633 try:
634 data = self.source.getfile(f, v)
635 except IOError, inst:
636 self.dest.delfile(f)
637 else:
638 e = self.source.getmode(f, v)
639 self.dest.putfile(f, e, data)
640
641 r = [self.map[v] for v in c.parents]
642 f = [f for f, v in files]
643 self.map[rev] = self.dest.putcommit(f, r, c)
644 file(self.mapfile, "a").write("%s %s\n" % (rev, self.map[rev]))
645
646 def convert(self):
647 self.ui.status("scanning source...\n")
648 heads = self.source.getheads()
649 parents = self.walktree(heads)
650 self.ui.status("sorting...\n")
651 t = self.toposort(parents)
652 num = len(t)
653 c = None
654
655 self.ui.status("converting...\n")
656 for c in t:
657 num -= 1
658 desc = self.commitcache[c].desc
659 if "\n" in desc:
660 desc = desc.splitlines()[0]
661 self.ui.status("%d %s\n" % (num, desc))
662 self.copy(c)
663
664 tags = self.source.gettags()
665 ctags = {}
666 for k in tags:
667 v = tags[k]
668 if v in self.map:
669 ctags[k] = self.map[v]
670
671 if c and ctags:
672 nrev = self.dest.puttags(ctags)
673 # write another hash correspondence to override the previous
674 # one so we don't end up with extra tag heads
675 if nrev:
676 file(self.mapfile, "a").write("%s %s\n" % (c, nrev))
677
678 def _convert(ui, src, dest=None, mapfile=None, **opts):
679 '''Convert a foreign SCM repository to a Mercurial one.
680
681 Accepted source formats:
682 - GIT
683 - CVS
684
685 Accepted destination formats:
686 - Mercurial
687
688 If destination isn't given, a new Mercurial repo named <src>-hg will
689 be created. If <mapfile> isn't given, it will be put in a default
690 location (<dest>/.hg/shamap by default)
691
692 The <mapfile> is a simple text file that maps each source commit ID to
693 the destination ID for that revision, like so:
694
695 <source ID> <destination ID>
696
697 If the file doesn't exist, it's automatically created. It's updated
698 on each commit copied, so convert-repo can be interrupted and can
699 be run repeatedly to copy new commits.
700 '''
701
702 srcc = converter(ui, src)
703 if not hasattr(srcc, "getcommit"):
704 raise util.Abort("%s: can't read from this repo type" % src)
705
706 if not dest:
707 dest = src + "-hg"
708 ui.status("assuming destination %s\n" % dest)
709
710 # Try to be smart and initalize things when required
711 if os.path.isdir(dest):
712 if len(os.listdir(dest)) > 0:
713 try:
714 hg.repository(ui, dest)
715 ui.status("destination %s is a Mercurial repository\n" % dest)
716 except hg.RepoError:
717 raise util.Abort(
718 "destination directory %s is not empty.\n"
719 "Please specify an empty directory to be initialized\n"
720 "or an already initialized mercurial repository"
721 % dest)
722 else:
723 ui.status("initializing destination %s repository\n" % dest)
724 hg.repository(ui, dest, create=True)
725 elif os.path.exists(dest):
726 raise util.Abort("destination %s exists and is not a directory" % dest)
727 else:
728 ui.status("initializing destination %s repository\n" % dest)
729 hg.repository(ui, dest, create=True)
730
731 destc = converter(ui, dest)
732 if not hasattr(destc, "putcommit"):
733 raise util.Abort("%s: can't write to this repo type" % src)
734
735 if not mapfile:
736 try:
737 mapfile = destc.mapfile()
738 except:
739 mapfile = os.path.join(destc, "map")
740
741 c = convert(ui, srcc, destc, mapfile, opts)
742 c.convert()
743
744 cmdtable = {
745 "convert":
746 (_convert,
747 [('', 'datesort', None, 'try to sort changesets by date')],
748 'hg convert [OPTION]... SOURCE [DEST [MAPFILE]]'),
749 }
General Comments 0
You need to be logged in to leave comments. Login now