##// END OF EJS Templates
Split convert extension into common and repository type modules
Brendan Cully -
r4536:cc9b7921 default
parent child Browse files
Show More
This diff has been collapsed as it changes many lines, (519 lines changed) Show them Hide them
@@ -5,523 +5,16 b''
5 # This software may be used and distributed according to the terms
5 # This software may be used and distributed according to the terms
6 # of the GNU General Public License, incorporated herein by reference.
6 # of the GNU General Public License, incorporated herein by reference.
7
7
8 import sys, os, zlib, sha, time, re, locale, socket
8 from common import NoRepo
9 from cvs import convert_cvs
10 from git import convert_git
11 from hg import convert_mercurial
12
13 import os
9 from mercurial import hg, ui, util, commands
14 from mercurial import hg, ui, util, commands
10
15
11 commands.norepo += " convert"
16 commands.norepo += " convert"
12
17
13 class NoRepo(Exception): pass
14
15 class commit(object):
16 def __init__(self, **parts):
17 for x in "author date desc parents".split():
18 if not x in parts:
19 raise util.Abort("commit missing field %s" % x)
20 self.__dict__.update(parts)
21
22 def recode(s):
23 try:
24 return s.decode("utf-8").encode("utf-8")
25 except:
26 try:
27 return s.decode("latin-1").encode("utf-8")
28 except:
29 return s.decode("utf-8", "replace").encode("utf-8")
30
31 class converter_source(object):
32 """Conversion source interface"""
33
34 def __init__(self, ui, path):
35 """Initialize conversion source (or raise NoRepo("message")
36 exception if path is not a valid repository)"""
37 raise NotImplementedError()
38
39 def getheads(self):
40 """Return a list of this repository's heads"""
41 raise NotImplementedError()
42
43 def getfile(self, name, rev):
44 """Return file contents as a string"""
45 raise NotImplementedError()
46
47 def getmode(self, name, rev):
48 """Return file mode, eg. '', 'x', or 'l'"""
49 raise NotImplementedError()
50
51 def getchanges(self, version):
52 """Return sorted list of (filename, id) tuples for all files changed in rev.
53
54 id just tells us which revision to return in getfile(), e.g. in
55 git it's an object hash."""
56 raise NotImplementedError()
57
58 def getcommit(self, version):
59 """Return the commit object for version"""
60 raise NotImplementedError()
61
62 def gettags(self):
63 """Return the tags as a dictionary of name: revision"""
64 raise NotImplementedError()
65
66 class converter_sink(object):
67 """Conversion sink (target) interface"""
68
69 def __init__(self, ui, path):
70 """Initialize conversion sink (or raise NoRepo("message")
71 exception if path is not a valid repository)"""
72 raise NotImplementedError()
73
74 def getheads(self):
75 """Return a list of this repository's heads"""
76 raise NotImplementedError()
77
78 def mapfile(self):
79 """Path to a file that will contain lines
80 source_rev_id sink_rev_id
81 mapping equivalent revision identifiers for each system."""
82 raise NotImplementedError()
83
84 def putfile(self, f, e, data):
85 """Put file for next putcommit().
86 f: path to file
87 e: '', 'x', or 'l' (regular file, executable, or symlink)
88 data: file contents"""
89 raise NotImplementedError()
90
91 def delfile(self, f):
92 """Delete file for next putcommit().
93 f: path to file"""
94 raise NotImplementedError()
95
96 def putcommit(self, files, parents, commit):
97 """Create a revision with all changed files listed in 'files'
98 and having listed parents. 'commit' is a commit object containing
99 at a minimum the author, date, and message for this changeset.
100 Called after putfile() and delfile() calls. Note that the sink
101 repository is not told to update itself to a particular revision
102 (or even what that revision would be) before it receives the
103 file data."""
104 raise NotImplementedError()
105
106 def puttags(self, tags):
107 """Put tags into sink.
108 tags: {tagname: sink_rev_id, ...}"""
109 raise NotImplementedError()
110
111
112 # CVS conversion code inspired by hg-cvs-import and git-cvsimport
113 class convert_cvs(converter_source):
114 def __init__(self, ui, path):
115 self.path = path
116 self.ui = ui
117 cvs = os.path.join(path, "CVS")
118 if not os.path.exists(cvs):
119 raise NoRepo("couldn't open CVS repo %s" % path)
120
121 self.changeset = {}
122 self.files = {}
123 self.tags = {}
124 self.lastbranch = {}
125 self.parent = {}
126 self.socket = None
127 self.cvsroot = file(os.path.join(cvs, "Root")).read()[:-1]
128 self.cvsrepo = file(os.path.join(cvs, "Repository")).read()[:-1]
129 self.encoding = locale.getpreferredencoding()
130 self._parse()
131 self._connect()
132
133 def _parse(self):
134 if self.changeset:
135 return
136
137 d = os.getcwd()
138 try:
139 os.chdir(self.path)
140 id = None
141 state = 0
142 for l in os.popen("cvsps -A -u --cvs-direct -q"):
143 if state == 0: # header
144 if l.startswith("PatchSet"):
145 id = l[9:-2]
146 elif l.startswith("Date"):
147 date = util.parsedate(l[6:-1], ["%Y/%m/%d %H:%M:%S"])
148 date = util.datestr(date)
149 elif l.startswith("Branch"):
150 branch = l[8:-1]
151 self.parent[id] = self.lastbranch.get(branch, 'bad')
152 self.lastbranch[branch] = id
153 elif l.startswith("Ancestor branch"):
154 ancestor = l[17:-1]
155 self.parent[id] = self.lastbranch[ancestor]
156 elif l.startswith("Author"):
157 author = self.recode(l[8:-1])
158 elif l.startswith("Tag: "):
159 t = l[5:-1].rstrip()
160 if t != "(none)":
161 self.tags[t] = id
162 elif l.startswith("Log:"):
163 state = 1
164 log = ""
165 elif state == 1: # log
166 if l == "Members: \n":
167 files = {}
168 log = self.recode(log[:-1])
169 if log.isspace():
170 log = "*** empty log message ***\n"
171 state = 2
172 else:
173 log += l
174 elif state == 2:
175 if l == "\n": #
176 state = 0
177 p = [self.parent[id]]
178 if id == "1":
179 p = []
180 if branch == "HEAD":
181 branch = ""
182 c = commit(author=author, date=date, parents=p,
183 desc=log, branch=branch)
184 self.changeset[id] = c
185 self.files[id] = files
186 else:
187 colon = l.rfind(':')
188 file = l[1:colon]
189 rev = l[colon+1:-2]
190 rev = rev.split("->")[1]
191 files[file] = rev
192
193 self.heads = self.lastbranch.values()
194 finally:
195 os.chdir(d)
196
197 def _connect(self):
198 root = self.cvsroot
199 conntype = None
200 user, host = None, None
201 cmd = ['cvs', 'server']
202
203 self.ui.status("connecting to %s\n" % root)
204
205 if root.startswith(":pserver:"):
206 root = root[9:]
207 m = re.match(r'(?:(.*?)(?::(.*?))?@)?([^:\/]*)(?::(\d*))?(.*)',
208 root)
209 if m:
210 conntype = "pserver"
211 user, passw, serv, port, root = m.groups()
212 if not user:
213 user = "anonymous"
214 rr = ":pserver:" + user + "@" + serv + ":" + root
215 if port:
216 rr2, port = "-", int(port)
217 else:
218 rr2, port = rr, 2401
219 rr += str(port)
220
221 if not passw:
222 passw = "A"
223 pf = open(os.path.join(os.environ["HOME"], ".cvspass"))
224 for l in pf:
225 # :pserver:cvs@mea.tmt.tele.fi:/cvsroot/zmailer Ah<Z
226 m = re.match(r'(/\d+\s+/)?(.*)', l)
227 l = m.group(2)
228 w, p = l.split(' ', 1)
229 if w in [rr, rr2]:
230 passw = p
231 break
232 pf.close()
233
234 sck = socket.socket()
235 sck.connect((serv, port))
236 sck.send("\n".join(["BEGIN AUTH REQUEST", root, user, passw,
237 "END AUTH REQUEST", ""]))
238 if sck.recv(128) != "I LOVE YOU\n":
239 raise NoRepo("CVS pserver authentication failed")
240
241 self.writep = self.readp = sck.makefile('r+')
242
243 if not conntype and root.startswith(":local:"):
244 conntype = "local"
245 root = root[7:]
246
247 if not conntype:
248 # :ext:user@host/home/user/path/to/cvsroot
249 if root.startswith(":ext:"):
250 root = root[5:]
251 m = re.match(r'(?:([^@:/]+)@)?([^:/]+):?(.*)', root)
252 if not m:
253 conntype = "local"
254 else:
255 conntype = "rsh"
256 user, host, root = m.group(1), m.group(2), m.group(3)
257
258 if conntype != "pserver":
259 if conntype == "rsh":
260 rsh = os.environ.get("CVS_RSH" or "rsh")
261 if user:
262 cmd = [rsh, '-l', user, host] + cmd
263 else:
264 cmd = [rsh, host] + cmd
265
266 self.writep, self.readp = os.popen2(cmd)
267
268 self.realroot = root
269
270 self.writep.write("Root %s\n" % root)
271 self.writep.write("Valid-responses ok error Valid-requests Mode"
272 " M Mbinary E Checked-in Created Updated"
273 " Merged Removed\n")
274 self.writep.write("valid-requests\n")
275 self.writep.flush()
276 r = self.readp.readline()
277 if not r.startswith("Valid-requests"):
278 raise util.Abort("server sucks")
279 if "UseUnchanged" in r:
280 self.writep.write("UseUnchanged\n")
281 self.writep.flush()
282 r = self.readp.readline()
283
284 def getheads(self):
285 return self.heads
286
287 def _getfile(self, name, rev):
288 if rev.endswith("(DEAD)"):
289 raise IOError
290
291 args = ("-N -P -kk -r %s --" % rev).split()
292 args.append(os.path.join(self.cvsrepo, name))
293 for x in args:
294 self.writep.write("Argument %s\n" % x)
295 self.writep.write("Directory .\n%s\nco\n" % self.realroot)
296 self.writep.flush()
297
298 data = ""
299 while 1:
300 line = self.readp.readline()
301 if line.startswith("Created ") or line.startswith("Updated "):
302 self.readp.readline() # path
303 self.readp.readline() # entries
304 mode = self.readp.readline()[:-1]
305 count = int(self.readp.readline()[:-1])
306 data = self.readp.read(count)
307 elif line.startswith(" "):
308 data += line[1:]
309 elif line.startswith("M "):
310 pass
311 elif line.startswith("Mbinary "):
312 count = int(self.readp.readline()[:-1])
313 data = self.readp.read(count)
314 else:
315 if line == "ok\n":
316 return (data, "x" in mode and "x" or "")
317 elif line.startswith("E "):
318 self.ui.warn("cvs server: %s\n" % line[2:])
319 elif line.startswith("Remove"):
320 l = self.readp.readline()
321 l = self.readp.readline()
322 if l != "ok\n":
323 raise util.Abort("unknown CVS response: %s" % l)
324 else:
325 raise util.Abort("unknown CVS response: %s" % line)
326
327 def getfile(self, file, rev):
328 data, mode = self._getfile(file, rev)
329 self.modecache[(file, rev)] = mode
330 return data
331
332 def getmode(self, file, rev):
333 return self.modecache[(file, rev)]
334
335 def getchanges(self, rev):
336 self.modecache = {}
337 files = self.files[rev]
338 cl = files.items()
339 cl.sort()
340 return cl
341
342 def recode(self, text):
343 return text.decode(self.encoding, "replace").encode("utf-8")
344
345 def getcommit(self, rev):
346 return self.changeset[rev]
347
348 def gettags(self):
349 return self.tags
350
351 class convert_git(converter_source):
352 def __init__(self, ui, path):
353 if os.path.isdir(path + "/.git"):
354 path += "/.git"
355 self.path = path
356 self.ui = ui
357 if not os.path.exists(path + "/objects"):
358 raise NoRepo("couldn't open GIT repo %s" % path)
359
360 def getheads(self):
361 fh = os.popen("GIT_DIR=%s git-rev-parse --verify HEAD" % self.path)
362 return [fh.read()[:-1]]
363
364 def catfile(self, rev, type):
365 if rev == "0" * 40: raise IOError()
366 fh = os.popen("GIT_DIR=%s git-cat-file %s %s 2>/dev/null"
367 % (self.path, type, rev))
368 return fh.read()
369
370 def getfile(self, name, rev):
371 return self.catfile(rev, "blob")
372
373 def getmode(self, name, rev):
374 return self.modecache[(name, rev)]
375
376 def getchanges(self, version):
377 self.modecache = {}
378 fh = os.popen("GIT_DIR=%s git-diff-tree --root -m -r %s"
379 % (self.path, version))
380 changes = []
381 for l in fh:
382 if "\t" not in l: continue
383 m, f = l[:-1].split("\t")
384 m = m.split()
385 h = m[3]
386 p = (m[1] == "100755")
387 s = (m[1] == "120000")
388 self.modecache[(f, h)] = (p and "x") or (s and "l") or ""
389 changes.append((f, h))
390 return changes
391
392 def getcommit(self, version):
393 c = self.catfile(version, "commit") # read the commit hash
394 end = c.find("\n\n")
395 message = c[end+2:]
396 message = recode(message)
397 l = c[:end].splitlines()
398 manifest = l[0].split()[1]
399 parents = []
400 for e in l[1:]:
401 n, v = e.split(" ", 1)
402 if n == "author":
403 p = v.split()
404 tm, tz = p[-2:]
405 author = " ".join(p[:-2])
406 if author[0] == "<": author = author[1:-1]
407 author = recode(author)
408 if n == "committer":
409 p = v.split()
410 tm, tz = p[-2:]
411 committer = " ".join(p[:-2])
412 if committer[0] == "<": committer = committer[1:-1]
413 committer = recode(committer)
414 message += "\ncommitter: %s\n" % committer
415 if n == "parent": parents.append(v)
416
417 tzs, tzh, tzm = tz[-5:-4] + "1", tz[-4:-2], tz[-2:]
418 tz = -int(tzs) * (int(tzh) * 3600 + int(tzm))
419 date = tm + " " + str(tz)
420
421 c = commit(parents=parents, date=date, author=author, desc=message)
422 return c
423
424 def gettags(self):
425 tags = {}
426 fh = os.popen('git-ls-remote --tags "%s" 2>/dev/null' % self.path)
427 prefix = 'refs/tags/'
428 for line in fh:
429 line = line.strip()
430 if not line.endswith("^{}"):
431 continue
432 node, tag = line.split(None, 1)
433 if not tag.startswith(prefix):
434 continue
435 tag = tag[len(prefix):-3]
436 tags[tag] = node
437
438 return tags
439
440 class convert_mercurial(converter_sink):
441 def __init__(self, ui, path):
442 self.path = path
443 self.ui = ui
444 try:
445 self.repo = hg.repository(self.ui, path)
446 except:
447 raise NoRepo("could open hg repo %s" % path)
448
449 def mapfile(self):
450 return os.path.join(self.path, ".hg", "shamap")
451
452 def getheads(self):
453 h = self.repo.changelog.heads()
454 return [ hg.hex(x) for x in h ]
455
456 def putfile(self, f, e, data):
457 self.repo.wwrite(f, data, e)
458 if self.repo.dirstate.state(f) == '?':
459 self.repo.dirstate.update([f], "a")
460
461 def delfile(self, f):
462 try:
463 os.unlink(self.repo.wjoin(f))
464 #self.repo.remove([f])
465 except:
466 pass
467
468 def putcommit(self, files, parents, commit):
469 seen = {}
470 pl = []
471 for p in parents:
472 if p not in seen:
473 pl.append(p)
474 seen[p] = 1
475 parents = pl
476
477 if len(parents) < 2: parents.append("0" * 40)
478 if len(parents) < 2: parents.append("0" * 40)
479 p2 = parents.pop(0)
480
481 text = commit.desc
482 extra = {}
483 try:
484 extra["branch"] = commit.branch
485 except AttributeError:
486 pass
487
488 while parents:
489 p1 = p2
490 p2 = parents.pop(0)
491 a = self.repo.rawcommit(files, text, commit.author, commit.date,
492 hg.bin(p1), hg.bin(p2), extra=extra)
493 text = "(octopus merge fixup)\n"
494 p2 = hg.hex(self.repo.changelog.tip())
495
496 return p2
497
498 def puttags(self, tags):
499 try:
500 old = self.repo.wfile(".hgtags").read()
501 oldlines = old.splitlines(1)
502 oldlines.sort()
503 except:
504 oldlines = []
505
506 k = tags.keys()
507 k.sort()
508 newlines = []
509 for tag in k:
510 newlines.append("%s %s\n" % (tags[tag], tag))
511
512 newlines.sort()
513
514 if newlines != oldlines:
515 self.ui.status("updating tags\n")
516 f = self.repo.wfile(".hgtags", "w")
517 f.write("".join(newlines))
518 f.close()
519 if not oldlines: self.repo.add([".hgtags"])
520 date = "%s 0" % int(time.mktime(time.gmtime()))
521 self.repo.rawcommit([".hgtags"], "update tags", "convert-repo",
522 date, self.repo.changelog.tip(), hg.nullid)
523 return hg.hex(self.repo.changelog.tip())
524
525 converters = [convert_cvs, convert_git, convert_mercurial]
18 converters = [convert_cvs, convert_git, convert_mercurial]
526
19
527 def converter(ui, path):
20 def converter(ui, path):
This diff has been collapsed as it changes many lines, (661 lines changed) Show them Hide them
@@ -1,14 +1,4 b''
1 # convert.py Foreign SCM converter
1 # common code for the convert extension
2 #
3 # Copyright 2005, 2006 Matt Mackall <mpm@selenic.com>
4 #
5 # This software may be used and distributed according to the terms
6 # of the GNU General Public License, incorporated herein by reference.
7
8 import sys, os, zlib, sha, time, re, locale, socket
9 from mercurial import hg, ui, util, commands
10
11 commands.norepo += " convert"
12
2
13 class NoRepo(Exception): pass
3 class NoRepo(Exception): pass
14
4
@@ -19,15 +9,6 b' class commit(object):'
19 raise util.Abort("commit missing field %s" % x)
9 raise util.Abort("commit missing field %s" % x)
20 self.__dict__.update(parts)
10 self.__dict__.update(parts)
21
11
22 def recode(s):
23 try:
24 return s.decode("utf-8").encode("utf-8")
25 except:
26 try:
27 return s.decode("latin-1").encode("utf-8")
28 except:
29 return s.decode("utf-8", "replace").encode("utf-8")
30
31 class converter_source(object):
12 class converter_source(object):
32 """Conversion source interface"""
13 """Conversion source interface"""
33
14
@@ -107,643 +88,3 b' class converter_sink(object):'
107 """Put tags into sink.
88 """Put tags into sink.
108 tags: {tagname: sink_rev_id, ...}"""
89 tags: {tagname: sink_rev_id, ...}"""
109 raise NotImplementedError()
90 raise NotImplementedError()
110
111
112 # CVS conversion code inspired by hg-cvs-import and git-cvsimport
113 class convert_cvs(converter_source):
114 def __init__(self, ui, path):
115 self.path = path
116 self.ui = ui
117 cvs = os.path.join(path, "CVS")
118 if not os.path.exists(cvs):
119 raise NoRepo("couldn't open CVS repo %s" % path)
120
121 self.changeset = {}
122 self.files = {}
123 self.tags = {}
124 self.lastbranch = {}
125 self.parent = {}
126 self.socket = None
127 self.cvsroot = file(os.path.join(cvs, "Root")).read()[:-1]
128 self.cvsrepo = file(os.path.join(cvs, "Repository")).read()[:-1]
129 self.encoding = locale.getpreferredencoding()
130 self._parse()
131 self._connect()
132
133 def _parse(self):
134 if self.changeset:
135 return
136
137 d = os.getcwd()
138 try:
139 os.chdir(self.path)
140 id = None
141 state = 0
142 for l in os.popen("cvsps -A -u --cvs-direct -q"):
143 if state == 0: # header
144 if l.startswith("PatchSet"):
145 id = l[9:-2]
146 elif l.startswith("Date"):
147 date = util.parsedate(l[6:-1], ["%Y/%m/%d %H:%M:%S"])
148 date = util.datestr(date)
149 elif l.startswith("Branch"):
150 branch = l[8:-1]
151 self.parent[id] = self.lastbranch.get(branch, 'bad')
152 self.lastbranch[branch] = id
153 elif l.startswith("Ancestor branch"):
154 ancestor = l[17:-1]
155 self.parent[id] = self.lastbranch[ancestor]
156 elif l.startswith("Author"):
157 author = self.recode(l[8:-1])
158 elif l.startswith("Tag: "):
159 t = l[5:-1].rstrip()
160 if t != "(none)":
161 self.tags[t] = id
162 elif l.startswith("Log:"):
163 state = 1
164 log = ""
165 elif state == 1: # log
166 if l == "Members: \n":
167 files = {}
168 log = self.recode(log[:-1])
169 if log.isspace():
170 log = "*** empty log message ***\n"
171 state = 2
172 else:
173 log += l
174 elif state == 2:
175 if l == "\n": #
176 state = 0
177 p = [self.parent[id]]
178 if id == "1":
179 p = []
180 if branch == "HEAD":
181 branch = ""
182 c = commit(author=author, date=date, parents=p,
183 desc=log, branch=branch)
184 self.changeset[id] = c
185 self.files[id] = files
186 else:
187 colon = l.rfind(':')
188 file = l[1:colon]
189 rev = l[colon+1:-2]
190 rev = rev.split("->")[1]
191 files[file] = rev
192
193 self.heads = self.lastbranch.values()
194 finally:
195 os.chdir(d)
196
197 def _connect(self):
198 root = self.cvsroot
199 conntype = None
200 user, host = None, None
201 cmd = ['cvs', 'server']
202
203 self.ui.status("connecting to %s\n" % root)
204
205 if root.startswith(":pserver:"):
206 root = root[9:]
207 m = re.match(r'(?:(.*?)(?::(.*?))?@)?([^:\/]*)(?::(\d*))?(.*)',
208 root)
209 if m:
210 conntype = "pserver"
211 user, passw, serv, port, root = m.groups()
212 if not user:
213 user = "anonymous"
214 rr = ":pserver:" + user + "@" + serv + ":" + root
215 if port:
216 rr2, port = "-", int(port)
217 else:
218 rr2, port = rr, 2401
219 rr += str(port)
220
221 if not passw:
222 passw = "A"
223 pf = open(os.path.join(os.environ["HOME"], ".cvspass"))
224 for l in pf:
225 # :pserver:cvs@mea.tmt.tele.fi:/cvsroot/zmailer Ah<Z
226 m = re.match(r'(/\d+\s+/)?(.*)', l)
227 l = m.group(2)
228 w, p = l.split(' ', 1)
229 if w in [rr, rr2]:
230 passw = p
231 break
232 pf.close()
233
234 sck = socket.socket()
235 sck.connect((serv, port))
236 sck.send("\n".join(["BEGIN AUTH REQUEST", root, user, passw,
237 "END AUTH REQUEST", ""]))
238 if sck.recv(128) != "I LOVE YOU\n":
239 raise NoRepo("CVS pserver authentication failed")
240
241 self.writep = self.readp = sck.makefile('r+')
242
243 if not conntype and root.startswith(":local:"):
244 conntype = "local"
245 root = root[7:]
246
247 if not conntype:
248 # :ext:user@host/home/user/path/to/cvsroot
249 if root.startswith(":ext:"):
250 root = root[5:]
251 m = re.match(r'(?:([^@:/]+)@)?([^:/]+):?(.*)', root)
252 if not m:
253 conntype = "local"
254 else:
255 conntype = "rsh"
256 user, host, root = m.group(1), m.group(2), m.group(3)
257
258 if conntype != "pserver":
259 if conntype == "rsh":
260 rsh = os.environ.get("CVS_RSH" or "rsh")
261 if user:
262 cmd = [rsh, '-l', user, host] + cmd
263 else:
264 cmd = [rsh, host] + cmd
265
266 self.writep, self.readp = os.popen2(cmd)
267
268 self.realroot = root
269
270 self.writep.write("Root %s\n" % root)
271 self.writep.write("Valid-responses ok error Valid-requests Mode"
272 " M Mbinary E Checked-in Created Updated"
273 " Merged Removed\n")
274 self.writep.write("valid-requests\n")
275 self.writep.flush()
276 r = self.readp.readline()
277 if not r.startswith("Valid-requests"):
278 raise util.Abort("server sucks")
279 if "UseUnchanged" in r:
280 self.writep.write("UseUnchanged\n")
281 self.writep.flush()
282 r = self.readp.readline()
283
284 def getheads(self):
285 return self.heads
286
287 def _getfile(self, name, rev):
288 if rev.endswith("(DEAD)"):
289 raise IOError
290
291 args = ("-N -P -kk -r %s --" % rev).split()
292 args.append(os.path.join(self.cvsrepo, name))
293 for x in args:
294 self.writep.write("Argument %s\n" % x)
295 self.writep.write("Directory .\n%s\nco\n" % self.realroot)
296 self.writep.flush()
297
298 data = ""
299 while 1:
300 line = self.readp.readline()
301 if line.startswith("Created ") or line.startswith("Updated "):
302 self.readp.readline() # path
303 self.readp.readline() # entries
304 mode = self.readp.readline()[:-1]
305 count = int(self.readp.readline()[:-1])
306 data = self.readp.read(count)
307 elif line.startswith(" "):
308 data += line[1:]
309 elif line.startswith("M "):
310 pass
311 elif line.startswith("Mbinary "):
312 count = int(self.readp.readline()[:-1])
313 data = self.readp.read(count)
314 else:
315 if line == "ok\n":
316 return (data, "x" in mode and "x" or "")
317 elif line.startswith("E "):
318 self.ui.warn("cvs server: %s\n" % line[2:])
319 elif line.startswith("Remove"):
320 l = self.readp.readline()
321 l = self.readp.readline()
322 if l != "ok\n":
323 raise util.Abort("unknown CVS response: %s" % l)
324 else:
325 raise util.Abort("unknown CVS response: %s" % line)
326
327 def getfile(self, file, rev):
328 data, mode = self._getfile(file, rev)
329 self.modecache[(file, rev)] = mode
330 return data
331
332 def getmode(self, file, rev):
333 return self.modecache[(file, rev)]
334
335 def getchanges(self, rev):
336 self.modecache = {}
337 files = self.files[rev]
338 cl = files.items()
339 cl.sort()
340 return cl
341
342 def recode(self, text):
343 return text.decode(self.encoding, "replace").encode("utf-8")
344
345 def getcommit(self, rev):
346 return self.changeset[rev]
347
348 def gettags(self):
349 return self.tags
350
351 class convert_git(converter_source):
352 def __init__(self, ui, path):
353 if os.path.isdir(path + "/.git"):
354 path += "/.git"
355 self.path = path
356 self.ui = ui
357 if not os.path.exists(path + "/objects"):
358 raise NoRepo("couldn't open GIT repo %s" % path)
359
360 def getheads(self):
361 fh = os.popen("GIT_DIR=%s git-rev-parse --verify HEAD" % self.path)
362 return [fh.read()[:-1]]
363
364 def catfile(self, rev, type):
365 if rev == "0" * 40: raise IOError()
366 fh = os.popen("GIT_DIR=%s git-cat-file %s %s 2>/dev/null"
367 % (self.path, type, rev))
368 return fh.read()
369
370 def getfile(self, name, rev):
371 return self.catfile(rev, "blob")
372
373 def getmode(self, name, rev):
374 return self.modecache[(name, rev)]
375
376 def getchanges(self, version):
377 self.modecache = {}
378 fh = os.popen("GIT_DIR=%s git-diff-tree --root -m -r %s"
379 % (self.path, version))
380 changes = []
381 for l in fh:
382 if "\t" not in l: continue
383 m, f = l[:-1].split("\t")
384 m = m.split()
385 h = m[3]
386 p = (m[1] == "100755")
387 s = (m[1] == "120000")
388 self.modecache[(f, h)] = (p and "x") or (s and "l") or ""
389 changes.append((f, h))
390 return changes
391
392 def getcommit(self, version):
393 c = self.catfile(version, "commit") # read the commit hash
394 end = c.find("\n\n")
395 message = c[end+2:]
396 message = recode(message)
397 l = c[:end].splitlines()
398 manifest = l[0].split()[1]
399 parents = []
400 for e in l[1:]:
401 n, v = e.split(" ", 1)
402 if n == "author":
403 p = v.split()
404 tm, tz = p[-2:]
405 author = " ".join(p[:-2])
406 if author[0] == "<": author = author[1:-1]
407 author = recode(author)
408 if n == "committer":
409 p = v.split()
410 tm, tz = p[-2:]
411 committer = " ".join(p[:-2])
412 if committer[0] == "<": committer = committer[1:-1]
413 committer = recode(committer)
414 message += "\ncommitter: %s\n" % committer
415 if n == "parent": parents.append(v)
416
417 tzs, tzh, tzm = tz[-5:-4] + "1", tz[-4:-2], tz[-2:]
418 tz = -int(tzs) * (int(tzh) * 3600 + int(tzm))
419 date = tm + " " + str(tz)
420
421 c = commit(parents=parents, date=date, author=author, desc=message)
422 return c
423
424 def gettags(self):
425 tags = {}
426 fh = os.popen('git-ls-remote --tags "%s" 2>/dev/null' % self.path)
427 prefix = 'refs/tags/'
428 for line in fh:
429 line = line.strip()
430 if not line.endswith("^{}"):
431 continue
432 node, tag = line.split(None, 1)
433 if not tag.startswith(prefix):
434 continue
435 tag = tag[len(prefix):-3]
436 tags[tag] = node
437
438 return tags
439
440 class convert_mercurial(converter_sink):
441 def __init__(self, ui, path):
442 self.path = path
443 self.ui = ui
444 try:
445 self.repo = hg.repository(self.ui, path)
446 except:
447 raise NoRepo("could open hg repo %s" % path)
448
449 def mapfile(self):
450 return os.path.join(self.path, ".hg", "shamap")
451
452 def getheads(self):
453 h = self.repo.changelog.heads()
454 return [ hg.hex(x) for x in h ]
455
456 def putfile(self, f, e, data):
457 self.repo.wwrite(f, data, e)
458 if self.repo.dirstate.state(f) == '?':
459 self.repo.dirstate.update([f], "a")
460
461 def delfile(self, f):
462 try:
463 os.unlink(self.repo.wjoin(f))
464 #self.repo.remove([f])
465 except:
466 pass
467
468 def putcommit(self, files, parents, commit):
469 seen = {}
470 pl = []
471 for p in parents:
472 if p not in seen:
473 pl.append(p)
474 seen[p] = 1
475 parents = pl
476
477 if len(parents) < 2: parents.append("0" * 40)
478 if len(parents) < 2: parents.append("0" * 40)
479 p2 = parents.pop(0)
480
481 text = commit.desc
482 extra = {}
483 try:
484 extra["branch"] = commit.branch
485 except AttributeError:
486 pass
487
488 while parents:
489 p1 = p2
490 p2 = parents.pop(0)
491 a = self.repo.rawcommit(files, text, commit.author, commit.date,
492 hg.bin(p1), hg.bin(p2), extra=extra)
493 text = "(octopus merge fixup)\n"
494 p2 = hg.hex(self.repo.changelog.tip())
495
496 return p2
497
498 def puttags(self, tags):
499 try:
500 old = self.repo.wfile(".hgtags").read()
501 oldlines = old.splitlines(1)
502 oldlines.sort()
503 except:
504 oldlines = []
505
506 k = tags.keys()
507 k.sort()
508 newlines = []
509 for tag in k:
510 newlines.append("%s %s\n" % (tags[tag], tag))
511
512 newlines.sort()
513
514 if newlines != oldlines:
515 self.ui.status("updating tags\n")
516 f = self.repo.wfile(".hgtags", "w")
517 f.write("".join(newlines))
518 f.close()
519 if not oldlines: self.repo.add([".hgtags"])
520 date = "%s 0" % int(time.mktime(time.gmtime()))
521 self.repo.rawcommit([".hgtags"], "update tags", "convert-repo",
522 date, self.repo.changelog.tip(), hg.nullid)
523 return hg.hex(self.repo.changelog.tip())
524
525 converters = [convert_cvs, convert_git, convert_mercurial]
526
527 def converter(ui, path):
528 if not os.path.isdir(path):
529 raise util.Abort("%s: not a directory" % path)
530 for c in converters:
531 try:
532 return c(ui, path)
533 except NoRepo:
534 pass
535 raise util.Abort("%s: unknown repository type" % path)
536
537 class convert(object):
538 def __init__(self, ui, source, dest, mapfile, opts):
539
540 self.source = source
541 self.dest = dest
542 self.ui = ui
543 self.mapfile = mapfile
544 self.opts = opts
545 self.commitcache = {}
546
547 self.map = {}
548 try:
549 for l in file(self.mapfile):
550 sv, dv = l[:-1].split()
551 self.map[sv] = dv
552 except IOError:
553 pass
554
555 def walktree(self, heads):
556 visit = heads
557 known = {}
558 parents = {}
559 while visit:
560 n = visit.pop(0)
561 if n in known or n in self.map: continue
562 known[n] = 1
563 self.commitcache[n] = self.source.getcommit(n)
564 cp = self.commitcache[n].parents
565 for p in cp:
566 parents.setdefault(n, []).append(p)
567 visit.append(p)
568
569 return parents
570
571 def toposort(self, parents):
572 visit = parents.keys()
573 seen = {}
574 children = {}
575
576 while visit:
577 n = visit.pop(0)
578 if n in seen: continue
579 seen[n] = 1
580 pc = 0
581 if n in parents:
582 for p in parents[n]:
583 if p not in self.map: pc += 1
584 visit.append(p)
585 children.setdefault(p, []).append(n)
586 if not pc: root = n
587
588 s = []
589 removed = {}
590 visit = children.keys()
591 while visit:
592 n = visit.pop(0)
593 if n in removed: continue
594 dep = 0
595 if n in parents:
596 for p in parents[n]:
597 if p in self.map: continue
598 if p not in removed:
599 # we're still dependent
600 visit.append(n)
601 dep = 1
602 break
603
604 if not dep:
605 # all n's parents are in the list
606 removed[n] = 1
607 if n not in self.map:
608 s.append(n)
609 if n in children:
610 for c in children[n]:
611 visit.insert(0, c)
612
613 if self.opts.get('datesort'):
614 depth = {}
615 for n in s:
616 depth[n] = 0
617 pl = [p for p in self.commitcache[n].parents
618 if p not in self.map]
619 if pl:
620 depth[n] = max([depth[p] for p in pl]) + 1
621
622 s = [(depth[n], self.commitcache[n].date, n) for n in s]
623 s.sort()
624 s = [e[2] for e in s]
625
626 return s
627
628 def copy(self, rev):
629 c = self.commitcache[rev]
630 files = self.source.getchanges(rev)
631
632 for f, v in files:
633 try:
634 data = self.source.getfile(f, v)
635 except IOError, inst:
636 self.dest.delfile(f)
637 else:
638 e = self.source.getmode(f, v)
639 self.dest.putfile(f, e, data)
640
641 r = [self.map[v] for v in c.parents]
642 f = [f for f, v in files]
643 self.map[rev] = self.dest.putcommit(f, r, c)
644 file(self.mapfile, "a").write("%s %s\n" % (rev, self.map[rev]))
645
646 def convert(self):
647 self.ui.status("scanning source...\n")
648 heads = self.source.getheads()
649 parents = self.walktree(heads)
650 self.ui.status("sorting...\n")
651 t = self.toposort(parents)
652 num = len(t)
653 c = None
654
655 self.ui.status("converting...\n")
656 for c in t:
657 num -= 1
658 desc = self.commitcache[c].desc
659 if "\n" in desc:
660 desc = desc.splitlines()[0]
661 self.ui.status("%d %s\n" % (num, desc))
662 self.copy(c)
663
664 tags = self.source.gettags()
665 ctags = {}
666 for k in tags:
667 v = tags[k]
668 if v in self.map:
669 ctags[k] = self.map[v]
670
671 if c and ctags:
672 nrev = self.dest.puttags(ctags)
673 # write another hash correspondence to override the previous
674 # one so we don't end up with extra tag heads
675 if nrev:
676 file(self.mapfile, "a").write("%s %s\n" % (c, nrev))
677
678 def _convert(ui, src, dest=None, mapfile=None, **opts):
679 '''Convert a foreign SCM repository to a Mercurial one.
680
681 Accepted source formats:
682 - GIT
683 - CVS
684
685 Accepted destination formats:
686 - Mercurial
687
688 If destination isn't given, a new Mercurial repo named <src>-hg will
689 be created. If <mapfile> isn't given, it will be put in a default
690 location (<dest>/.hg/shamap by default)
691
692 The <mapfile> is a simple text file that maps each source commit ID to
693 the destination ID for that revision, like so:
694
695 <source ID> <destination ID>
696
697 If the file doesn't exist, it's automatically created. It's updated
698 on each commit copied, so convert-repo can be interrupted and can
699 be run repeatedly to copy new commits.
700 '''
701
702 srcc = converter(ui, src)
703 if not hasattr(srcc, "getcommit"):
704 raise util.Abort("%s: can't read from this repo type" % src)
705
706 if not dest:
707 dest = src + "-hg"
708 ui.status("assuming destination %s\n" % dest)
709
710 # Try to be smart and initalize things when required
711 if os.path.isdir(dest):
712 if len(os.listdir(dest)) > 0:
713 try:
714 hg.repository(ui, dest)
715 ui.status("destination %s is a Mercurial repository\n" % dest)
716 except hg.RepoError:
717 raise util.Abort(
718 "destination directory %s is not empty.\n"
719 "Please specify an empty directory to be initialized\n"
720 "or an already initialized mercurial repository"
721 % dest)
722 else:
723 ui.status("initializing destination %s repository\n" % dest)
724 hg.repository(ui, dest, create=True)
725 elif os.path.exists(dest):
726 raise util.Abort("destination %s exists and is not a directory" % dest)
727 else:
728 ui.status("initializing destination %s repository\n" % dest)
729 hg.repository(ui, dest, create=True)
730
731 destc = converter(ui, dest)
732 if not hasattr(destc, "putcommit"):
733 raise util.Abort("%s: can't write to this repo type" % src)
734
735 if not mapfile:
736 try:
737 mapfile = destc.mapfile()
738 except:
739 mapfile = os.path.join(destc, "map")
740
741 c = convert(ui, srcc, destc, mapfile, opts)
742 c.convert()
743
744 cmdtable = {
745 "convert":
746 (_convert,
747 [('', 'datesort', None, 'try to sort changesets by date')],
748 'hg convert [OPTION]... SOURCE [DEST [MAPFILE]]'),
749 }
This diff has been collapsed as it changes many lines, (513 lines changed) Show them Hide them
@@ -1,115 +1,10 b''
1 # convert.py Foreign SCM converter
1 # CVS conversion code inspired by hg-cvs-import and git-cvsimport
2 #
3 # Copyright 2005, 2006 Matt Mackall <mpm@selenic.com>
4 #
5 # This software may be used and distributed according to the terms
6 # of the GNU General Public License, incorporated herein by reference.
7
8 import sys, os, zlib, sha, time, re, locale, socket
9 from mercurial import hg, ui, util, commands
10
11 commands.norepo += " convert"
12
13 class NoRepo(Exception): pass
14
15 class commit(object):
16 def __init__(self, **parts):
17 for x in "author date desc parents".split():
18 if not x in parts:
19 raise util.Abort("commit missing field %s" % x)
20 self.__dict__.update(parts)
21
22 def recode(s):
23 try:
24 return s.decode("utf-8").encode("utf-8")
25 except:
26 try:
27 return s.decode("latin-1").encode("utf-8")
28 except:
29 return s.decode("utf-8", "replace").encode("utf-8")
30
31 class converter_source(object):
32 """Conversion source interface"""
33
34 def __init__(self, ui, path):
35 """Initialize conversion source (or raise NoRepo("message")
36 exception if path is not a valid repository)"""
37 raise NotImplementedError()
38
39 def getheads(self):
40 """Return a list of this repository's heads"""
41 raise NotImplementedError()
42
43 def getfile(self, name, rev):
44 """Return file contents as a string"""
45 raise NotImplementedError()
46
47 def getmode(self, name, rev):
48 """Return file mode, eg. '', 'x', or 'l'"""
49 raise NotImplementedError()
50
51 def getchanges(self, version):
52 """Return sorted list of (filename, id) tuples for all files changed in rev.
53
2
54 id just tells us which revision to return in getfile(), e.g. in
3 import os, locale, re, socket
55 git it's an object hash."""
4 from mercurial import util
56 raise NotImplementedError()
57
58 def getcommit(self, version):
59 """Return the commit object for version"""
60 raise NotImplementedError()
61
62 def gettags(self):
63 """Return the tags as a dictionary of name: revision"""
64 raise NotImplementedError()
65
66 class converter_sink(object):
67 """Conversion sink (target) interface"""
68
69 def __init__(self, ui, path):
70 """Initialize conversion sink (or raise NoRepo("message")
71 exception if path is not a valid repository)"""
72 raise NotImplementedError()
73
74 def getheads(self):
75 """Return a list of this repository's heads"""
76 raise NotImplementedError()
77
78 def mapfile(self):
79 """Path to a file that will contain lines
80 source_rev_id sink_rev_id
81 mapping equivalent revision identifiers for each system."""
82 raise NotImplementedError()
83
5
84 def putfile(self, f, e, data):
6 from common import NoRepo, commit, converter_source
85 """Put file for next putcommit().
86 f: path to file
87 e: '', 'x', or 'l' (regular file, executable, or symlink)
88 data: file contents"""
89 raise NotImplementedError()
90
91 def delfile(self, f):
92 """Delete file for next putcommit().
93 f: path to file"""
94 raise NotImplementedError()
95
7
96 def putcommit(self, files, parents, commit):
97 """Create a revision with all changed files listed in 'files'
98 and having listed parents. 'commit' is a commit object containing
99 at a minimum the author, date, and message for this changeset.
100 Called after putfile() and delfile() calls. Note that the sink
101 repository is not told to update itself to a particular revision
102 (or even what that revision would be) before it receives the
103 file data."""
104 raise NotImplementedError()
105
106 def puttags(self, tags):
107 """Put tags into sink.
108 tags: {tagname: sink_rev_id, ...}"""
109 raise NotImplementedError()
110
111
112 # CVS conversion code inspired by hg-cvs-import and git-cvsimport
113 class convert_cvs(converter_source):
8 class convert_cvs(converter_source):
114 def __init__(self, ui, path):
9 def __init__(self, ui, path):
115 self.path = path
10 self.path = path
@@ -347,403 +242,3 b' class convert_cvs(converter_source):'
347
242
348 def gettags(self):
243 def gettags(self):
349 return self.tags
244 return self.tags
350
351 class convert_git(converter_source):
352 def __init__(self, ui, path):
353 if os.path.isdir(path + "/.git"):
354 path += "/.git"
355 self.path = path
356 self.ui = ui
357 if not os.path.exists(path + "/objects"):
358 raise NoRepo("couldn't open GIT repo %s" % path)
359
360 def getheads(self):
361 fh = os.popen("GIT_DIR=%s git-rev-parse --verify HEAD" % self.path)
362 return [fh.read()[:-1]]
363
364 def catfile(self, rev, type):
365 if rev == "0" * 40: raise IOError()
366 fh = os.popen("GIT_DIR=%s git-cat-file %s %s 2>/dev/null"
367 % (self.path, type, rev))
368 return fh.read()
369
370 def getfile(self, name, rev):
371 return self.catfile(rev, "blob")
372
373 def getmode(self, name, rev):
374 return self.modecache[(name, rev)]
375
376 def getchanges(self, version):
377 self.modecache = {}
378 fh = os.popen("GIT_DIR=%s git-diff-tree --root -m -r %s"
379 % (self.path, version))
380 changes = []
381 for l in fh:
382 if "\t" not in l: continue
383 m, f = l[:-1].split("\t")
384 m = m.split()
385 h = m[3]
386 p = (m[1] == "100755")
387 s = (m[1] == "120000")
388 self.modecache[(f, h)] = (p and "x") or (s and "l") or ""
389 changes.append((f, h))
390 return changes
391
392 def getcommit(self, version):
393 c = self.catfile(version, "commit") # read the commit hash
394 end = c.find("\n\n")
395 message = c[end+2:]
396 message = recode(message)
397 l = c[:end].splitlines()
398 manifest = l[0].split()[1]
399 parents = []
400 for e in l[1:]:
401 n, v = e.split(" ", 1)
402 if n == "author":
403 p = v.split()
404 tm, tz = p[-2:]
405 author = " ".join(p[:-2])
406 if author[0] == "<": author = author[1:-1]
407 author = recode(author)
408 if n == "committer":
409 p = v.split()
410 tm, tz = p[-2:]
411 committer = " ".join(p[:-2])
412 if committer[0] == "<": committer = committer[1:-1]
413 committer = recode(committer)
414 message += "\ncommitter: %s\n" % committer
415 if n == "parent": parents.append(v)
416
417 tzs, tzh, tzm = tz[-5:-4] + "1", tz[-4:-2], tz[-2:]
418 tz = -int(tzs) * (int(tzh) * 3600 + int(tzm))
419 date = tm + " " + str(tz)
420
421 c = commit(parents=parents, date=date, author=author, desc=message)
422 return c
423
424 def gettags(self):
425 tags = {}
426 fh = os.popen('git-ls-remote --tags "%s" 2>/dev/null' % self.path)
427 prefix = 'refs/tags/'
428 for line in fh:
429 line = line.strip()
430 if not line.endswith("^{}"):
431 continue
432 node, tag = line.split(None, 1)
433 if not tag.startswith(prefix):
434 continue
435 tag = tag[len(prefix):-3]
436 tags[tag] = node
437
438 return tags
439
440 class convert_mercurial(converter_sink):
441 def __init__(self, ui, path):
442 self.path = path
443 self.ui = ui
444 try:
445 self.repo = hg.repository(self.ui, path)
446 except:
447 raise NoRepo("could open hg repo %s" % path)
448
449 def mapfile(self):
450 return os.path.join(self.path, ".hg", "shamap")
451
452 def getheads(self):
453 h = self.repo.changelog.heads()
454 return [ hg.hex(x) for x in h ]
455
456 def putfile(self, f, e, data):
457 self.repo.wwrite(f, data, e)
458 if self.repo.dirstate.state(f) == '?':
459 self.repo.dirstate.update([f], "a")
460
461 def delfile(self, f):
462 try:
463 os.unlink(self.repo.wjoin(f))
464 #self.repo.remove([f])
465 except:
466 pass
467
468 def putcommit(self, files, parents, commit):
469 seen = {}
470 pl = []
471 for p in parents:
472 if p not in seen:
473 pl.append(p)
474 seen[p] = 1
475 parents = pl
476
477 if len(parents) < 2: parents.append("0" * 40)
478 if len(parents) < 2: parents.append("0" * 40)
479 p2 = parents.pop(0)
480
481 text = commit.desc
482 extra = {}
483 try:
484 extra["branch"] = commit.branch
485 except AttributeError:
486 pass
487
488 while parents:
489 p1 = p2
490 p2 = parents.pop(0)
491 a = self.repo.rawcommit(files, text, commit.author, commit.date,
492 hg.bin(p1), hg.bin(p2), extra=extra)
493 text = "(octopus merge fixup)\n"
494 p2 = hg.hex(self.repo.changelog.tip())
495
496 return p2
497
498 def puttags(self, tags):
499 try:
500 old = self.repo.wfile(".hgtags").read()
501 oldlines = old.splitlines(1)
502 oldlines.sort()
503 except:
504 oldlines = []
505
506 k = tags.keys()
507 k.sort()
508 newlines = []
509 for tag in k:
510 newlines.append("%s %s\n" % (tags[tag], tag))
511
512 newlines.sort()
513
514 if newlines != oldlines:
515 self.ui.status("updating tags\n")
516 f = self.repo.wfile(".hgtags", "w")
517 f.write("".join(newlines))
518 f.close()
519 if not oldlines: self.repo.add([".hgtags"])
520 date = "%s 0" % int(time.mktime(time.gmtime()))
521 self.repo.rawcommit([".hgtags"], "update tags", "convert-repo",
522 date, self.repo.changelog.tip(), hg.nullid)
523 return hg.hex(self.repo.changelog.tip())
524
525 converters = [convert_cvs, convert_git, convert_mercurial]
526
527 def converter(ui, path):
528 if not os.path.isdir(path):
529 raise util.Abort("%s: not a directory" % path)
530 for c in converters:
531 try:
532 return c(ui, path)
533 except NoRepo:
534 pass
535 raise util.Abort("%s: unknown repository type" % path)
536
537 class convert(object):
538 def __init__(self, ui, source, dest, mapfile, opts):
539
540 self.source = source
541 self.dest = dest
542 self.ui = ui
543 self.mapfile = mapfile
544 self.opts = opts
545 self.commitcache = {}
546
547 self.map = {}
548 try:
549 for l in file(self.mapfile):
550 sv, dv = l[:-1].split()
551 self.map[sv] = dv
552 except IOError:
553 pass
554
555 def walktree(self, heads):
556 visit = heads
557 known = {}
558 parents = {}
559 while visit:
560 n = visit.pop(0)
561 if n in known or n in self.map: continue
562 known[n] = 1
563 self.commitcache[n] = self.source.getcommit(n)
564 cp = self.commitcache[n].parents
565 for p in cp:
566 parents.setdefault(n, []).append(p)
567 visit.append(p)
568
569 return parents
570
571 def toposort(self, parents):
572 visit = parents.keys()
573 seen = {}
574 children = {}
575
576 while visit:
577 n = visit.pop(0)
578 if n in seen: continue
579 seen[n] = 1
580 pc = 0
581 if n in parents:
582 for p in parents[n]:
583 if p not in self.map: pc += 1
584 visit.append(p)
585 children.setdefault(p, []).append(n)
586 if not pc: root = n
587
588 s = []
589 removed = {}
590 visit = children.keys()
591 while visit:
592 n = visit.pop(0)
593 if n in removed: continue
594 dep = 0
595 if n in parents:
596 for p in parents[n]:
597 if p in self.map: continue
598 if p not in removed:
599 # we're still dependent
600 visit.append(n)
601 dep = 1
602 break
603
604 if not dep:
605 # all n's parents are in the list
606 removed[n] = 1
607 if n not in self.map:
608 s.append(n)
609 if n in children:
610 for c in children[n]:
611 visit.insert(0, c)
612
613 if self.opts.get('datesort'):
614 depth = {}
615 for n in s:
616 depth[n] = 0
617 pl = [p for p in self.commitcache[n].parents
618 if p not in self.map]
619 if pl:
620 depth[n] = max([depth[p] for p in pl]) + 1
621
622 s = [(depth[n], self.commitcache[n].date, n) for n in s]
623 s.sort()
624 s = [e[2] for e in s]
625
626 return s
627
628 def copy(self, rev):
629 c = self.commitcache[rev]
630 files = self.source.getchanges(rev)
631
632 for f, v in files:
633 try:
634 data = self.source.getfile(f, v)
635 except IOError, inst:
636 self.dest.delfile(f)
637 else:
638 e = self.source.getmode(f, v)
639 self.dest.putfile(f, e, data)
640
641 r = [self.map[v] for v in c.parents]
642 f = [f for f, v in files]
643 self.map[rev] = self.dest.putcommit(f, r, c)
644 file(self.mapfile, "a").write("%s %s\n" % (rev, self.map[rev]))
645
646 def convert(self):
647 self.ui.status("scanning source...\n")
648 heads = self.source.getheads()
649 parents = self.walktree(heads)
650 self.ui.status("sorting...\n")
651 t = self.toposort(parents)
652 num = len(t)
653 c = None
654
655 self.ui.status("converting...\n")
656 for c in t:
657 num -= 1
658 desc = self.commitcache[c].desc
659 if "\n" in desc:
660 desc = desc.splitlines()[0]
661 self.ui.status("%d %s\n" % (num, desc))
662 self.copy(c)
663
664 tags = self.source.gettags()
665 ctags = {}
666 for k in tags:
667 v = tags[k]
668 if v in self.map:
669 ctags[k] = self.map[v]
670
671 if c and ctags:
672 nrev = self.dest.puttags(ctags)
673 # write another hash correspondence to override the previous
674 # one so we don't end up with extra tag heads
675 if nrev:
676 file(self.mapfile, "a").write("%s %s\n" % (c, nrev))
677
678 def _convert(ui, src, dest=None, mapfile=None, **opts):
679 '''Convert a foreign SCM repository to a Mercurial one.
680
681 Accepted source formats:
682 - GIT
683 - CVS
684
685 Accepted destination formats:
686 - Mercurial
687
688 If destination isn't given, a new Mercurial repo named <src>-hg will
689 be created. If <mapfile> isn't given, it will be put in a default
690 location (<dest>/.hg/shamap by default)
691
692 The <mapfile> is a simple text file that maps each source commit ID to
693 the destination ID for that revision, like so:
694
695 <source ID> <destination ID>
696
697 If the file doesn't exist, it's automatically created. It's updated
698 on each commit copied, so convert-repo can be interrupted and can
699 be run repeatedly to copy new commits.
700 '''
701
702 srcc = converter(ui, src)
703 if not hasattr(srcc, "getcommit"):
704 raise util.Abort("%s: can't read from this repo type" % src)
705
706 if not dest:
707 dest = src + "-hg"
708 ui.status("assuming destination %s\n" % dest)
709
710 # Try to be smart and initalize things when required
711 if os.path.isdir(dest):
712 if len(os.listdir(dest)) > 0:
713 try:
714 hg.repository(ui, dest)
715 ui.status("destination %s is a Mercurial repository\n" % dest)
716 except hg.RepoError:
717 raise util.Abort(
718 "destination directory %s is not empty.\n"
719 "Please specify an empty directory to be initialized\n"
720 "or an already initialized mercurial repository"
721 % dest)
722 else:
723 ui.status("initializing destination %s repository\n" % dest)
724 hg.repository(ui, dest, create=True)
725 elif os.path.exists(dest):
726 raise util.Abort("destination %s exists and is not a directory" % dest)
727 else:
728 ui.status("initializing destination %s repository\n" % dest)
729 hg.repository(ui, dest, create=True)
730
731 destc = converter(ui, dest)
732 if not hasattr(destc, "putcommit"):
733 raise util.Abort("%s: can't write to this repo type" % src)
734
735 if not mapfile:
736 try:
737 mapfile = destc.mapfile()
738 except:
739 mapfile = os.path.join(destc, "map")
740
741 c = convert(ui, srcc, destc, mapfile, opts)
742 c.convert()
743
744 cmdtable = {
745 "convert":
746 (_convert,
747 [('', 'datesort', None, 'try to sort changesets by date')],
748 'hg convert [OPTION]... SOURCE [DEST [MAPFILE]]'),
749 }
This diff has been collapsed as it changes many lines, (652 lines changed) Show them Hide them
@@ -1,23 +1,8 b''
1 # convert.py Foreign SCM converter
1 # git support for the convert extension
2 #
3 # Copyright 2005, 2006 Matt Mackall <mpm@selenic.com>
4 #
5 # This software may be used and distributed according to the terms
6 # of the GNU General Public License, incorporated herein by reference.
7
8 import sys, os, zlib, sha, time, re, locale, socket
9 from mercurial import hg, ui, util, commands
10
2
11 commands.norepo += " convert"
3 import os
12
13 class NoRepo(Exception): pass
14
4
15 class commit(object):
5 from common import NoRepo, commit, converter_source
16 def __init__(self, **parts):
17 for x in "author date desc parents".split():
18 if not x in parts:
19 raise util.Abort("commit missing field %s" % x)
20 self.__dict__.update(parts)
21
6
22 def recode(s):
7 def recode(s):
23 try:
8 try:
@@ -28,326 +13,6 b' def recode(s):'
28 except:
13 except:
29 return s.decode("utf-8", "replace").encode("utf-8")
14 return s.decode("utf-8", "replace").encode("utf-8")
30
15
31 class converter_source(object):
32 """Conversion source interface"""
33
34 def __init__(self, ui, path):
35 """Initialize conversion source (or raise NoRepo("message")
36 exception if path is not a valid repository)"""
37 raise NotImplementedError()
38
39 def getheads(self):
40 """Return a list of this repository's heads"""
41 raise NotImplementedError()
42
43 def getfile(self, name, rev):
44 """Return file contents as a string"""
45 raise NotImplementedError()
46
47 def getmode(self, name, rev):
48 """Return file mode, eg. '', 'x', or 'l'"""
49 raise NotImplementedError()
50
51 def getchanges(self, version):
52 """Return sorted list of (filename, id) tuples for all files changed in rev.
53
54 id just tells us which revision to return in getfile(), e.g. in
55 git it's an object hash."""
56 raise NotImplementedError()
57
58 def getcommit(self, version):
59 """Return the commit object for version"""
60 raise NotImplementedError()
61
62 def gettags(self):
63 """Return the tags as a dictionary of name: revision"""
64 raise NotImplementedError()
65
66 class converter_sink(object):
67 """Conversion sink (target) interface"""
68
69 def __init__(self, ui, path):
70 """Initialize conversion sink (or raise NoRepo("message")
71 exception if path is not a valid repository)"""
72 raise NotImplementedError()
73
74 def getheads(self):
75 """Return a list of this repository's heads"""
76 raise NotImplementedError()
77
78 def mapfile(self):
79 """Path to a file that will contain lines
80 source_rev_id sink_rev_id
81 mapping equivalent revision identifiers for each system."""
82 raise NotImplementedError()
83
84 def putfile(self, f, e, data):
85 """Put file for next putcommit().
86 f: path to file
87 e: '', 'x', or 'l' (regular file, executable, or symlink)
88 data: file contents"""
89 raise NotImplementedError()
90
91 def delfile(self, f):
92 """Delete file for next putcommit().
93 f: path to file"""
94 raise NotImplementedError()
95
96 def putcommit(self, files, parents, commit):
97 """Create a revision with all changed files listed in 'files'
98 and having listed parents. 'commit' is a commit object containing
99 at a minimum the author, date, and message for this changeset.
100 Called after putfile() and delfile() calls. Note that the sink
101 repository is not told to update itself to a particular revision
102 (or even what that revision would be) before it receives the
103 file data."""
104 raise NotImplementedError()
105
106 def puttags(self, tags):
107 """Put tags into sink.
108 tags: {tagname: sink_rev_id, ...}"""
109 raise NotImplementedError()
110
111
112 # CVS conversion code inspired by hg-cvs-import and git-cvsimport
113 class convert_cvs(converter_source):
114 def __init__(self, ui, path):
115 self.path = path
116 self.ui = ui
117 cvs = os.path.join(path, "CVS")
118 if not os.path.exists(cvs):
119 raise NoRepo("couldn't open CVS repo %s" % path)
120
121 self.changeset = {}
122 self.files = {}
123 self.tags = {}
124 self.lastbranch = {}
125 self.parent = {}
126 self.socket = None
127 self.cvsroot = file(os.path.join(cvs, "Root")).read()[:-1]
128 self.cvsrepo = file(os.path.join(cvs, "Repository")).read()[:-1]
129 self.encoding = locale.getpreferredencoding()
130 self._parse()
131 self._connect()
132
133 def _parse(self):
134 if self.changeset:
135 return
136
137 d = os.getcwd()
138 try:
139 os.chdir(self.path)
140 id = None
141 state = 0
142 for l in os.popen("cvsps -A -u --cvs-direct -q"):
143 if state == 0: # header
144 if l.startswith("PatchSet"):
145 id = l[9:-2]
146 elif l.startswith("Date"):
147 date = util.parsedate(l[6:-1], ["%Y/%m/%d %H:%M:%S"])
148 date = util.datestr(date)
149 elif l.startswith("Branch"):
150 branch = l[8:-1]
151 self.parent[id] = self.lastbranch.get(branch, 'bad')
152 self.lastbranch[branch] = id
153 elif l.startswith("Ancestor branch"):
154 ancestor = l[17:-1]
155 self.parent[id] = self.lastbranch[ancestor]
156 elif l.startswith("Author"):
157 author = self.recode(l[8:-1])
158 elif l.startswith("Tag: "):
159 t = l[5:-1].rstrip()
160 if t != "(none)":
161 self.tags[t] = id
162 elif l.startswith("Log:"):
163 state = 1
164 log = ""
165 elif state == 1: # log
166 if l == "Members: \n":
167 files = {}
168 log = self.recode(log[:-1])
169 if log.isspace():
170 log = "*** empty log message ***\n"
171 state = 2
172 else:
173 log += l
174 elif state == 2:
175 if l == "\n": #
176 state = 0
177 p = [self.parent[id]]
178 if id == "1":
179 p = []
180 if branch == "HEAD":
181 branch = ""
182 c = commit(author=author, date=date, parents=p,
183 desc=log, branch=branch)
184 self.changeset[id] = c
185 self.files[id] = files
186 else:
187 colon = l.rfind(':')
188 file = l[1:colon]
189 rev = l[colon+1:-2]
190 rev = rev.split("->")[1]
191 files[file] = rev
192
193 self.heads = self.lastbranch.values()
194 finally:
195 os.chdir(d)
196
197 def _connect(self):
198 root = self.cvsroot
199 conntype = None
200 user, host = None, None
201 cmd = ['cvs', 'server']
202
203 self.ui.status("connecting to %s\n" % root)
204
205 if root.startswith(":pserver:"):
206 root = root[9:]
207 m = re.match(r'(?:(.*?)(?::(.*?))?@)?([^:\/]*)(?::(\d*))?(.*)',
208 root)
209 if m:
210 conntype = "pserver"
211 user, passw, serv, port, root = m.groups()
212 if not user:
213 user = "anonymous"
214 rr = ":pserver:" + user + "@" + serv + ":" + root
215 if port:
216 rr2, port = "-", int(port)
217 else:
218 rr2, port = rr, 2401
219 rr += str(port)
220
221 if not passw:
222 passw = "A"
223 pf = open(os.path.join(os.environ["HOME"], ".cvspass"))
224 for l in pf:
225 # :pserver:cvs@mea.tmt.tele.fi:/cvsroot/zmailer Ah<Z
226 m = re.match(r'(/\d+\s+/)?(.*)', l)
227 l = m.group(2)
228 w, p = l.split(' ', 1)
229 if w in [rr, rr2]:
230 passw = p
231 break
232 pf.close()
233
234 sck = socket.socket()
235 sck.connect((serv, port))
236 sck.send("\n".join(["BEGIN AUTH REQUEST", root, user, passw,
237 "END AUTH REQUEST", ""]))
238 if sck.recv(128) != "I LOVE YOU\n":
239 raise NoRepo("CVS pserver authentication failed")
240
241 self.writep = self.readp = sck.makefile('r+')
242
243 if not conntype and root.startswith(":local:"):
244 conntype = "local"
245 root = root[7:]
246
247 if not conntype:
248 # :ext:user@host/home/user/path/to/cvsroot
249 if root.startswith(":ext:"):
250 root = root[5:]
251 m = re.match(r'(?:([^@:/]+)@)?([^:/]+):?(.*)', root)
252 if not m:
253 conntype = "local"
254 else:
255 conntype = "rsh"
256 user, host, root = m.group(1), m.group(2), m.group(3)
257
258 if conntype != "pserver":
259 if conntype == "rsh":
260 rsh = os.environ.get("CVS_RSH" or "rsh")
261 if user:
262 cmd = [rsh, '-l', user, host] + cmd
263 else:
264 cmd = [rsh, host] + cmd
265
266 self.writep, self.readp = os.popen2(cmd)
267
268 self.realroot = root
269
270 self.writep.write("Root %s\n" % root)
271 self.writep.write("Valid-responses ok error Valid-requests Mode"
272 " M Mbinary E Checked-in Created Updated"
273 " Merged Removed\n")
274 self.writep.write("valid-requests\n")
275 self.writep.flush()
276 r = self.readp.readline()
277 if not r.startswith("Valid-requests"):
278 raise util.Abort("server sucks")
279 if "UseUnchanged" in r:
280 self.writep.write("UseUnchanged\n")
281 self.writep.flush()
282 r = self.readp.readline()
283
284 def getheads(self):
285 return self.heads
286
287 def _getfile(self, name, rev):
288 if rev.endswith("(DEAD)"):
289 raise IOError
290
291 args = ("-N -P -kk -r %s --" % rev).split()
292 args.append(os.path.join(self.cvsrepo, name))
293 for x in args:
294 self.writep.write("Argument %s\n" % x)
295 self.writep.write("Directory .\n%s\nco\n" % self.realroot)
296 self.writep.flush()
297
298 data = ""
299 while 1:
300 line = self.readp.readline()
301 if line.startswith("Created ") or line.startswith("Updated "):
302 self.readp.readline() # path
303 self.readp.readline() # entries
304 mode = self.readp.readline()[:-1]
305 count = int(self.readp.readline()[:-1])
306 data = self.readp.read(count)
307 elif line.startswith(" "):
308 data += line[1:]
309 elif line.startswith("M "):
310 pass
311 elif line.startswith("Mbinary "):
312 count = int(self.readp.readline()[:-1])
313 data = self.readp.read(count)
314 else:
315 if line == "ok\n":
316 return (data, "x" in mode and "x" or "")
317 elif line.startswith("E "):
318 self.ui.warn("cvs server: %s\n" % line[2:])
319 elif line.startswith("Remove"):
320 l = self.readp.readline()
321 l = self.readp.readline()
322 if l != "ok\n":
323 raise util.Abort("unknown CVS response: %s" % l)
324 else:
325 raise util.Abort("unknown CVS response: %s" % line)
326
327 def getfile(self, file, rev):
328 data, mode = self._getfile(file, rev)
329 self.modecache[(file, rev)] = mode
330 return data
331
332 def getmode(self, file, rev):
333 return self.modecache[(file, rev)]
334
335 def getchanges(self, rev):
336 self.modecache = {}
337 files = self.files[rev]
338 cl = files.items()
339 cl.sort()
340 return cl
341
342 def recode(self, text):
343 return text.decode(self.encoding, "replace").encode("utf-8")
344
345 def getcommit(self, rev):
346 return self.changeset[rev]
347
348 def gettags(self):
349 return self.tags
350
351 class convert_git(converter_source):
16 class convert_git(converter_source):
352 def __init__(self, ui, path):
17 def __init__(self, ui, path):
353 if os.path.isdir(path + "/.git"):
18 if os.path.isdir(path + "/.git"):
@@ -436,314 +101,3 b' class convert_git(converter_source):'
436 tags[tag] = node
101 tags[tag] = node
437
102
438 return tags
103 return tags
439
440 class convert_mercurial(converter_sink):
441 def __init__(self, ui, path):
442 self.path = path
443 self.ui = ui
444 try:
445 self.repo = hg.repository(self.ui, path)
446 except:
447 raise NoRepo("could open hg repo %s" % path)
448
449 def mapfile(self):
450 return os.path.join(self.path, ".hg", "shamap")
451
452 def getheads(self):
453 h = self.repo.changelog.heads()
454 return [ hg.hex(x) for x in h ]
455
456 def putfile(self, f, e, data):
457 self.repo.wwrite(f, data, e)
458 if self.repo.dirstate.state(f) == '?':
459 self.repo.dirstate.update([f], "a")
460
461 def delfile(self, f):
462 try:
463 os.unlink(self.repo.wjoin(f))
464 #self.repo.remove([f])
465 except:
466 pass
467
468 def putcommit(self, files, parents, commit):
469 seen = {}
470 pl = []
471 for p in parents:
472 if p not in seen:
473 pl.append(p)
474 seen[p] = 1
475 parents = pl
476
477 if len(parents) < 2: parents.append("0" * 40)
478 if len(parents) < 2: parents.append("0" * 40)
479 p2 = parents.pop(0)
480
481 text = commit.desc
482 extra = {}
483 try:
484 extra["branch"] = commit.branch
485 except AttributeError:
486 pass
487
488 while parents:
489 p1 = p2
490 p2 = parents.pop(0)
491 a = self.repo.rawcommit(files, text, commit.author, commit.date,
492 hg.bin(p1), hg.bin(p2), extra=extra)
493 text = "(octopus merge fixup)\n"
494 p2 = hg.hex(self.repo.changelog.tip())
495
496 return p2
497
498 def puttags(self, tags):
499 try:
500 old = self.repo.wfile(".hgtags").read()
501 oldlines = old.splitlines(1)
502 oldlines.sort()
503 except:
504 oldlines = []
505
506 k = tags.keys()
507 k.sort()
508 newlines = []
509 for tag in k:
510 newlines.append("%s %s\n" % (tags[tag], tag))
511
512 newlines.sort()
513
514 if newlines != oldlines:
515 self.ui.status("updating tags\n")
516 f = self.repo.wfile(".hgtags", "w")
517 f.write("".join(newlines))
518 f.close()
519 if not oldlines: self.repo.add([".hgtags"])
520 date = "%s 0" % int(time.mktime(time.gmtime()))
521 self.repo.rawcommit([".hgtags"], "update tags", "convert-repo",
522 date, self.repo.changelog.tip(), hg.nullid)
523 return hg.hex(self.repo.changelog.tip())
524
525 converters = [convert_cvs, convert_git, convert_mercurial]
526
527 def converter(ui, path):
528 if not os.path.isdir(path):
529 raise util.Abort("%s: not a directory" % path)
530 for c in converters:
531 try:
532 return c(ui, path)
533 except NoRepo:
534 pass
535 raise util.Abort("%s: unknown repository type" % path)
536
537 class convert(object):
538 def __init__(self, ui, source, dest, mapfile, opts):
539
540 self.source = source
541 self.dest = dest
542 self.ui = ui
543 self.mapfile = mapfile
544 self.opts = opts
545 self.commitcache = {}
546
547 self.map = {}
548 try:
549 for l in file(self.mapfile):
550 sv, dv = l[:-1].split()
551 self.map[sv] = dv
552 except IOError:
553 pass
554
555 def walktree(self, heads):
556 visit = heads
557 known = {}
558 parents = {}
559 while visit:
560 n = visit.pop(0)
561 if n in known or n in self.map: continue
562 known[n] = 1
563 self.commitcache[n] = self.source.getcommit(n)
564 cp = self.commitcache[n].parents
565 for p in cp:
566 parents.setdefault(n, []).append(p)
567 visit.append(p)
568
569 return parents
570
571 def toposort(self, parents):
572 visit = parents.keys()
573 seen = {}
574 children = {}
575
576 while visit:
577 n = visit.pop(0)
578 if n in seen: continue
579 seen[n] = 1
580 pc = 0
581 if n in parents:
582 for p in parents[n]:
583 if p not in self.map: pc += 1
584 visit.append(p)
585 children.setdefault(p, []).append(n)
586 if not pc: root = n
587
588 s = []
589 removed = {}
590 visit = children.keys()
591 while visit:
592 n = visit.pop(0)
593 if n in removed: continue
594 dep = 0
595 if n in parents:
596 for p in parents[n]:
597 if p in self.map: continue
598 if p not in removed:
599 # we're still dependent
600 visit.append(n)
601 dep = 1
602 break
603
604 if not dep:
605 # all n's parents are in the list
606 removed[n] = 1
607 if n not in self.map:
608 s.append(n)
609 if n in children:
610 for c in children[n]:
611 visit.insert(0, c)
612
613 if self.opts.get('datesort'):
614 depth = {}
615 for n in s:
616 depth[n] = 0
617 pl = [p for p in self.commitcache[n].parents
618 if p not in self.map]
619 if pl:
620 depth[n] = max([depth[p] for p in pl]) + 1
621
622 s = [(depth[n], self.commitcache[n].date, n) for n in s]
623 s.sort()
624 s = [e[2] for e in s]
625
626 return s
627
628 def copy(self, rev):
629 c = self.commitcache[rev]
630 files = self.source.getchanges(rev)
631
632 for f, v in files:
633 try:
634 data = self.source.getfile(f, v)
635 except IOError, inst:
636 self.dest.delfile(f)
637 else:
638 e = self.source.getmode(f, v)
639 self.dest.putfile(f, e, data)
640
641 r = [self.map[v] for v in c.parents]
642 f = [f for f, v in files]
643 self.map[rev] = self.dest.putcommit(f, r, c)
644 file(self.mapfile, "a").write("%s %s\n" % (rev, self.map[rev]))
645
646 def convert(self):
647 self.ui.status("scanning source...\n")
648 heads = self.source.getheads()
649 parents = self.walktree(heads)
650 self.ui.status("sorting...\n")
651 t = self.toposort(parents)
652 num = len(t)
653 c = None
654
655 self.ui.status("converting...\n")
656 for c in t:
657 num -= 1
658 desc = self.commitcache[c].desc
659 if "\n" in desc:
660 desc = desc.splitlines()[0]
661 self.ui.status("%d %s\n" % (num, desc))
662 self.copy(c)
663
664 tags = self.source.gettags()
665 ctags = {}
666 for k in tags:
667 v = tags[k]
668 if v in self.map:
669 ctags[k] = self.map[v]
670
671 if c and ctags:
672 nrev = self.dest.puttags(ctags)
673 # write another hash correspondence to override the previous
674 # one so we don't end up with extra tag heads
675 if nrev:
676 file(self.mapfile, "a").write("%s %s\n" % (c, nrev))
677
678 def _convert(ui, src, dest=None, mapfile=None, **opts):
679 '''Convert a foreign SCM repository to a Mercurial one.
680
681 Accepted source formats:
682 - GIT
683 - CVS
684
685 Accepted destination formats:
686 - Mercurial
687
688 If destination isn't given, a new Mercurial repo named <src>-hg will
689 be created. If <mapfile> isn't given, it will be put in a default
690 location (<dest>/.hg/shamap by default)
691
692 The <mapfile> is a simple text file that maps each source commit ID to
693 the destination ID for that revision, like so:
694
695 <source ID> <destination ID>
696
697 If the file doesn't exist, it's automatically created. It's updated
698 on each commit copied, so convert-repo can be interrupted and can
699 be run repeatedly to copy new commits.
700 '''
701
702 srcc = converter(ui, src)
703 if not hasattr(srcc, "getcommit"):
704 raise util.Abort("%s: can't read from this repo type" % src)
705
706 if not dest:
707 dest = src + "-hg"
708 ui.status("assuming destination %s\n" % dest)
709
710 # Try to be smart and initalize things when required
711 if os.path.isdir(dest):
712 if len(os.listdir(dest)) > 0:
713 try:
714 hg.repository(ui, dest)
715 ui.status("destination %s is a Mercurial repository\n" % dest)
716 except hg.RepoError:
717 raise util.Abort(
718 "destination directory %s is not empty.\n"
719 "Please specify an empty directory to be initialized\n"
720 "or an already initialized mercurial repository"
721 % dest)
722 else:
723 ui.status("initializing destination %s repository\n" % dest)
724 hg.repository(ui, dest, create=True)
725 elif os.path.exists(dest):
726 raise util.Abort("destination %s exists and is not a directory" % dest)
727 else:
728 ui.status("initializing destination %s repository\n" % dest)
729 hg.repository(ui, dest, create=True)
730
731 destc = converter(ui, dest)
732 if not hasattr(destc, "putcommit"):
733 raise util.Abort("%s: can't write to this repo type" % src)
734
735 if not mapfile:
736 try:
737 mapfile = destc.mapfile()
738 except:
739 mapfile = os.path.join(destc, "map")
740
741 c = convert(ui, srcc, destc, mapfile, opts)
742 c.convert()
743
744 cmdtable = {
745 "convert":
746 (_convert,
747 [('', 'datesort', None, 'try to sort changesets by date')],
748 'hg convert [OPTION]... SOURCE [DEST [MAPFILE]]'),
749 }
This diff has been collapsed as it changes many lines, (666 lines changed) Show them Hide them
@@ -1,441 +1,9 b''
1 # convert.py Foreign SCM converter
1 # hg backend for convert extension
2 #
3 # Copyright 2005, 2006 Matt Mackall <mpm@selenic.com>
4 #
5 # This software may be used and distributed according to the terms
6 # of the GNU General Public License, incorporated herein by reference.
7
8 import sys, os, zlib, sha, time, re, locale, socket
9 from mercurial import hg, ui, util, commands
10
11 commands.norepo += " convert"
12
13 class NoRepo(Exception): pass
14
15 class commit(object):
16 def __init__(self, **parts):
17 for x in "author date desc parents".split():
18 if not x in parts:
19 raise util.Abort("commit missing field %s" % x)
20 self.__dict__.update(parts)
21
22 def recode(s):
23 try:
24 return s.decode("utf-8").encode("utf-8")
25 except:
26 try:
27 return s.decode("latin-1").encode("utf-8")
28 except:
29 return s.decode("utf-8", "replace").encode("utf-8")
30
31 class converter_source(object):
32 """Conversion source interface"""
33
34 def __init__(self, ui, path):
35 """Initialize conversion source (or raise NoRepo("message")
36 exception if path is not a valid repository)"""
37 raise NotImplementedError()
38
39 def getheads(self):
40 """Return a list of this repository's heads"""
41 raise NotImplementedError()
42
43 def getfile(self, name, rev):
44 """Return file contents as a string"""
45 raise NotImplementedError()
46
47 def getmode(self, name, rev):
48 """Return file mode, eg. '', 'x', or 'l'"""
49 raise NotImplementedError()
50
51 def getchanges(self, version):
52 """Return sorted list of (filename, id) tuples for all files changed in rev.
53
54 id just tells us which revision to return in getfile(), e.g. in
55 git it's an object hash."""
56 raise NotImplementedError()
57
58 def getcommit(self, version):
59 """Return the commit object for version"""
60 raise NotImplementedError()
61
62 def gettags(self):
63 """Return the tags as a dictionary of name: revision"""
64 raise NotImplementedError()
65
66 class converter_sink(object):
67 """Conversion sink (target) interface"""
68
69 def __init__(self, ui, path):
70 """Initialize conversion sink (or raise NoRepo("message")
71 exception if path is not a valid repository)"""
72 raise NotImplementedError()
73
74 def getheads(self):
75 """Return a list of this repository's heads"""
76 raise NotImplementedError()
77
78 def mapfile(self):
79 """Path to a file that will contain lines
80 source_rev_id sink_rev_id
81 mapping equivalent revision identifiers for each system."""
82 raise NotImplementedError()
83
84 def putfile(self, f, e, data):
85 """Put file for next putcommit().
86 f: path to file
87 e: '', 'x', or 'l' (regular file, executable, or symlink)
88 data: file contents"""
89 raise NotImplementedError()
90
91 def delfile(self, f):
92 """Delete file for next putcommit().
93 f: path to file"""
94 raise NotImplementedError()
95
96 def putcommit(self, files, parents, commit):
97 """Create a revision with all changed files listed in 'files'
98 and having listed parents. 'commit' is a commit object containing
99 at a minimum the author, date, and message for this changeset.
100 Called after putfile() and delfile() calls. Note that the sink
101 repository is not told to update itself to a particular revision
102 (or even what that revision would be) before it receives the
103 file data."""
104 raise NotImplementedError()
105
106 def puttags(self, tags):
107 """Put tags into sink.
108 tags: {tagname: sink_rev_id, ...}"""
109 raise NotImplementedError()
110
111
112 # CVS conversion code inspired by hg-cvs-import and git-cvsimport
113 class convert_cvs(converter_source):
114 def __init__(self, ui, path):
115 self.path = path
116 self.ui = ui
117 cvs = os.path.join(path, "CVS")
118 if not os.path.exists(cvs):
119 raise NoRepo("couldn't open CVS repo %s" % path)
120
121 self.changeset = {}
122 self.files = {}
123 self.tags = {}
124 self.lastbranch = {}
125 self.parent = {}
126 self.socket = None
127 self.cvsroot = file(os.path.join(cvs, "Root")).read()[:-1]
128 self.cvsrepo = file(os.path.join(cvs, "Repository")).read()[:-1]
129 self.encoding = locale.getpreferredencoding()
130 self._parse()
131 self._connect()
132
133 def _parse(self):
134 if self.changeset:
135 return
136
137 d = os.getcwd()
138 try:
139 os.chdir(self.path)
140 id = None
141 state = 0
142 for l in os.popen("cvsps -A -u --cvs-direct -q"):
143 if state == 0: # header
144 if l.startswith("PatchSet"):
145 id = l[9:-2]
146 elif l.startswith("Date"):
147 date = util.parsedate(l[6:-1], ["%Y/%m/%d %H:%M:%S"])
148 date = util.datestr(date)
149 elif l.startswith("Branch"):
150 branch = l[8:-1]
151 self.parent[id] = self.lastbranch.get(branch, 'bad')
152 self.lastbranch[branch] = id
153 elif l.startswith("Ancestor branch"):
154 ancestor = l[17:-1]
155 self.parent[id] = self.lastbranch[ancestor]
156 elif l.startswith("Author"):
157 author = self.recode(l[8:-1])
158 elif l.startswith("Tag: "):
159 t = l[5:-1].rstrip()
160 if t != "(none)":
161 self.tags[t] = id
162 elif l.startswith("Log:"):
163 state = 1
164 log = ""
165 elif state == 1: # log
166 if l == "Members: \n":
167 files = {}
168 log = self.recode(log[:-1])
169 if log.isspace():
170 log = "*** empty log message ***\n"
171 state = 2
172 else:
173 log += l
174 elif state == 2:
175 if l == "\n": #
176 state = 0
177 p = [self.parent[id]]
178 if id == "1":
179 p = []
180 if branch == "HEAD":
181 branch = ""
182 c = commit(author=author, date=date, parents=p,
183 desc=log, branch=branch)
184 self.changeset[id] = c
185 self.files[id] = files
186 else:
187 colon = l.rfind(':')
188 file = l[1:colon]
189 rev = l[colon+1:-2]
190 rev = rev.split("->")[1]
191 files[file] = rev
192
193 self.heads = self.lastbranch.values()
194 finally:
195 os.chdir(d)
196
197 def _connect(self):
198 root = self.cvsroot
199 conntype = None
200 user, host = None, None
201 cmd = ['cvs', 'server']
202
203 self.ui.status("connecting to %s\n" % root)
204
2
205 if root.startswith(":pserver:"):
3 import os, time
206 root = root[9:]
4 from mercurial import hg
207 m = re.match(r'(?:(.*?)(?::(.*?))?@)?([^:\/]*)(?::(\d*))?(.*)',
208 root)
209 if m:
210 conntype = "pserver"
211 user, passw, serv, port, root = m.groups()
212 if not user:
213 user = "anonymous"
214 rr = ":pserver:" + user + "@" + serv + ":" + root
215 if port:
216 rr2, port = "-", int(port)
217 else:
218 rr2, port = rr, 2401
219 rr += str(port)
220
221 if not passw:
222 passw = "A"
223 pf = open(os.path.join(os.environ["HOME"], ".cvspass"))
224 for l in pf:
225 # :pserver:cvs@mea.tmt.tele.fi:/cvsroot/zmailer Ah<Z
226 m = re.match(r'(/\d+\s+/)?(.*)', l)
227 l = m.group(2)
228 w, p = l.split(' ', 1)
229 if w in [rr, rr2]:
230 passw = p
231 break
232 pf.close()
233
234 sck = socket.socket()
235 sck.connect((serv, port))
236 sck.send("\n".join(["BEGIN AUTH REQUEST", root, user, passw,
237 "END AUTH REQUEST", ""]))
238 if sck.recv(128) != "I LOVE YOU\n":
239 raise NoRepo("CVS pserver authentication failed")
240
241 self.writep = self.readp = sck.makefile('r+')
242
243 if not conntype and root.startswith(":local:"):
244 conntype = "local"
245 root = root[7:]
246
247 if not conntype:
248 # :ext:user@host/home/user/path/to/cvsroot
249 if root.startswith(":ext:"):
250 root = root[5:]
251 m = re.match(r'(?:([^@:/]+)@)?([^:/]+):?(.*)', root)
252 if not m:
253 conntype = "local"
254 else:
255 conntype = "rsh"
256 user, host, root = m.group(1), m.group(2), m.group(3)
257
258 if conntype != "pserver":
259 if conntype == "rsh":
260 rsh = os.environ.get("CVS_RSH" or "rsh")
261 if user:
262 cmd = [rsh, '-l', user, host] + cmd
263 else:
264 cmd = [rsh, host] + cmd
265
266 self.writep, self.readp = os.popen2(cmd)
267
268 self.realroot = root
269
270 self.writep.write("Root %s\n" % root)
271 self.writep.write("Valid-responses ok error Valid-requests Mode"
272 " M Mbinary E Checked-in Created Updated"
273 " Merged Removed\n")
274 self.writep.write("valid-requests\n")
275 self.writep.flush()
276 r = self.readp.readline()
277 if not r.startswith("Valid-requests"):
278 raise util.Abort("server sucks")
279 if "UseUnchanged" in r:
280 self.writep.write("UseUnchanged\n")
281 self.writep.flush()
282 r = self.readp.readline()
283
284 def getheads(self):
285 return self.heads
286
287 def _getfile(self, name, rev):
288 if rev.endswith("(DEAD)"):
289 raise IOError
290
291 args = ("-N -P -kk -r %s --" % rev).split()
292 args.append(os.path.join(self.cvsrepo, name))
293 for x in args:
294 self.writep.write("Argument %s\n" % x)
295 self.writep.write("Directory .\n%s\nco\n" % self.realroot)
296 self.writep.flush()
297
5
298 data = ""
6 from common import NoRepo, converter_sink
299 while 1:
300 line = self.readp.readline()
301 if line.startswith("Created ") or line.startswith("Updated "):
302 self.readp.readline() # path
303 self.readp.readline() # entries
304 mode = self.readp.readline()[:-1]
305 count = int(self.readp.readline()[:-1])
306 data = self.readp.read(count)
307 elif line.startswith(" "):
308 data += line[1:]
309 elif line.startswith("M "):
310 pass
311 elif line.startswith("Mbinary "):
312 count = int(self.readp.readline()[:-1])
313 data = self.readp.read(count)
314 else:
315 if line == "ok\n":
316 return (data, "x" in mode and "x" or "")
317 elif line.startswith("E "):
318 self.ui.warn("cvs server: %s\n" % line[2:])
319 elif line.startswith("Remove"):
320 l = self.readp.readline()
321 l = self.readp.readline()
322 if l != "ok\n":
323 raise util.Abort("unknown CVS response: %s" % l)
324 else:
325 raise util.Abort("unknown CVS response: %s" % line)
326
327 def getfile(self, file, rev):
328 data, mode = self._getfile(file, rev)
329 self.modecache[(file, rev)] = mode
330 return data
331
332 def getmode(self, file, rev):
333 return self.modecache[(file, rev)]
334
335 def getchanges(self, rev):
336 self.modecache = {}
337 files = self.files[rev]
338 cl = files.items()
339 cl.sort()
340 return cl
341
342 def recode(self, text):
343 return text.decode(self.encoding, "replace").encode("utf-8")
344
345 def getcommit(self, rev):
346 return self.changeset[rev]
347
348 def gettags(self):
349 return self.tags
350
351 class convert_git(converter_source):
352 def __init__(self, ui, path):
353 if os.path.isdir(path + "/.git"):
354 path += "/.git"
355 self.path = path
356 self.ui = ui
357 if not os.path.exists(path + "/objects"):
358 raise NoRepo("couldn't open GIT repo %s" % path)
359
360 def getheads(self):
361 fh = os.popen("GIT_DIR=%s git-rev-parse --verify HEAD" % self.path)
362 return [fh.read()[:-1]]
363
364 def catfile(self, rev, type):
365 if rev == "0" * 40: raise IOError()
366 fh = os.popen("GIT_DIR=%s git-cat-file %s %s 2>/dev/null"
367 % (self.path, type, rev))
368 return fh.read()
369
370 def getfile(self, name, rev):
371 return self.catfile(rev, "blob")
372
373 def getmode(self, name, rev):
374 return self.modecache[(name, rev)]
375
376 def getchanges(self, version):
377 self.modecache = {}
378 fh = os.popen("GIT_DIR=%s git-diff-tree --root -m -r %s"
379 % (self.path, version))
380 changes = []
381 for l in fh:
382 if "\t" not in l: continue
383 m, f = l[:-1].split("\t")
384 m = m.split()
385 h = m[3]
386 p = (m[1] == "100755")
387 s = (m[1] == "120000")
388 self.modecache[(f, h)] = (p and "x") or (s and "l") or ""
389 changes.append((f, h))
390 return changes
391
392 def getcommit(self, version):
393 c = self.catfile(version, "commit") # read the commit hash
394 end = c.find("\n\n")
395 message = c[end+2:]
396 message = recode(message)
397 l = c[:end].splitlines()
398 manifest = l[0].split()[1]
399 parents = []
400 for e in l[1:]:
401 n, v = e.split(" ", 1)
402 if n == "author":
403 p = v.split()
404 tm, tz = p[-2:]
405 author = " ".join(p[:-2])
406 if author[0] == "<": author = author[1:-1]
407 author = recode(author)
408 if n == "committer":
409 p = v.split()
410 tm, tz = p[-2:]
411 committer = " ".join(p[:-2])
412 if committer[0] == "<": committer = committer[1:-1]
413 committer = recode(committer)
414 message += "\ncommitter: %s\n" % committer
415 if n == "parent": parents.append(v)
416
417 tzs, tzh, tzm = tz[-5:-4] + "1", tz[-4:-2], tz[-2:]
418 tz = -int(tzs) * (int(tzh) * 3600 + int(tzm))
419 date = tm + " " + str(tz)
420
421 c = commit(parents=parents, date=date, author=author, desc=message)
422 return c
423
424 def gettags(self):
425 tags = {}
426 fh = os.popen('git-ls-remote --tags "%s" 2>/dev/null' % self.path)
427 prefix = 'refs/tags/'
428 for line in fh:
429 line = line.strip()
430 if not line.endswith("^{}"):
431 continue
432 node, tag = line.split(None, 1)
433 if not tag.startswith(prefix):
434 continue
435 tag = tag[len(prefix):-3]
436 tags[tag] = node
437
438 return tags
439
7
440 class convert_mercurial(converter_sink):
8 class convert_mercurial(converter_sink):
441 def __init__(self, ui, path):
9 def __init__(self, ui, path):
@@ -521,229 +89,3 b' class convert_mercurial(converter_sink):'
521 self.repo.rawcommit([".hgtags"], "update tags", "convert-repo",
89 self.repo.rawcommit([".hgtags"], "update tags", "convert-repo",
522 date, self.repo.changelog.tip(), hg.nullid)
90 date, self.repo.changelog.tip(), hg.nullid)
523 return hg.hex(self.repo.changelog.tip())
91 return hg.hex(self.repo.changelog.tip())
524
525 converters = [convert_cvs, convert_git, convert_mercurial]
526
527 def converter(ui, path):
528 if not os.path.isdir(path):
529 raise util.Abort("%s: not a directory" % path)
530 for c in converters:
531 try:
532 return c(ui, path)
533 except NoRepo:
534 pass
535 raise util.Abort("%s: unknown repository type" % path)
536
537 class convert(object):
538 def __init__(self, ui, source, dest, mapfile, opts):
539
540 self.source = source
541 self.dest = dest
542 self.ui = ui
543 self.mapfile = mapfile
544 self.opts = opts
545 self.commitcache = {}
546
547 self.map = {}
548 try:
549 for l in file(self.mapfile):
550 sv, dv = l[:-1].split()
551 self.map[sv] = dv
552 except IOError:
553 pass
554
555 def walktree(self, heads):
556 visit = heads
557 known = {}
558 parents = {}
559 while visit:
560 n = visit.pop(0)
561 if n in known or n in self.map: continue
562 known[n] = 1
563 self.commitcache[n] = self.source.getcommit(n)
564 cp = self.commitcache[n].parents
565 for p in cp:
566 parents.setdefault(n, []).append(p)
567 visit.append(p)
568
569 return parents
570
571 def toposort(self, parents):
572 visit = parents.keys()
573 seen = {}
574 children = {}
575
576 while visit:
577 n = visit.pop(0)
578 if n in seen: continue
579 seen[n] = 1
580 pc = 0
581 if n in parents:
582 for p in parents[n]:
583 if p not in self.map: pc += 1
584 visit.append(p)
585 children.setdefault(p, []).append(n)
586 if not pc: root = n
587
588 s = []
589 removed = {}
590 visit = children.keys()
591 while visit:
592 n = visit.pop(0)
593 if n in removed: continue
594 dep = 0
595 if n in parents:
596 for p in parents[n]:
597 if p in self.map: continue
598 if p not in removed:
599 # we're still dependent
600 visit.append(n)
601 dep = 1
602 break
603
604 if not dep:
605 # all n's parents are in the list
606 removed[n] = 1
607 if n not in self.map:
608 s.append(n)
609 if n in children:
610 for c in children[n]:
611 visit.insert(0, c)
612
613 if self.opts.get('datesort'):
614 depth = {}
615 for n in s:
616 depth[n] = 0
617 pl = [p for p in self.commitcache[n].parents
618 if p not in self.map]
619 if pl:
620 depth[n] = max([depth[p] for p in pl]) + 1
621
622 s = [(depth[n], self.commitcache[n].date, n) for n in s]
623 s.sort()
624 s = [e[2] for e in s]
625
626 return s
627
628 def copy(self, rev):
629 c = self.commitcache[rev]
630 files = self.source.getchanges(rev)
631
632 for f, v in files:
633 try:
634 data = self.source.getfile(f, v)
635 except IOError, inst:
636 self.dest.delfile(f)
637 else:
638 e = self.source.getmode(f, v)
639 self.dest.putfile(f, e, data)
640
641 r = [self.map[v] for v in c.parents]
642 f = [f for f, v in files]
643 self.map[rev] = self.dest.putcommit(f, r, c)
644 file(self.mapfile, "a").write("%s %s\n" % (rev, self.map[rev]))
645
646 def convert(self):
647 self.ui.status("scanning source...\n")
648 heads = self.source.getheads()
649 parents = self.walktree(heads)
650 self.ui.status("sorting...\n")
651 t = self.toposort(parents)
652 num = len(t)
653 c = None
654
655 self.ui.status("converting...\n")
656 for c in t:
657 num -= 1
658 desc = self.commitcache[c].desc
659 if "\n" in desc:
660 desc = desc.splitlines()[0]
661 self.ui.status("%d %s\n" % (num, desc))
662 self.copy(c)
663
664 tags = self.source.gettags()
665 ctags = {}
666 for k in tags:
667 v = tags[k]
668 if v in self.map:
669 ctags[k] = self.map[v]
670
671 if c and ctags:
672 nrev = self.dest.puttags(ctags)
673 # write another hash correspondence to override the previous
674 # one so we don't end up with extra tag heads
675 if nrev:
676 file(self.mapfile, "a").write("%s %s\n" % (c, nrev))
677
678 def _convert(ui, src, dest=None, mapfile=None, **opts):
679 '''Convert a foreign SCM repository to a Mercurial one.
680
681 Accepted source formats:
682 - GIT
683 - CVS
684
685 Accepted destination formats:
686 - Mercurial
687
688 If destination isn't given, a new Mercurial repo named <src>-hg will
689 be created. If <mapfile> isn't given, it will be put in a default
690 location (<dest>/.hg/shamap by default)
691
692 The <mapfile> is a simple text file that maps each source commit ID to
693 the destination ID for that revision, like so:
694
695 <source ID> <destination ID>
696
697 If the file doesn't exist, it's automatically created. It's updated
698 on each commit copied, so convert-repo can be interrupted and can
699 be run repeatedly to copy new commits.
700 '''
701
702 srcc = converter(ui, src)
703 if not hasattr(srcc, "getcommit"):
704 raise util.Abort("%s: can't read from this repo type" % src)
705
706 if not dest:
707 dest = src + "-hg"
708 ui.status("assuming destination %s\n" % dest)
709
710 # Try to be smart and initialize things when required
711 if os.path.isdir(dest):
712 if len(os.listdir(dest)) > 0:
713 try:
714 hg.repository(ui, dest)
715 ui.status("destination %s is a Mercurial repository\n" % dest)
716 except hg.RepoError:
717 raise util.Abort(
718 "destination directory %s is not empty.\n"
719 "Please specify an empty directory to be initialized\n"
720 "or an already initialized mercurial repository"
721 % dest)
722 else:
723 ui.status("initializing destination %s repository\n" % dest)
724 hg.repository(ui, dest, create=True)
725 elif os.path.exists(dest):
726 raise util.Abort("destination %s exists and is not a directory" % dest)
727 else:
728 ui.status("initializing destination %s repository\n" % dest)
729 hg.repository(ui, dest, create=True)
730
731 destc = converter(ui, dest)
732 if not hasattr(destc, "putcommit"):
733 raise util.Abort("%s: can't write to this repo type" % src)
734
735 if not mapfile:
736 try:
737 mapfile = destc.mapfile()
738 except:
739 mapfile = os.path.join(destc, "map")
740
741 c = convert(ui, srcc, destc, mapfile, opts)
742 c.convert()
743
744 cmdtable = {
745 "convert":
746 (_convert,
747 [('', 'datesort', None, 'try to sort changesets by date')],
748 'hg convert [OPTION]... SOURCE [DEST [MAPFILE]]'),
749 }
General Comments 0
You need to be logged in to leave comments. Login now