# convert.py Foreign SCM converter # # Copyright 2005-2007 Matt Mackall # # This software may be used and distributed according to the terms # of the GNU General Public License, incorporated herein by reference. from common import NoRepo, converter_source, converter_sink from cvs import convert_cvs from git import convert_git from hg import convert_mercurial from subversion import convert_svn import os, shutil from mercurial import hg, ui, util, commands commands.norepo += " convert" converters = [convert_cvs, convert_git, convert_svn, convert_mercurial] def convertsource(ui, path, **opts): for c in converters: if not hasattr(c, 'getcommit'): continue try: return c(ui, path, **opts) except NoRepo: pass raise util.Abort('%s: unknown repository type' % path) def convertsink(ui, path): if not os.path.isdir(path): raise util.Abort("%s: not a directory" % path) for c in converters: if not hasattr(c, 'putcommit'): continue try: return c(ui, path) except NoRepo: pass raise util.Abort('%s: unknown repository type' % path) class convert(object): def __init__(self, ui, source, dest, mapfile, opts): self.source = source self.dest = dest self.ui = ui self.opts = opts self.commitcache = {} self.mapfile = mapfile self.mapfilefd = None self.authors = {} self.authorfile = None self.map = {} try: origmapfile = open(self.mapfile, 'r') for l in origmapfile: sv, dv = l[:-1].split() self.map[sv] = dv origmapfile.close() except IOError: pass # Read first the dst author map if any authorfile = self.dest.authorfile() if authorfile and os.path.exists(authorfile): self.readauthormap(authorfile) # Extend/Override with new author map if necessary if opts.get('authors'): self.readauthormap(opts.get('authors')) self.authorfile = self.dest.authorfile() def walktree(self, heads): '''Return a mapping that identifies the uncommitted parents of every uncommitted changeset.''' visit = heads known = {} parents = {} while visit: n = visit.pop(0) if n in known or n in self.map: continue known[n] = 1 self.commitcache[n] = self.source.getcommit(n) cp = self.commitcache[n].parents parents[n] = [] for p in cp: parents[n].append(p) visit.append(p) return parents def toposort(self, parents): '''Return an ordering such that every uncommitted changeset is preceeded by all its uncommitted ancestors.''' visit = parents.keys() seen = {} children = {} while visit: n = visit.pop(0) if n in seen: continue seen[n] = 1 # Ensure that nodes without parents are present in the 'children' # mapping. children.setdefault(n, []) for p in parents[n]: if not p in self.map: visit.append(p) children.setdefault(p, []).append(n) s = [] removed = {} visit = children.keys() while visit: n = visit.pop(0) if n in removed: continue dep = 0 if n in parents: for p in parents[n]: if p in self.map: continue if p not in removed: # we're still dependent visit.append(n) dep = 1 break if not dep: # all n's parents are in the list removed[n] = 1 if n not in self.map: s.append(n) if n in children: for c in children[n]: visit.insert(0, c) if self.opts.get('datesort'): depth = {} for n in s: depth[n] = 0 pl = [p for p in self.commitcache[n].parents if p not in self.map] if pl: depth[n] = max([depth[p] for p in pl]) + 1 s = [(depth[n], self.commitcache[n].date, n) for n in s] s.sort() s = [e[2] for e in s] return s def mapentry(self, src, dst): if self.mapfilefd is None: try: self.mapfilefd = open(self.mapfile, "a") except IOError, (errno, strerror): raise util.Abort("Could not open map file %s: %s, %s\n" % (self.mapfile, errno, strerror)) self.map[src] = dst self.mapfilefd.write("%s %s\n" % (src, dst)) self.mapfilefd.flush() def writeauthormap(self): authorfile = self.authorfile if authorfile: self.ui.status('Writing author map file %s\n' % authorfile) ofile = open(authorfile, 'w+') for author in self.authors: ofile.write("%s=%s\n" % (author, self.authors[author])) ofile.close() def readauthormap(self, authorfile): afile = open(authorfile, 'r') for line in afile: try: srcauthor = line.split('=')[0].strip() dstauthor = line.split('=')[1].strip() if srcauthor in self.authors and dstauthor != self.authors[srcauthor]: self.ui.status( 'Overriding mapping for author %s, was %s, will be %s\n' % (srcauthor, self.authors[srcauthor], dstauthor)) else: self.ui.debug('Mapping author %s to %s\n' % (srcauthor, dstauthor)) self.authors[srcauthor] = dstauthor except IndexError: self.ui.warn( 'Ignoring bad line in author file map %s: %s\n' % (authorfile, line)) afile.close() def copy(self, rev): c = self.commitcache[rev] files = self.source.getchanges(rev) do_copies = (hasattr(c, 'copies') and hasattr(self.dest, 'copyfile')) for f, v in files: try: data = self.source.getfile(f, v) except IOError, inst: self.dest.delfile(f) else: e = self.source.getmode(f, v) self.dest.putfile(f, e, data) if do_copies: if f in c.copies: # Merely marks that a copy happened. self.dest.copyfile(c.copies[f], f) r = [self.map[v] for v in c.parents] f = [f for f, v in files] newnode = self.dest.putcommit(f, r, c) self.mapentry(rev, newnode) def convert(self): try: self.source.setrevmap(self.map) self.ui.status("scanning source...\n") heads = self.source.getheads() parents = self.walktree(heads) self.ui.status("sorting...\n") t = self.toposort(parents) num = len(t) c = None self.ui.status("converting...\n") for c in t: num -= 1 desc = self.commitcache[c].desc if "\n" in desc: desc = desc.splitlines()[0] author = self.commitcache[c].author author = self.authors.get(author, author) self.commitcache[c].author = author self.ui.status("%d %s\n" % (num, desc)) self.copy(c) tags = self.source.gettags() ctags = {} for k in tags: v = tags[k] if v in self.map: ctags[k] = self.map[v] if c and ctags: nrev = self.dest.puttags(ctags) # write another hash correspondence to override the previous # one so we don't end up with extra tag heads if nrev: self.mapentry(c, nrev) self.writeauthormap() finally: self.cleanup() def cleanup(self): if self.mapfilefd: self.mapfilefd.close() def _convert(ui, src, dest=None, mapfile=None, **opts): '''Convert a foreign SCM repository to a Mercurial one. Accepted source formats: - GIT - CVS - SVN Accepted destination formats: - Mercurial If no revision is given, all revisions will be converted. Otherwise, convert will only import up to the named revision (given in a format understood by the source). If no destination directory name is specified, it defaults to the basename of the source with '-hg' appended. If the destination repository doesn't exist, it will be created. If isn't given, it will be put in a default location (/.hg/shamap by default). The is a simple text file that maps each source commit ID to the destination ID for that revision, like so: If the file doesn't exist, it's automatically created. It's updated on each commit copied, so convert-repo can be interrupted and can be run repeatedly to copy new commits. The [username mapping] file is a simple text file that maps each source commit author to a destination commit author. It is handy for source SCMs that use unix logins to identify authors (eg: CVS). One line per author mapping and the line format is: srcauthor=whatever string you want ''' util._encoding = 'UTF-8' if not dest: dest = hg.defaultdest(src) + "-hg" ui.status("assuming destination %s\n" % dest) # Try to be smart and initalize things when required created = False if os.path.isdir(dest): if len(os.listdir(dest)) > 0: try: hg.repository(ui, dest) ui.status("destination %s is a Mercurial repository\n" % dest) except hg.RepoError: raise util.Abort( "destination directory %s is not empty.\n" "Please specify an empty directory to be initialized\n" "or an already initialized mercurial repository" % dest) else: ui.status("initializing destination %s repository\n" % dest) hg.repository(ui, dest, create=True) created = True elif os.path.exists(dest): raise util.Abort("destination %s exists and is not a directory" % dest) else: ui.status("initializing destination %s repository\n" % dest) hg.repository(ui, dest, create=True) created = True destc = convertsink(ui, dest) try: srcc = convertsource(ui, src, rev=opts.get('rev')) except Exception: if created: shutil.rmtree(dest, True) raise if not mapfile: try: mapfile = destc.mapfile() except: mapfile = os.path.join(destc, "map") c = convert(ui, srcc, destc, mapfile, opts) c.convert() cmdtable = { "convert": (_convert, [('A', 'authors', '', 'username mapping filename'), ('r', 'rev', '', 'import up to target revision REV'), ('', 'datesort', None, 'try to sort changesets by date')], 'hg convert [OPTION]... SOURCE [DEST [MAPFILE]]'), }