##// END OF EJS Templates
convert: document splicemap, allow setting of multiple parents
Bryan O'Sullivan -
r6143:5b159ebb default
parent child Browse files
Show More
@@ -1,109 +1,121
1 1 # convert.py Foreign SCM converter
2 2 #
3 3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms
6 6 # of the GNU General Public License, incorporated herein by reference.
7 7
8 8 import convcmd
9 9 from mercurial import commands
10 10
11 11 # Commands definition was moved elsewhere to ease demandload job.
12 12
def convert(ui, src, dest=None, revmapfile=None, **opts):
    """Convert a foreign SCM repository to a Mercurial one.

    Accepted source formats:
    - Mercurial
    - CVS
    - Darcs
    - git
    - Subversion
    - GNU Arch

    Accepted destination formats:
    - Mercurial
    - Subversion (history on branches is not preserved)

    If no revision is given, all revisions will be converted. Otherwise,
    convert will only import up to the named revision (given in a format
    understood by the source).

    If no destination directory name is specified, it defaults to the
    basename of the source with '-hg' appended.  If the destination
    repository doesn't exist, it will be created.

    If <MAPFILE> isn't given, it will be put in a default location
    (<dest>/.hg/shamap by default).  The <MAPFILE> is a simple text
    file that maps each source commit ID to the destination ID for
    that revision, like so:
    <source ID> <destination ID>

    If the file doesn't exist, it's automatically created.  It's updated
    on each commit copied, so convert-repo can be interrupted and can
    be run repeatedly to copy new commits.

    The [username mapping] file is a simple text file that maps each source
    commit author to a destination commit author. It is handy for source SCMs
    that use unix logins to identify authors (eg: CVS). One line per author
    mapping and the line format is:
    srcauthor=whatever string you want

    The filemap is a file that allows filtering and remapping of files
    and directories.  Comment lines start with '#'.  Each line can
    contain one of the following directives:

      include path/to/file

      exclude path/to/file

      rename from/file to/file

    The 'include' directive causes a file, or all files under a
    directory, to be included in the destination repository, and the
    exclusion of all other files and dirs not explicitly included.
    The 'exclude' directive causes files or directories to be omitted.
    The 'rename' directive renames a file or directory.  To rename from a
    subdirectory into the root of the repository, use '.' as the path to
    rename to.

    The splicemap is a file that allows insertion of synthetic
    history, letting you specify the parents of a revision.  This is
    useful if you want to e.g. give a Subversion merge two parents, or
    graft two disconnected series of history together.  Each entry
    contains a key, followed by a space, followed by one or two
    values, separated by spaces.  The key is the revision ID in the
    source revision control system whose parents should be modified
    (same format as a key in .hg/shamap).  The values are the revision
    IDs (in either the source or destination revision control system)
    that should be used as the new parents for that node.

    Back end options:

    --config convert.hg.clonebranches=False   (boolean)
        hg target: XXX not documented
    --config convert.hg.saverev=True          (boolean)
        hg source: allow target to preserve source revision ID
    --config convert.hg.tagsbranch=default    (branch name)
        hg target: XXX not documented
    --config convert.hg.usebranchnames=True   (boolean)
        hg target: preserve branch names

    --config convert.svn.branches=branches    (directory name)
        svn source: specify the directory containing branches
    --config convert.svn.tags=tags            (directory name)
        svn source: specify the directory containing tags
    --config convert.svn.trunk=trunk          (directory name)
        svn source: specify the name of the trunk branch
    """
    # The docstring above doubles as the command's help text; the actual
    # work is delegated to convcmd so that this module stays cheap to load.
    return convcmd.convert(ui, src, dest, revmapfile, **opts)
89 100
def debugsvnlog(ui, **opts):
    # Thin forwarding wrapper: the implementation lives in convcmd.  A
    # comment is used rather than a docstring so the function's __doc__
    # stays unset, exactly as before.
    result = convcmd.debugsvnlog(ui, **opts)
    return result
92 103
# Add both command names to Mercurial's space-separated norepo list.
commands.norepo += " convert debugsvnlog"

# Command table: name -> (function, option list, synopsis).  Each option is
# a (short flag, long flag, default, help text) tuple.
cmdtable = {
    "convert": (
        convert,
        [
            ('A', 'authors', '', 'username mapping filename'),
            ('d', 'dest-type', '', 'destination repository type'),
            ('', 'filemap', '', 'remap file names using contents of file'),
            ('r', 'rev', '', 'import up to target revision REV'),
            ('s', 'source-type', '', 'source repository type'),
            ('', 'splicemap', '', 'splice synthesized history into place'),
            ('', 'datesort', None, 'try to sort changesets by date'),
        ],
        'hg convert [OPTION]... SOURCE [DEST [MAPFILE]]',
    ),
    "debugsvnlog": (
        debugsvnlog,
        [],
        'hg debugsvnlog',
    ),
}
@@ -1,348 +1,349
1 1 # convcmd - convert extension commands definition
2 2 #
3 3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms
6 6 # of the GNU General Public License, incorporated herein by reference.
7 7
8 8 from common import NoRepo, SKIPREV, converter_source, converter_sink, mapfile
9 9 from cvs import convert_cvs
10 10 from darcs import darcs_source
11 11 from git import convert_git
12 12 from hg import mercurial_source, mercurial_sink
13 13 from subversion import debugsvnlog, svn_source, svn_sink
14 14 from gnuarch import gnuarch_source
15 15 import filemap
16 16
17 17 import os, shutil
18 18 from mercurial import hg, util
19 19 from mercurial.i18n import _
20 20
# Encoding used by recode() for messages shown to the user.  The
# module-level convert() overwrites it with the value of util._encoding.
orig_encoding = 'ascii'

def recode(s):
    """Return s encoded in orig_encoding.

    Byte strings are decoded as UTF-8 first; unicode objects are encoded
    directly.  Characters that cannot be represented in orig_encoding are
    substituted (the 'replace' error handler) rather than raising.
    """
    if not isinstance(s, unicode):
        s = s.decode('utf-8')
    return s.encode(orig_encoding, 'replace')
28 28
# (type name, factory) pairs for the supported sources.  convertsource()
# walks this list in order, so the order is the probing order when no
# explicit type is requested.
source_converters = [
    ('cvs',     convert_cvs),
    ('git',     convert_git),
    ('svn',     svn_source),
    ('hg',      mercurial_source),
    ('darcs',   darcs_source),
    ('gnuarch', gnuarch_source),
]

# (type name, factory) pairs for the supported sinks, probed in order by
# convertsink().
sink_converters = [
    ('hg',  mercurial_sink),
    ('svn', svn_sink),
]
42 42
43 43 def convertsource(ui, path, type, rev):
44 44 exceptions = []
45 45 for name, source in source_converters:
46 46 try:
47 47 if not type or name == type:
48 48 return source(ui, path, rev)
49 49 except NoRepo, inst:
50 50 exceptions.append(inst)
51 51 if not ui.quiet:
52 52 for inst in exceptions:
53 53 ui.write(_("%s\n") % inst)
54 54 raise util.Abort('%s: unknown repository type' % path)
55 55
56 56 def convertsink(ui, path, type):
57 57 for name, sink in sink_converters:
58 58 try:
59 59 if not type or name == type:
60 60 return sink(ui, path)
61 61 except NoRepo, inst:
62 62 ui.note(_("convert: %s\n") % inst)
63 63 raise util.Abort('%s: unknown repository type' % path)
64 64
class converter(object):
    """Copy revisions from a source object into a sink object.

    Attributes set up by __init__:
      source, dest  -- the source and sink objects driving the conversion
      map           -- mapfile from source revision id to destination id
      splicemap     -- mapfile from source revision id to replacement
                       parents (read from the 'splicemap' option)
      commitcache   -- commits already fetched from the source, by id
      authors       -- source author -> destination author mapping
      authorfile    -- where writeauthormap() saves the mapping, or None
    """

    def __init__(self, ui, source, dest, revmapfile, opts):

        self.source = source
        self.dest = dest
        self.ui = ui
        self.opts = opts
        self.commitcache = {}
        self.authors = {}
        self.authorfile = None

        self.map = mapfile(ui, revmapfile)

        # Read first the dst author map if any
        authorfile = self.dest.authorfile()
        if authorfile and os.path.exists(authorfile):
            self.readauthormap(authorfile)
        # Extend/Override with new author map if necessary
        if opts.get('authors'):
            self.readauthormap(opts.get('authors'))
            self.authorfile = self.dest.authorfile()

        self.splicemap = mapfile(ui, opts.get('splicemap'))

    def walktree(self, heads):
        '''Return a mapping that identifies the uncommitted parents of every
        uncommitted changeset.'''
        # Work on a copy so the caller's list of heads is not consumed.
        visit = list(heads)
        known = {}
        parents = {}
        while visit:
            n = visit.pop(0)
            if n in known or n in self.map:
                continue
            known[n] = 1
            commit = self.cachecommit(n)
            parents[n] = list(commit.parents)
            visit.extend(parents[n])

        return parents

    def toposort(self, parents):
        '''Return an ordering such that every uncommitted changeset is
        preceded by all its uncommitted ancestors.'''
        visit = list(parents)
        seen = {}
        children = {}
        actives = []

        while visit:
            n = visit.pop(0)
            if n in seen:
                continue
            seen[n] = 1
            # Ensure that nodes without parents are present in the 'children'
            # mapping.
            children.setdefault(n, [])
            hasparent = False
            for p in parents[n]:
                if p not in self.map:
                    visit.append(p)
                    hasparent = True
                children.setdefault(p, []).append(n)
            if not hasparent:
                # Nothing left to convert before n: it can go first.
                actives.append(n)

        del seen
        del visit

        if self.opts.get('datesort'):
            dates = {}
            def getdate(n):
                # Parse each commit date at most once.
                if n not in dates:
                    dates[n] = util.parsedate(self.commitcache[n].date)
                return dates[n]

            def picknext(nodes):
                return min([(getdate(n), n) for n in nodes])[1]
        else:
            # One-element list so the closure below can rebind the value.
            prev = [None]
            def picknext(nodes):
                # Return the first eligible child of the previously converted
                # revision, or any of them.
                choice = nodes[0]
                for n in nodes:
                    if prev[0] in parents[n]:
                        choice = n
                        break
                prev[0] = choice
                return choice

        s = []
        pendings = {}
        while actives:
            n = picknext(actives)
            actives.remove(n)
            s.append(n)

            # Update dependents list
            for c in children.get(n, []):
                if c not in pendings:
                    pendings[c] = [p for p in parents[c] if p not in self.map]
                try:
                    pendings[c].remove(n)
                except ValueError:
                    raise util.Abort(_('cycle detected between %s and %s')
                                     % (recode(c), recode(n)))
                if not pendings[c]:
                    # Parents are converted, node is eligible
                    actives.insert(0, c)
                    pendings[c] = None

        if len(s) != len(parents):
            raise util.Abort(_("not all revisions were sorted"))

        return s

    def writeauthormap(self):
        '''Write self.authors to self.authorfile, one "src=dst" line per
        author.  Does nothing when no author file is configured.'''
        authorfile = self.authorfile
        if not authorfile:
            return
        self.ui.status('Writing author map file %s\n' % authorfile)
        ofile = open(authorfile, 'w+')
        try:
            for author in self.authors:
                ofile.write("%s=%s\n" % (author, self.authors[author]))
        finally:
            # Close the file even if a write fails.
            ofile.close()

    def readauthormap(self, authorfile):
        '''Merge the "src=dst" lines of authorfile into self.authors.

        A later mapping for the same source author replaces the earlier
        one (reported through ui.status).  Lines without an "=" are
        reported through ui.warn and skipped.'''
        afile = open(authorfile, 'r')
        try:
            for line in afile:
                fields = line.split('=')
                if len(fields) < 2:
                    self.ui.warn(
                        'Ignoring bad line in author file map %s: %s\n'
                        % (authorfile, line))
                    continue
                srcauthor = fields[0].strip()
                dstauthor = fields[1].strip()
                if (srcauthor in self.authors and
                    dstauthor != self.authors[srcauthor]):
                    self.ui.status(
                        'Overriding mapping for author %s, was %s, will be %s\n'
                        % (srcauthor, self.authors[srcauthor], dstauthor))
                else:
                    self.ui.debug('Mapping author %s to %s\n'
                                  % (srcauthor, dstauthor))
                self.authors[srcauthor] = dstauthor
        finally:
            # Close the file even if a ui call raises.
            afile.close()

    def cachecommit(self, rev):
        '''Fetch commit rev from the source, apply the author mapping,
        store it in self.commitcache and return it.'''
        commit = self.source.getcommit(rev)
        commit.author = self.authors.get(commit.author, commit.author)
        self.commitcache[rev] = commit
        return commit

    def copy(self, rev):
        '''Copy source revision rev (already in self.commitcache) into the
        sink and record the resulting id in self.map.'''
        commit = self.commitcache[rev]
        do_copies = hasattr(self.dest, 'copyfile')
        filenames = []

        changes = self.source.getchanges(rev)
        if isinstance(changes, basestring):
            # The source returned a string instead of a change list: either
            # the SKIPREV marker or another source revision whose
            # destination id is reused for rev.
            if changes == SKIPREV:
                dest = SKIPREV
            else:
                dest = self.map[changes]
            self.map[rev] = dest
            return
        files, copies = changes
        pbranches = []
        if commit.parents:
            for prev in commit.parents:
                if prev not in self.commitcache:
                    self.cachecommit(prev)
                pbranches.append((self.map[prev],
                                  self.commitcache[prev].branch))
        self.dest.setbranch(commit.branch, pbranches)
        for f, v in files:
            filenames.append(f)
            try:
                data = self.source.getfile(f, v)
            except IOError:
                # The source cannot supply the file: delete it in the sink.
                self.dest.delfile(f)
            else:
                e = self.source.getmode(f, v)
                self.dest.putfile(f, e, data)
                if do_copies and f in copies:
                    # Merely marks that a copy happened.
                    self.dest.copyfile(copies[f], f)

        try:
            spliced = self.splicemap[rev]
        except KeyError:
            # No splicemap entry: keep the converted ids of the real parents.
            parents = [b[0] for b in pbranches]
        else:
            # Parents may be separated by commas and/or whitespace.
            parents = spliced.replace(',', ' ').split()
            self.ui.status('spliced in %s as parents of %s\n' %
                           (parents, rev))
            # Source ids are translated through the revision map; anything
            # not found there is passed through unchanged.
            parents = [self.map.get(p, p) for p in parents]
        newnode = self.dest.putcommit(filenames, parents, commit)
        self.source.converted(rev, newnode)
        self.map[rev] = newnode

    def convert(self):
        '''Run the whole conversion: scan the source, order the revisions,
        copy each one, convert the tags and save the author map.  cleanup()
        is always called at the end, even on error.'''
        try:
            self.source.before()
            self.dest.before()
            self.source.setrevmap(self.map)
            self.ui.status("scanning source...\n")
            heads = self.source.getheads()
            parents = self.walktree(heads)
            self.ui.status("sorting...\n")
            t = self.toposort(parents)
            num = len(t)
            c = None

            self.ui.status("converting...\n")
            for c in t:
                num -= 1
                desc = self.commitcache[c].desc
                if "\n" in desc:
                    desc = desc.splitlines()[0]
                # convert log message to local encoding without using
                # tolocal() because util._encoding convert() use it as
                # 'utf-8'
                self.ui.status("%d %s\n" % (num, recode(desc)))
                # Translate the template first, then interpolate, so the
                # message catalog lookup sees the literal format string.
                self.ui.note(_("source: %s\n") % recode(c))
                self.copy(c)

            tags = self.source.gettags()
            ctags = {}
            for k in tags:
                v = tags[k]
                if self.map.get(v, SKIPREV) != SKIPREV:
                    ctags[k] = self.map[v]

            if c and ctags:
                nrev = self.dest.puttags(ctags)
                # write another hash correspondence to override the previous
                # one so we don't end up with extra tag heads
                if nrev:
                    self.map[c] = nrev

            self.writeauthormap()
        finally:
            self.cleanup()

    def cleanup(self):
        '''Run the after() hooks of sink and source and close the revision
        map.  Each step runs even if an earlier one raises.'''
        try:
            try:
                self.dest.after()
            finally:
                self.source.after()
        finally:
            self.map.close()
315 316
def convert(ui, src, dest=None, revmapfile=None, **opts):
    """Convert the repository at src into dest.

    ui         -- user interface object used for messages
    src        -- path of the source repository
    dest       -- path of the destination; derived from src with a "-hg"
                  suffix when empty
    revmapfile -- path of the revision map; when empty, the sink is asked
                  for its default location
    opts       -- command options; 'dest_type', 'source_type', 'rev' and
                  'filemap' are read here, the rest by converter

    Side effect: util._encoding is switched to 'UTF-8' and its previous
    value is kept in the module-level orig_encoding for recode().
    """
    global orig_encoding
    orig_encoding = util._encoding
    util._encoding = 'UTF-8'

    if not dest:
        dest = hg.defaultdest(src) + "-hg"
        ui.status("assuming destination %s\n" % dest)

    destc = convertsink(ui, dest, opts.get('dest_type'))

    try:
        srcc = convertsource(ui, src, opts.get('source_type'),
                             opts.get('rev'))
    except Exception:
        # Opening the source failed: remove whatever the sink created so a
        # failed run leaves nothing behind, then re-raise.
        for path in destc.created:
            shutil.rmtree(path, True)
        raise

    fmap = opts.get('filemap')
    if fmap:
        srcc = filemap.filemap_source(ui, srcc, fmap)
        destc.setfilemapmode(True)

    if not revmapfile:
        try:
            revmapfile = destc.revmapfile()
        except Exception:
            # Fall back to a "map" file under the destination path.  This
            # must join onto the path string, not onto the sink object.
            revmapfile = os.path.join(dest, "map")

    c = converter(ui, srcc, destc, revmapfile, opts)
    c.convert()
348 349
General Comments 0
You need to be logged in to leave comments. Login now