##// END OF EJS Templates
convert: allow the converter_source to say "skip this revision"...
Alexis S. L. Carvalho -
r5374:e7108742 default
parent child Browse files
Show More
@@ -1,492 +1,500 b''
1 1 # convert.py Foreign SCM converter
2 2 #
3 3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms
6 6 # of the GNU General Public License, incorporated herein by reference.
7 7
8 from common import NoRepo, converter_source, converter_sink
8 from common import NoRepo, SKIPREV, converter_source, converter_sink
9 9 from cvs import convert_cvs
10 10 from darcs import darcs_source
11 11 from git import convert_git
12 12 from hg import mercurial_source, mercurial_sink
13 13 from subversion import convert_svn, debugsvnlog
14 14
15 15 import os, shlex, shutil
16 16 from mercurial import hg, ui, util, commands
17 17 from mercurial.i18n import _
18 18
# Append both command names to the space-separated commands.norepo string.
commands.norepo += " convert debugsvnlog"

# Classes probed, in this order, by convertsource() and convertsink().
converters = [
    convert_cvs,
    convert_git,
    convert_svn,
    mercurial_source,
    mercurial_sink,
    darcs_source,
]
23 23
def convertsource(ui, path, **opts):
    """Return a conversion source object for the repository at path.

    Every entry of ``converters`` is tried in order.  An entry is skipped
    when it has no ``getcommit`` attribute or when probing/instantiating
    it raises AttributeError or NoRepo.  Extra keyword arguments are
    handed to the entry unchanged.

    Raises util.Abort when no entry accepts path.
    """
    for candidate in converters:
        try:
            # Reading the attribute first makes classes that do not define
            # getcommit fail with AttributeError before being instantiated.
            probe = candidate.getcommit
            return probe and candidate(ui, path, **opts)
        except (AttributeError, NoRepo):
            continue
    raise util.Abort('%s: unknown repository type' % path)
31 31
def convertsink(ui, path):
    """Return a conversion sink object for the directory at path.

    Every entry of ``converters`` is tried in order.  An entry is skipped
    when it has no ``putcommit`` attribute or when probing/instantiating
    it raises AttributeError or NoRepo.

    Raises util.Abort when path is not a directory or when no entry
    accepts it.
    """
    if not os.path.isdir(path):
        raise util.Abort("%s: not a directory" % path)
    for candidate in converters:
        try:
            # Reading the attribute first makes classes that do not define
            # putcommit fail with AttributeError before being instantiated.
            probe = candidate.putcommit
            return probe and candidate(ui, path)
        except (AttributeError, NoRepo):
            continue
    raise util.Abort('%s: unknown repository type' % path)
41 41
class converter(object):
    """Copy revisions from a conversion source into a conversion sink.

    The instance keeps three pieces of state:
    - self.map / self.maporder: source revision id -> destination id,
      loaded from and appended to the revision map file;
    - self.authors: source author -> destination author;
    - self.commitcache: commit objects already fetched from the source.
    """

    def __init__(self, ui, source, dest, revmapfile, filemapper, opts):
        """Load the existing revision map and author maps, if any.

        ui: object providing status(), debug() and warn()
        source: object used to read revisions (getcommit, getchanges, ...)
        dest: object used to write revisions (putcommit, putfile, ...)
        revmapfile: path of the revision map file; may not exist yet
        filemapper: callable mapping a source file name to a destination
            name (or a false value to drop the file); must provide active()
        opts: dictionary of options ('authors' and 'datesort' are read)
        """
        self.source = source
        self.dest = dest
        self.ui = ui
        self.opts = opts
        self.commitcache = {}
        self.revmapfile = revmapfile
        self.revmapfilefd = None
        self.authors = {}
        self.authorfile = None
        self.mapfile = filemapper

        self.maporder = []
        self.map = {}
        try:
            origrevmapfile = open(self.revmapfile, 'r')
            try:
                for l in origrevmapfile:
                    # split() already discards the line terminator; slicing
                    # the last character off would truncate the destination
                    # id of a final line that lacks a trailing newline.
                    fields = l.split()
                    if not fields:
                        continue
                    sv, dv = fields
                    if sv not in self.map:
                        self.maporder.append(sv)
                    self.map[sv] = dv
            finally:
                origrevmapfile.close()
        except IOError:
            # A missing or unreadable map file means nothing was converted.
            pass

        # Read first the dst author map if any
        authorfile = self.dest.authorfile()
        if authorfile and os.path.exists(authorfile):
            self.readauthormap(authorfile)
        # Extend/Override with new author map if necessary
        if opts.get('authors'):
            self.readauthormap(opts.get('authors'))
            self.authorfile = self.dest.authorfile()

    def walktree(self, heads):
        '''Return a mapping that identifies the uncommitted parents of every
        uncommitted changeset.

        heads is a list of source revision ids; it is not modified.  Every
        visited revision is fetched through cachecommit().'''
        # Work on a copy so the caller's list is not consumed.
        visit = list(heads)
        known = {}
        parents = {}
        while visit:
            n = visit.pop(0)
            if n in known or n in self.map:
                continue
            known[n] = 1
            commit = self.cachecommit(n)
            parents[n] = []
            for p in commit.parents:
                parents[n].append(p)
                visit.append(p)

        return parents

    def toposort(self, parents):
        '''Return an ordering such that every uncommitted changeset is
        preceded by all its uncommitted ancestors.

        parents is the mapping returned by walktree().  When the
        'datesort' option is set the result is re-sorted by
        (depth, date, id), where depth is the length of the longest chain
        of uncommitted ancestors.'''
        visit = list(parents)
        seen = {}
        children = {}

        while visit:
            n = visit.pop(0)
            if n in seen:
                continue
            seen[n] = 1
            # Ensure that nodes without parents are present in the 'children'
            # mapping.
            children.setdefault(n, [])
            for p in parents[n]:
                if not p in self.map:
                    visit.append(p)
                children.setdefault(p, []).append(n)

        s = []
        removed = {}
        visit = list(children)
        while visit:
            n = visit.pop(0)
            if n in removed:
                continue
            dep = 0
            if n in parents:
                for p in parents[n]:
                    if p in self.map:
                        continue
                    if p not in removed:
                        # we're still dependent
                        visit.append(n)
                        dep = 1
                        break

            if not dep:
                # all n's parents are in the list
                removed[n] = 1
                if n not in self.map:
                    s.append(n)
                if n in children:
                    for c in children[n]:
                        visit.insert(0, c)

        if self.opts.get('datesort'):
            depth = {}
            for n in s:
                depth[n] = 0
                pl = [p for p in self.commitcache[n].parents
                      if p not in self.map]
                if pl:
                    depth[n] = max([depth[p] for p in pl]) + 1

            s = [(depth[n], self.commitcache[n].date, n) for n in s]
            s.sort()
            s = [e[2] for e in s]

        return s

    def mapentry(self, src, dst):
        """Record src -> dst in memory and append it to the map file.

        The map file is opened in append mode on first use and flushed
        after every entry.  Raises util.Abort if it cannot be opened.
        """
        if self.revmapfilefd is None:
            try:
                self.revmapfilefd = open(self.revmapfile, "a")
            except IOError:
                # sys.exc_info() keeps this handler valid on every Python
                # version and does not depend on the exception unpacking
                # into exactly two values.
                import sys
                err = sys.exc_info()[1]
                raise util.Abort("Could not open map file %s: %s, %s\n" % (self.revmapfile, err.errno, err.strerror))
        self.map[src] = dst
        self.revmapfilefd.write("%s %s\n" % (src, dst))
        self.revmapfilefd.flush()

    def writeauthormap(self):
        """Write self.authors to self.authorfile as srcauthor=dstauthor lines.

        Does nothing when self.authorfile is unset.
        """
        authorfile = self.authorfile
        if authorfile:
            self.ui.status('Writing author map file %s\n' % authorfile)
            ofile = open(authorfile, 'w+')
            try:
                for author in self.authors:
                    ofile.write("%s=%s\n" % (author, self.authors[author]))
            finally:
                ofile.close()

    def readauthormap(self, authorfile):
        """Merge the srcauthor=dstauthor lines of authorfile into self.authors.

        Only the first '=' separates the two fields, so the destination
        author may itself contain '='.  Lines without '=' are reported
        with ui.warn() and skipped.
        """
        afile = open(authorfile, 'r')
        try:
            for line in afile:
                fields = line.split('=', 1)
                if len(fields) != 2:
                    self.ui.warn(
                        'Ignoring bad line in author file map %s: %s\n'
                        % (authorfile, line))
                    continue
                srcauthor = fields[0].strip()
                dstauthor = fields[1].strip()
                if srcauthor in self.authors and dstauthor != self.authors[srcauthor]:
                    self.ui.status(
                        'Overriding mapping for author %s, was %s, will be %s\n'
                        % (srcauthor, self.authors[srcauthor], dstauthor))
                else:
                    self.ui.debug('Mapping author %s to %s\n'
                                  % (srcauthor, dstauthor))
                self.authors[srcauthor] = dstauthor
        finally:
            afile.close()

    def cachecommit(self, rev):
        """Fetch rev from the source, apply the author map, cache and return it."""
        commit = self.source.getcommit(rev)
        commit.author = self.authors.get(commit.author, commit.author)
        self.commitcache[rev] = commit
        return commit

    def copy(self, rev):
        """Convert one source revision and record it in the revision map.

        rev must already be in self.commitcache.  When the source returns
        a string from getchanges() no commit is created: SKIPREV is stored
        as-is, any other string is taken as a source revision id whose
        existing destination is reused.
        """
        commit = self.commitcache[rev]
        do_copies = hasattr(self.dest, 'copyfile')
        filenames = []

        changes = self.source.getchanges(rev)
        if isinstance(changes, basestring):
            if changes == SKIPREV:
                dest = SKIPREV
            else:
                dest = self.map[changes]
            self.mapentry(rev, dest)
            return
        files, copies = changes
        parents = [self.map[r] for r in commit.parents]
        if commit.parents:
            prev = commit.parents[0]
            if prev not in self.commitcache:
                self.cachecommit(prev)
            pbranch = self.commitcache[prev].branch
        else:
            pbranch = None
        self.dest.setbranch(commit.branch, pbranch, parents)
        for f, v in files:
            newf = self.mapfile(f)
            if not newf:
                continue
            filenames.append(newf)
            try:
                data = self.source.getfile(f, v)
            except IOError:
                # The source signals a file missing from this revision
                # with IOError; mirror that as a deletion in the sink.
                self.dest.delfile(newf)
            else:
                e = self.source.getmode(f, v)
                self.dest.putfile(newf, e, data)
                if do_copies:
                    if f in copies:
                        copyf = self.mapfile(copies[f])
                        if copyf:
                            # Merely marks that a copy happened.
                            self.dest.copyfile(copyf, newf)

        if not filenames and self.mapfile.active():
            # Every file was filtered out: reuse the first parent's id.
            # NOTE(review): raises IndexError for a parentless revision.
            newnode = parents[0]
        else:
            newnode = self.dest.putcommit(filenames, parents, commit)
        self.mapentry(rev, newnode)

    def convert(self):
        """Run the whole conversion: scan, sort, copy revisions, copy tags.

        cleanup() is always called, even when a step raises.
        """
        try:
            self.source.before()
            self.dest.before()
            self.source.setrevmap(self.map, self.maporder)
            self.ui.status("scanning source...\n")
            heads = self.source.getheads()
            parents = self.walktree(heads)
            self.ui.status("sorting...\n")
            t = self.toposort(parents)
            num = len(t)
            c = None

            self.ui.status("converting...\n")
            for c in t:
                num -= 1
                desc = self.commitcache[c].desc
                if "\n" in desc:
                    desc = desc.splitlines()[0]
                self.ui.status("%d %s\n" % (num, desc))
                self.copy(c)

            tags = self.source.gettags()
            ctags = {}
            for k in tags:
                v = tags[k]
                # Drop tags on revisions that are unknown or were skipped.
                if self.map.get(v, SKIPREV) != SKIPREV:
                    ctags[k] = self.map[v]

            if c and ctags:
                nrev = self.dest.puttags(ctags)
                # write another hash correspondence to override the previous
                # one so we don't end up with extra tag heads
                if nrev:
                    self.mapentry(c, nrev)

            self.writeauthormap()
        finally:
            self.cleanup()

    def cleanup(self):
        """Call after() on sink and source, then close the map file.

        The map file is closed even if one of the after() hooks raises.
        """
        try:
            try:
                self.dest.after()
            finally:
                self.source.after()
        finally:
            if self.revmapfilefd:
                self.revmapfilefd.close()
287 295
def rpairs(name):
    """Yield (prefix, suffix) splits of name at each '/', longest prefix first.

    The first pair is (name, ''); every later pair cuts name at the next
    '/' found scanning right to left, the '/' itself belonging to neither
    part.
    """
    cut = len(name)
    while True:
        yield name[:cut], name[cut + 1:]
        cut = name.rfind('/', 0, cut)
        if cut == -1:
            return
293 301
class filemapper(object):
    '''Map and filter filenames when importing.
    A name can be mapped to itself, a new name, or None (omit from new
    repository).'''

    def __init__(self, ui, path=None):
        """Create an empty mapper, or load the filemap at path.

        ui: object providing warn(), used to report filemap errors
        path: optional filemap file; util.Abort is raised when parsing
            it reports at least one error
        """
        self.ui = ui
        self.include = {}
        self.exclude = {}
        self.rename = {}
        if path:
            if self.parse(path):
                raise util.Abort(_('errors in filemap'))

    def parse(self, path):
        """Read the directives in path and return the number of errors.

        Recognised directives are 'include NAME', 'exclude NAME',
        'rename SRC DEST' and 'source FILE' (which parses FILE
        recursively).  Conflicting or unknown directives are reported
        through ui.warn() and counted.
        """
        errs = 0
        def check(name, mapping, listname):
            # Report name if it was already put in a conflicting list.
            if name in mapping:
                self.ui.warn(_('%s:%d: %r already in %s list\n') %
                             (lex.infile, lex.lineno, name, listname))
                return 1
            return 0
        fp = open(path)
        try:
            lex = shlex.shlex(fp, path, True)
            # Let path names containing punctuation parse as single tokens.
            lex.wordchars += '!@#$%^&*()-=+[]{}|;:,./<>?'
            cmd = lex.get_token()
            while cmd:
                if cmd == 'include':
                    name = lex.get_token()
                    errs += check(name, self.exclude, 'exclude')
                    self.include[name] = name
                elif cmd == 'exclude':
                    name = lex.get_token()
                    errs += check(name, self.include, 'include')
                    errs += check(name, self.rename, 'rename')
                    self.exclude[name] = name
                elif cmd == 'rename':
                    src = lex.get_token()
                    dest = lex.get_token()
                    errs += check(src, self.exclude, 'exclude')
                    self.rename[src] = dest
                elif cmd == 'source':
                    errs += self.parse(lex.get_token())
                else:
                    self.ui.warn(_('%s:%d: unknown directive %r\n') %
                                 (lex.infile, lex.lineno, cmd))
                    errs += 1
                cmd = lex.get_token()
        finally:
            # Close the filemap even when a directive raises.
            fp.close()
        return errs

    def lookup(self, name, mapping):
        """Find the longest '/'-delimited prefix of name present in mapping.

        Returns (mapping[prefix], prefix, suffix) for the first match, or
        ('', name, '') when no prefix of name is a key of mapping.
        """
        for pre, suf in rpairs(name):
            if pre in mapping:
                return mapping[pre], pre, suf
        return '', name, ''

    def __call__(self, name):
        """Return the destination name for name, or None to omit it.

        A name is omitted when an include list exists and does not cover
        it, or when the exclude list covers it.  Otherwise the longest
        matching rename is applied; a rename target of '.' strips the
        matched prefix.
        """
        if self.include:
            inc = self.lookup(name, self.include)[0]
        else:
            inc = name
        if self.exclude:
            exc = self.lookup(name, self.exclude)[0]
        else:
            exc = ''
        if not inc or exc:
            return None
        newpre, pre, suf = self.lookup(name, self.rename)
        if newpre:
            if newpre == '.':
                return suf
            if suf:
                return newpre + '/' + suf
            return newpre
        return name

    def active(self):
        """Return True when at least one include/exclude/rename rule exists."""
        return bool(self.include or self.exclude or self.rename)
373 381
# Entry point of the "convert" command.
#   ui:         object providing status()
#   src:        path of the repository to read
#   dest:       destination directory; defaults to
#               hg.defaultdest(src) + "-hg"
#   revmapfile: revision map path; defaults to destc.revmapfile()
#   opts:       'rev', 'filemap', 'authors' and 'datesort' are read here
#               or by the converter
# Raises util.Abort when the destination cannot be used.
def convert(ui, src, dest=None, revmapfile=None, **opts):
    """Convert a foreign SCM repository to a Mercurial one.

    Accepted source formats:
    - CVS
    - Darcs
    - git
    - Subversion

    Accepted destination formats:
    - Mercurial

    If no revision is given, all revisions will be converted. Otherwise,
    convert will only import up to the named revision (given in a format
    understood by the source).

    If no destination directory name is specified, it defaults to the
    basename of the source with '-hg' appended. If the destination
    repository doesn't exist, it will be created.

    If <revmapfile> isn't given, it will be put in a default location
    (<dest>/.hg/shamap by default). The <revmapfile> is a simple text
    file that maps each source commit ID to the destination ID for
    that revision, like so:
    <source ID> <destination ID>

    If the file doesn't exist, it's automatically created. It's updated
    on each commit copied, so convert-repo can be interrupted and can
    be run repeatedly to copy new commits.

    The [username mapping] file is a simple text file that maps each source
    commit author to a destination commit author. It is handy for source SCMs
    that use unix logins to identify authors (eg: CVS). One line per author
    mapping and the line format is:
    srcauthor=whatever string you want

    The filemap is a file that allows filtering and remapping of files
    and directories. Comment lines start with '#'. Each line can
    contain one of the following directives:

      include path/to/file

      exclude path/to/file

      rename from/file to/file

    The 'include' directive causes a file, or all files under a
    directory, to be included in the destination repository. The
    'exclude' directive causes files or directories to be omitted.
    The 'rename' directive renames a file or directory. To rename
    from a subdirectory into the root of the repository, use '.' as
    the path to rename to.
    """

    util._encoding = 'UTF-8'

    if not dest:
        dest = hg.defaultdest(src) + "-hg"
        ui.status("assuming destination %s\n" % dest)

    # Try to be smart and initialize things when required
    created = False
    if os.path.isdir(dest):
        if len(os.listdir(dest)) > 0:
            try:
                hg.repository(ui, dest)
                ui.status("destination %s is a Mercurial repository\n" % dest)
            except hg.RepoError:
                raise util.Abort(
                    "destination directory %s is not empty.\n"
                    "Please specify an empty directory to be initialized\n"
                    "or an already initialized mercurial repository"
                    % dest)
        else:
            ui.status("initializing destination %s repository\n" % dest)
            hg.repository(ui, dest, create=True)
            created = True
    elif os.path.exists(dest):
        raise util.Abort("destination %s exists and is not a directory" % dest)
    else:
        ui.status("initializing destination %s repository\n" % dest)
        hg.repository(ui, dest, create=True)
        created = True

    destc = convertsink(ui, dest)

    try:
        srcc = convertsource(ui, src, rev=opts.get('rev'))
    except Exception:
        # Do not leave behind a repository this call created.
        if created:
            shutil.rmtree(dest, True)
        raise

    if not revmapfile:
        try:
            revmapfile = destc.revmapfile()
        except (AttributeError, NotImplementedError):
            # The sink does not name a map file: keep one inside the
            # destination directory.  The path is built from dest, the
            # directory name; destc is the sink object, not a path.
            revmapfile = os.path.join(dest, "map")

    # opts.get() tolerates callers that do not pass a filemap option.
    c = converter(ui, srcc, destc, revmapfile,
                  filemapper(ui, opts.get('filemap')), opts)
    c.convert()
477 485
478 486
# Command table: name -> (function, option list, synopsis).
# Each option is (short name, long name, default, help text).
cmdtable = {
    "convert": (
        convert,
        [
            ('A', 'authors', '', 'username mapping filename'),
            ('', 'filemap', '', 'remap file names using contents of file'),
            ('r', 'rev', '', 'import up to target revision REV'),
            ('', 'datesort', None, 'try to sort changesets by date'),
        ],
        'hg convert [OPTION]... SOURCE [DEST [MAPFILE]]',
    ),
    "debugsvnlog": (
        debugsvnlog,
        [],
        'hg debugsvnlog',
    ),
}
492 500
@@ -1,154 +1,156 b''
1 1 # common code for the convert extension
2 2 import base64
3 3 import cPickle as pickle
4 4
5 5 def encodeargs(args):
6 6 def encodearg(s):
7 7 lines = base64.encodestring(s)
8 8 lines = [l.splitlines()[0] for l in lines]
9 9 return ''.join(lines)
10 10
11 11 s = pickle.dumps(args)
12 12 return encodearg(s)
13 13
14 14 def decodeargs(s):
15 15 s = base64.decodestring(s)
16 16 return pickle.loads(s)
17 17
class NoRepo(Exception):
    """Raised by a source or sink when path is not a valid repository."""


# Marker string used in place of a destination revision id for a source
# revision that was skipped instead of converted.
SKIPREV = 'hg-convert-skipped-revision'

21
class commit(object):
    """Plain record describing one revision.

    All constructor arguments are stored unchanged as attributes of the
    same name: author, date, desc, parents, branch and rev.
    """

    def __init__(self, author, date, desc, parents, branch=None, rev=None):
        # Optional fields first, then the mandatory ones.
        self.rev = rev
        self.branch = branch
        self.parents = parents
        self.desc = desc
        self.date = date
        self.author = author
28 30
class converter_source(object):
    """Conversion source interface"""

    def __init__(self, ui, path, rev=None):
        """Initialize conversion source (or raise NoRepo("message")
        exception if path is not a valid repository)"""
        self.ui = ui
        self.path = path
        self.rev = rev

        # Default encoding used by recode() when none is given.
        self.encoding = 'utf-8'

    def before(self):
        """Hook with no default behaviour; the base class returns None."""
        pass

    def after(self):
        """Hook with no default behaviour; the base class returns None."""
        pass

    def setrevmap(self, revmap, order):
        """set the map of already-converted revisions

        order is a list with the keys from revmap in the order they
        appear in the revision map file."""
        pass

    def getheads(self):
        """Return a list of this repository's heads"""
        raise NotImplementedError()

    def getfile(self, name, rev):
        """Return file contents as a string"""
        raise NotImplementedError()

    def getmode(self, name, rev):
        """Return file mode, eg. '', 'x', or 'l'"""
        raise NotImplementedError()

    def getchanges(self, version):
        """Returns a tuple of (files, copies)
        Files is a sorted list of (filename, id) tuples for all files changed
        in version, where id is the source revision id of the file.

        copies is a dictionary of dest: source

        A string may be returned instead of the tuple: SKIPREV to have the
        revision skipped, or the id of an already converted source
        revision whose destination revision should be reused.
        """
        raise NotImplementedError()

    def getcommit(self, version):
        """Return the commit object for version"""
        raise NotImplementedError()

    def gettags(self):
        """Return the tags as a dictionary of name: revision"""
        raise NotImplementedError()

    def recode(self, s, encoding=None):
        """Return s as a UTF-8 encoded byte string.

        Unicode input is simply encoded.  Byte strings are decoded with
        encoding (default: self.encoding, then 'utf-8'); if that fails
        latin-1 is tried, and as a last resort encoding is used again
        with undecodable bytes replaced.
        """
        if not encoding:
            encoding = self.encoding or 'utf-8'

        if isinstance(s, unicode):
            return s.encode("utf-8")
        # The fallbacks are deliberately best-effort, but "except Exception"
        # rather than a bare "except" lets interpreter-exit requests
        # propagate on Python versions where they are not Exceptions.
        try:
            return s.decode(encoding).encode("utf-8")
        except Exception:
            try:
                return s.decode("latin-1").encode("utf-8")
            except Exception:
                return s.decode(encoding, "replace").encode("utf-8")
96 98
class converter_sink(object):
    """Interface implemented by conversion targets.

    Except for authorfile() and setbranch(), every method of this base
    class raises NotImplementedError.
    """

    def __init__(self, ui, path):
        """Set up the sink for the repository at path.

        Implementations raise NoRepo("message") when path is not a valid
        repository."""
        raise NotImplementedError()

    def getheads(self):
        """Return the heads of this repository as a list."""
        raise NotImplementedError()

    def revmapfile(self):
        """Return the path of the revision map file.

        The file holds one "source_rev_id sink_rev_id" pair per line,
        relating equivalent revisions of the two systems."""
        raise NotImplementedError()

    def authorfile(self):
        """Return the path of the author map file, or None.

        The file holds one "srcauthor=dstauthor" pair per line, relating
        equivalent author identifiers of the two systems.  The base class
        returns None."""
        return None

    def putfile(self, f, e, data):
        """Stage a file for the next putcommit().

        f: path to file
        e: '', 'x', or 'l' (regular file, executable, or symlink)
        data: file contents"""
        raise NotImplementedError()

    def delfile(self, f):
        """Stage the removal of a file for the next putcommit().

        f: path to file"""
        raise NotImplementedError()

    def putcommit(self, files, parents, commit):
        """Create a revision from the staged changes.

        files lists all changed files and parents the parents of the new
        revision.  commit is a commit object carrying at least the
        author, date and message of the changeset.  This is called after
        the putfile() and delfile() calls; the sink is not told to update
        itself to a particular revision (or even what that revision
        would be) before it receives the file data."""
        raise NotImplementedError()

    def puttags(self, tags):
        """Store tags in the sink.

        tags: {tagname: sink_rev_id, ...}"""
        raise NotImplementedError()

    def setbranch(self, branch, pbranch, parents):
        """Select the branch for the commits that follow.

        Called before the first putfile on the branch.  The base class
        does nothing.
        branch: branch name for subsequent commits
        pbranch: branch name of parent commit
        parents: destination revisions of parent"""
        pass
General Comments 0
You need to be logged in to leave comments. Login now