convert: split toposort() into subfunctions for readability
Patrick Mezard
r8688:31e613a8 default
@@ -1,351 +1,377 @@
 # convcmd - convert extension commands definition
 #
 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
 #
 # This software may be used and distributed according to the terms of the
 # GNU General Public License version 2, incorporated herein by reference.

 from common import NoRepo, MissingTool, SKIPREV, mapfile
 from cvs import convert_cvs
 from darcs import darcs_source
 from git import convert_git
 from hg import mercurial_source, mercurial_sink
 from subversion import svn_source, svn_sink
 from monotone import monotone_source
 from gnuarch import gnuarch_source
 from bzr import bzr_source
 from p4 import p4_source
 import filemap

 import os, shutil
 from mercurial import hg, util, encoding
 from mercurial.i18n import _

 orig_encoding = 'ascii'

 def recode(s):
     if isinstance(s, unicode):
         return s.encode(orig_encoding, 'replace')
     else:
         return s.decode('utf-8').encode(orig_encoding, 'replace')

 source_converters = [
     ('cvs', convert_cvs),
     ('git', convert_git),
     ('svn', svn_source),
     ('hg', mercurial_source),
     ('darcs', darcs_source),
     ('mtn', monotone_source),
     ('gnuarch', gnuarch_source),
     ('bzr', bzr_source),
     ('p4', p4_source),
     ]

 sink_converters = [
     ('hg', mercurial_sink),
     ('svn', svn_sink),
     ]

 def convertsource(ui, path, type, rev):
     exceptions = []
     for name, source in source_converters:
         try:
             if not type or name == type:
                 return source(ui, path, rev)
         except (NoRepo, MissingTool), inst:
             exceptions.append(inst)
     if not ui.quiet:
         for inst in exceptions:
             ui.write("%s\n" % inst)
     raise util.Abort(_('%s: missing or unsupported repository') % path)

 def convertsink(ui, path, type):
     for name, sink in sink_converters:
         try:
             if not type or name == type:
                 return sink(ui, path)
         except NoRepo, inst:
             ui.note(_("convert: %s\n") % inst)
     raise util.Abort(_('%s: unknown repository type') % path)

 class converter(object):
     def __init__(self, ui, source, dest, revmapfile, opts):

         self.source = source
         self.dest = dest
         self.ui = ui
         self.opts = opts
         self.commitcache = {}
         self.authors = {}
         self.authorfile = None

         # Record converted revisions persistently: maps source revision
         # ID to target revision ID (both strings). (This is how
         # incremental conversions work.)
         self.map = mapfile(ui, revmapfile)

         # Read first the dst author map if any
         authorfile = self.dest.authorfile()
         if authorfile and os.path.exists(authorfile):
             self.readauthormap(authorfile)
         # Extend/Override with new author map if necessary
         if opts.get('authors'):
             self.readauthormap(opts.get('authors'))
             self.authorfile = self.dest.authorfile()

         self.splicemap = mapfile(ui, opts.get('splicemap'))
         self.branchmap = mapfile(ui, opts.get('branchmap'))

     def walktree(self, heads):
         '''Return a mapping that identifies the uncommitted parents of every
         uncommitted changeset.'''
         visit = heads
         known = set()
         parents = {}
         while visit:
             n = visit.pop(0)
             if n in known or n in self.map: continue
             known.add(n)
             commit = self.cachecommit(n)
             parents[n] = []
             for p in commit.parents:
                 parents[n].append(p)
                 visit.append(p)

         return parents

     def toposort(self, parents):
         '''Return an ordering such that every uncommitted changeset is
         preceeded by all its uncommitted ancestors.'''
-        visit = parents.keys()
-        seen = set()
-        children = {}
-        actives = []
-
-        while visit:
-            n = visit.pop(0)
-            if n in seen: continue
-            seen.add(n)
-            # Ensure that nodes without parents are present in the 'children'
-            # mapping.
-            children.setdefault(n, [])
-            hasparent = False
-            for p in parents[n]:
-                if not p in self.map:
-                    visit.append(p)
-                    hasparent = True
-                children.setdefault(p, []).append(n)
-            if not hasparent:
-                actives.append(n)
-
-        del seen
-        del visit
-
-        if self.opts.get('datesort'):
+
+        def mapchildren(parents):
+            """Return a (children, roots) tuple where 'children' maps parent
+            revision identifiers to children ones, and 'roots' is the list of
+            revisions without parents. 'parents' must be a mapping of revision
+            identifier to its parents ones.
+            """
+            visit = parents.keys()
+            seen = set()
+            children = {}
+            roots = []
+
+            while visit:
+                n = visit.pop(0)
+                if n in seen:
+                    continue
+                seen.add(n)
+                # Ensure that nodes without parents are present in the
+                # 'children' mapping.
+                children.setdefault(n, [])
+                hasparent = False
+                for p in parents[n]:
+                    if not p in self.map:
+                        visit.append(p)
+                        hasparent = True
+                    children.setdefault(p, []).append(n)
+                if not hasparent:
+                    roots.append(n)
+
+            return children, roots
+
+        # Sort functions are supposed to take a list of revisions which
+        # can be converted immediately and pick one
+
+        def makebranchsorter():
+            """If the previously converted revision has a child in the
+            eligible revisions list, pick it. Return the list head
+            otherwise. Branch sort attempts to minimize branch
+            switching, which is harmful for Mercurial backend
+            compression.
+            """
+            prev = [None]
+            def picknext(nodes):
+                next = nodes[0]
+                for n in nodes:
+                    if prev[0] in parents[n]:
+                        next = n
+                        break
+                prev[0] = next
+                return next
+            return picknext
+
+        def makedatesorter():
+            """Sort revisions by date."""
             dates = {}
             def getdate(n):
                 if n not in dates:
                     dates[n] = util.parsedate(self.commitcache[n].date)
                 return dates[n]

             def picknext(nodes):
                 return min([(getdate(n), n) for n in nodes])[1]
+
+            return picknext
+
+        if self.opts.get('datesort'):
+            picknext = makedatesorter()
         else:
-            prev = [None]
-            def picknext(nodes):
-                # Return the first eligible child of the previously converted
-                # revision, or any of them.
-                next = nodes[0]
-                for n in nodes:
-                    if prev[0] in parents[n]:
-                        next = n
-                        break
-                prev[0] = next
-                return next
+            picknext = makebranchsorter()
+
+        children, actives = mapchildren(parents)

         s = []
         pendings = {}
         while actives:
             n = picknext(actives)
             actives.remove(n)
             s.append(n)

             # Update dependents list
             for c in children.get(n, []):
                 if c not in pendings:
                     pendings[c] = [p for p in parents[c] if p not in self.map]
                 try:
                     pendings[c].remove(n)
                 except ValueError:
                     raise util.Abort(_('cycle detected between %s and %s')
                                        % (recode(c), recode(n)))
                 if not pendings[c]:
                     # Parents are converted, node is eligible
                     actives.insert(0, c)
                     pendings[c] = None

         if len(s) != len(parents):
             raise util.Abort(_("not all revisions were sorted"))

         return s

     def writeauthormap(self):
         authorfile = self.authorfile
         if authorfile:
             self.ui.status(_('Writing author map file %s\n') % authorfile)
             ofile = open(authorfile, 'w+')
             for author in self.authors:
                 ofile.write("%s=%s\n" % (author, self.authors[author]))
             ofile.close()

     def readauthormap(self, authorfile):
         afile = open(authorfile, 'r')
         for line in afile:

             line = line.strip()
             if not line or line.startswith('#'):
                 continue

             try:
                 srcauthor, dstauthor = line.split('=', 1)
             except ValueError:
                 msg = _('Ignoring bad line in author map file %s: %s\n')
                 self.ui.warn(msg % (authorfile, line.rstrip()))
                 continue

             srcauthor = srcauthor.strip()
             dstauthor = dstauthor.strip()
             if self.authors.get(srcauthor) in (None, dstauthor):
                 msg = _('mapping author %s to %s\n')
                 self.ui.debug(msg % (srcauthor, dstauthor))
                 self.authors[srcauthor] = dstauthor
                 continue

             m = _('overriding mapping for author %s, was %s, will be %s\n')
             self.ui.status(m % (srcauthor, self.authors[srcauthor], dstauthor))

         afile.close()

     def cachecommit(self, rev):
         commit = self.source.getcommit(rev)
         commit.author = self.authors.get(commit.author, commit.author)
         commit.branch = self.branchmap.get(commit.branch, commit.branch)
         self.commitcache[rev] = commit
         return commit

     def copy(self, rev):
         commit = self.commitcache[rev]

         changes = self.source.getchanges(rev)
         if isinstance(changes, basestring):
             if changes == SKIPREV:
                 dest = SKIPREV
             else:
                 dest = self.map[changes]
             self.map[rev] = dest
             return
         files, copies = changes
         pbranches = []
         if commit.parents:
             for prev in commit.parents:
                 if prev not in self.commitcache:
                     self.cachecommit(prev)
                 pbranches.append((self.map[prev],
                                   self.commitcache[prev].branch))
         self.dest.setbranch(commit.branch, pbranches)
         try:
             parents = self.splicemap[rev].replace(',', ' ').split()
             self.ui.status(_('spliced in %s as parents of %s\n') %
                            (parents, rev))
             parents = [self.map.get(p, p) for p in parents]
         except KeyError:
             parents = [b[0] for b in pbranches]
         newnode = self.dest.putcommit(files, copies, parents, commit, self.source)
         self.source.converted(rev, newnode)
         self.map[rev] = newnode

     def convert(self):

         try:
             self.source.before()
             self.dest.before()
             self.source.setrevmap(self.map)
             self.ui.status(_("scanning source...\n"))
             heads = self.source.getheads()
             parents = self.walktree(heads)
             self.ui.status(_("sorting...\n"))
             t = self.toposort(parents)
             num = len(t)
             c = None

             self.ui.status(_("converting...\n"))
             for c in t:
                 num -= 1
                 desc = self.commitcache[c].desc
                 if "\n" in desc:
                     desc = desc.splitlines()[0]
                 # convert log message to local encoding without using
                 # tolocal() because encoding.encoding conver() use it as
                 # 'utf-8'
                 self.ui.status("%d %s\n" % (num, recode(desc)))
                 self.ui.note(_("source: %s\n") % recode(c))
                 self.copy(c)

             tags = self.source.gettags()
             ctags = {}
             for k in tags:
                 v = tags[k]
                 if self.map.get(v, SKIPREV) != SKIPREV:
                     ctags[k] = self.map[v]

             if c and ctags:
                 nrev = self.dest.puttags(ctags)
                 # write another hash correspondence to override the previous
                 # one so we don't end up with extra tag heads
                 if nrev:
                     self.map[c] = nrev

             self.writeauthormap()
         finally:
             self.cleanup()

     def cleanup(self):
         try:
             self.dest.after()
         finally:
             self.source.after()
         self.map.close()

 def convert(ui, src, dest=None, revmapfile=None, **opts):
     global orig_encoding
     orig_encoding = encoding.encoding
     encoding.encoding = 'UTF-8'

     if not dest:
         dest = hg.defaultdest(src) + "-hg"
         ui.status(_("assuming destination %s\n") % dest)

     destc = convertsink(ui, dest, opts.get('dest_type'))

     try:
         srcc = convertsource(ui, src, opts.get('source_type'),
                              opts.get('rev'))
     except Exception:
         for path in destc.created:
             shutil.rmtree(path, True)
         raise

     fmap = opts.get('filemap')
     if fmap:
         srcc = filemap.filemap_source(ui, srcc, fmap)
         destc.setfilemapmode(True)

     if not revmapfile:
         try:
             revmapfile = destc.revmapfile()
         except:
             revmapfile = os.path.join(destc, "map")

     c = converter(ui, srcc, destc, revmapfile, opts)
     c.convert()
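
For readers skimming the new structure, here is a minimal, self-contained sketch of the scheduling scheme the refactored toposort() exposes: mapchildren() builds a parent-to-children map plus the list of roots, and a pluggable picknext() (the branch-sorter variant below) picks the next convertible revision. All names and the toy parents mapping are illustrative stand-ins with no Mercurial dependencies; unlike the real code, already-converted parents are assumed to be simply absent from the mapping.

# Illustrative sketch only -- hypothetical stand-ins for the refactored
# toposort() flow, not the Mercurial implementation.
def toposort(parents):
    """Order revisions so each one comes after all of its listed parents.

    'parents' maps a revision to the list of its not-yet-converted parents
    (assumption: converted parents are simply omitted from the lists).
    """
    def mapchildren(parents):
        # Build parent -> children and collect the roots (no pending parents).
        children, roots = {}, []
        for n, ps in parents.items():
            children.setdefault(n, [])
            for p in ps:
                children.setdefault(p, []).append(n)
            if not ps:
                roots.append(n)
        return children, roots

    def makebranchsorter():
        # Prefer a child of the previously emitted revision to limit
        # branch switching; otherwise fall back to the list head.
        prev = [None]
        def picknext(nodes):
            nxt = nodes[0]
            for n in nodes:
                if prev[0] in parents.get(n, []):
                    nxt = n
                    break
            prev[0] = nxt
            return nxt
        return picknext

    picknext = makebranchsorter()
    children, actives = mapchildren(parents)

    order, pendings = [], {}
    while actives:
        n = picknext(actives)
        actives.remove(n)
        order.append(n)
        for c in children.get(n, []):
            if c not in pendings:
                pendings[c] = list(parents[c])
            pendings[c].remove(n)
            if not pendings[c]:
                actives.insert(0, c)    # all parents emitted, now eligible
    if len(order) != len(parents):
        raise ValueError('not all revisions were sorted')
    return order

# Toy example: two roots, 'd' waits for both branches.
print(toposort({'a': [], 'b': ['a'], 'c': [], 'd': ['b', 'c']}))
# -> ['a', 'b', 'c', 'd']  (one valid topological order)

Swapping in a date-based picknext() would change only which closure is returned; the scheduling loop itself stays identical, which is what the split into subfunctions is meant to make obvious.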