##// END OF EJS Templates
convert: split toposort() into subfunctions for readability
Patrick Mezard -
r8688:31e613a8 default
parent child Browse files
Show More
@@ -1,351 +1,377 b''
1 1 # convcmd - convert extension commands definition
2 2 #
3 3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2, incorporated herein by reference.
7 7
8 8 from common import NoRepo, MissingTool, SKIPREV, mapfile
9 9 from cvs import convert_cvs
10 10 from darcs import darcs_source
11 11 from git import convert_git
12 12 from hg import mercurial_source, mercurial_sink
13 13 from subversion import svn_source, svn_sink
14 14 from monotone import monotone_source
15 15 from gnuarch import gnuarch_source
16 16 from bzr import bzr_source
17 17 from p4 import p4_source
18 18 import filemap
19 19
20 20 import os, shutil
21 21 from mercurial import hg, util, encoding
22 22 from mercurial.i18n import _
23 23
24 24 orig_encoding = 'ascii'
25 25
26 26 def recode(s):
27 27 if isinstance(s, unicode):
28 28 return s.encode(orig_encoding, 'replace')
29 29 else:
30 30 return s.decode('utf-8').encode(orig_encoding, 'replace')
31 31
32 32 source_converters = [
33 33 ('cvs', convert_cvs),
34 34 ('git', convert_git),
35 35 ('svn', svn_source),
36 36 ('hg', mercurial_source),
37 37 ('darcs', darcs_source),
38 38 ('mtn', monotone_source),
39 39 ('gnuarch', gnuarch_source),
40 40 ('bzr', bzr_source),
41 41 ('p4', p4_source),
42 42 ]
43 43
44 44 sink_converters = [
45 45 ('hg', mercurial_sink),
46 46 ('svn', svn_sink),
47 47 ]
48 48
49 49 def convertsource(ui, path, type, rev):
50 50 exceptions = []
51 51 for name, source in source_converters:
52 52 try:
53 53 if not type or name == type:
54 54 return source(ui, path, rev)
55 55 except (NoRepo, MissingTool), inst:
56 56 exceptions.append(inst)
57 57 if not ui.quiet:
58 58 for inst in exceptions:
59 59 ui.write("%s\n" % inst)
60 60 raise util.Abort(_('%s: missing or unsupported repository') % path)
61 61
62 62 def convertsink(ui, path, type):
63 63 for name, sink in sink_converters:
64 64 try:
65 65 if not type or name == type:
66 66 return sink(ui, path)
67 67 except NoRepo, inst:
68 68 ui.note(_("convert: %s\n") % inst)
69 69 raise util.Abort(_('%s: unknown repository type') % path)
70 70
71 71 class converter(object):
72 72 def __init__(self, ui, source, dest, revmapfile, opts):
73 73
74 74 self.source = source
75 75 self.dest = dest
76 76 self.ui = ui
77 77 self.opts = opts
78 78 self.commitcache = {}
79 79 self.authors = {}
80 80 self.authorfile = None
81 81
82 82 # Record converted revisions persistently: maps source revision
83 83 # ID to target revision ID (both strings). (This is how
84 84 # incremental conversions work.)
85 85 self.map = mapfile(ui, revmapfile)
86 86
87 87 # Read first the dst author map if any
88 88 authorfile = self.dest.authorfile()
89 89 if authorfile and os.path.exists(authorfile):
90 90 self.readauthormap(authorfile)
91 91 # Extend/Override with new author map if necessary
92 92 if opts.get('authors'):
93 93 self.readauthormap(opts.get('authors'))
94 94 self.authorfile = self.dest.authorfile()
95 95
96 96 self.splicemap = mapfile(ui, opts.get('splicemap'))
97 97 self.branchmap = mapfile(ui, opts.get('branchmap'))
98 98
99 99 def walktree(self, heads):
100 100 '''Return a mapping that identifies the uncommitted parents of every
101 101 uncommitted changeset.'''
102 102 visit = heads
103 103 known = set()
104 104 parents = {}
105 105 while visit:
106 106 n = visit.pop(0)
107 107 if n in known or n in self.map: continue
108 108 known.add(n)
109 109 commit = self.cachecommit(n)
110 110 parents[n] = []
111 111 for p in commit.parents:
112 112 parents[n].append(p)
113 113 visit.append(p)
114 114
115 115 return parents
116 116
117 117 def toposort(self, parents):
118 118 '''Return an ordering such that every uncommitted changeset is
119 119 preceeded by all its uncommitted ancestors.'''
120
121 def mapchildren(parents):
122 """Return a (children, roots) tuple where 'children' maps parent
123 revision identifiers to children ones, and 'roots' is the list of
124 revisions without parents. 'parents' must be a mapping of revision
125 identifier to its parents ones.
126 """
120 127 visit = parents.keys()
121 128 seen = set()
122 129 children = {}
123 actives = []
130 roots = []
124 131
125 132 while visit:
126 133 n = visit.pop(0)
127 if n in seen: continue
134 if n in seen:
135 continue
128 136 seen.add(n)
129 # Ensure that nodes without parents are present in the 'children'
130 # mapping.
137 # Ensure that nodes without parents are present in the
138 # 'children' mapping.
131 139 children.setdefault(n, [])
132 140 hasparent = False
133 141 for p in parents[n]:
134 142 if not p in self.map:
135 143 visit.append(p)
136 144 hasparent = True
137 145 children.setdefault(p, []).append(n)
138 146 if not hasparent:
139 actives.append(n)
147 roots.append(n)
148
149 return children, roots
150
151 # Sort functions are supposed to take a list of revisions which
152 # can be converted immediately and pick one
140 153
141 del seen
142 del visit
154 def makebranchsorter():
155 """If the previously converted revision has a child in the
156 eligible revisions list, pick it. Return the list head
157 otherwise. Branch sort attempts to minimize branch
158 switching, which is harmful for Mercurial backend
159 compression.
160 """
161 prev = [None]
162 def picknext(nodes):
163 next = nodes[0]
164 for n in nodes:
165 if prev[0] in parents[n]:
166 next = n
167 break
168 prev[0] = next
169 return next
170 return picknext
143 171
144 if self.opts.get('datesort'):
172 def makedatesorter():
173 """Sort revisions by date."""
145 174 dates = {}
146 175 def getdate(n):
147 176 if n not in dates:
148 177 dates[n] = util.parsedate(self.commitcache[n].date)
149 178 return dates[n]
150 179
151 180 def picknext(nodes):
152 181 return min([(getdate(n), n) for n in nodes])[1]
182
183 return picknext
184
185 if self.opts.get('datesort'):
186 picknext = makedatesorter()
153 187 else:
154 prev = [None]
155 def picknext(nodes):
156 # Return the first eligible child of the previously converted
157 # revision, or any of them.
158 next = nodes[0]
159 for n in nodes:
160 if prev[0] in parents[n]:
161 next = n
162 break
163 prev[0] = next
164 return next
188 picknext = makebranchsorter()
189
190 children, actives = mapchildren(parents)
165 191
166 192 s = []
167 193 pendings = {}
168 194 while actives:
169 195 n = picknext(actives)
170 196 actives.remove(n)
171 197 s.append(n)
172 198
173 199 # Update dependents list
174 200 for c in children.get(n, []):
175 201 if c not in pendings:
176 202 pendings[c] = [p for p in parents[c] if p not in self.map]
177 203 try:
178 204 pendings[c].remove(n)
179 205 except ValueError:
180 206 raise util.Abort(_('cycle detected between %s and %s')
181 207 % (recode(c), recode(n)))
182 208 if not pendings[c]:
183 209 # Parents are converted, node is eligible
184 210 actives.insert(0, c)
185 211 pendings[c] = None
186 212
187 213 if len(s) != len(parents):
188 214 raise util.Abort(_("not all revisions were sorted"))
189 215
190 216 return s
191 217
192 218 def writeauthormap(self):
193 219 authorfile = self.authorfile
194 220 if authorfile:
195 221 self.ui.status(_('Writing author map file %s\n') % authorfile)
196 222 ofile = open(authorfile, 'w+')
197 223 for author in self.authors:
198 224 ofile.write("%s=%s\n" % (author, self.authors[author]))
199 225 ofile.close()
200 226
201 227 def readauthormap(self, authorfile):
202 228 afile = open(authorfile, 'r')
203 229 for line in afile:
204 230
205 231 line = line.strip()
206 232 if not line or line.startswith('#'):
207 233 continue
208 234
209 235 try:
210 236 srcauthor, dstauthor = line.split('=', 1)
211 237 except ValueError:
212 238 msg = _('Ignoring bad line in author map file %s: %s\n')
213 239 self.ui.warn(msg % (authorfile, line.rstrip()))
214 240 continue
215 241
216 242 srcauthor = srcauthor.strip()
217 243 dstauthor = dstauthor.strip()
218 244 if self.authors.get(srcauthor) in (None, dstauthor):
219 245 msg = _('mapping author %s to %s\n')
220 246 self.ui.debug(msg % (srcauthor, dstauthor))
221 247 self.authors[srcauthor] = dstauthor
222 248 continue
223 249
224 250 m = _('overriding mapping for author %s, was %s, will be %s\n')
225 251 self.ui.status(m % (srcauthor, self.authors[srcauthor], dstauthor))
226 252
227 253 afile.close()
228 254
229 255 def cachecommit(self, rev):
230 256 commit = self.source.getcommit(rev)
231 257 commit.author = self.authors.get(commit.author, commit.author)
232 258 commit.branch = self.branchmap.get(commit.branch, commit.branch)
233 259 self.commitcache[rev] = commit
234 260 return commit
235 261
236 262 def copy(self, rev):
237 263 commit = self.commitcache[rev]
238 264
239 265 changes = self.source.getchanges(rev)
240 266 if isinstance(changes, basestring):
241 267 if changes == SKIPREV:
242 268 dest = SKIPREV
243 269 else:
244 270 dest = self.map[changes]
245 271 self.map[rev] = dest
246 272 return
247 273 files, copies = changes
248 274 pbranches = []
249 275 if commit.parents:
250 276 for prev in commit.parents:
251 277 if prev not in self.commitcache:
252 278 self.cachecommit(prev)
253 279 pbranches.append((self.map[prev],
254 280 self.commitcache[prev].branch))
255 281 self.dest.setbranch(commit.branch, pbranches)
256 282 try:
257 283 parents = self.splicemap[rev].replace(',', ' ').split()
258 284 self.ui.status(_('spliced in %s as parents of %s\n') %
259 285 (parents, rev))
260 286 parents = [self.map.get(p, p) for p in parents]
261 287 except KeyError:
262 288 parents = [b[0] for b in pbranches]
263 289 newnode = self.dest.putcommit(files, copies, parents, commit, self.source)
264 290 self.source.converted(rev, newnode)
265 291 self.map[rev] = newnode
266 292
267 293 def convert(self):
268 294
269 295 try:
270 296 self.source.before()
271 297 self.dest.before()
272 298 self.source.setrevmap(self.map)
273 299 self.ui.status(_("scanning source...\n"))
274 300 heads = self.source.getheads()
275 301 parents = self.walktree(heads)
276 302 self.ui.status(_("sorting...\n"))
277 303 t = self.toposort(parents)
278 304 num = len(t)
279 305 c = None
280 306
281 307 self.ui.status(_("converting...\n"))
282 308 for c in t:
283 309 num -= 1
284 310 desc = self.commitcache[c].desc
285 311 if "\n" in desc:
286 312 desc = desc.splitlines()[0]
287 313 # convert log message to local encoding without using
288 314 # tolocal() because encoding.encoding conver() use it as
289 315 # 'utf-8'
290 316 self.ui.status("%d %s\n" % (num, recode(desc)))
291 317 self.ui.note(_("source: %s\n") % recode(c))
292 318 self.copy(c)
293 319
294 320 tags = self.source.gettags()
295 321 ctags = {}
296 322 for k in tags:
297 323 v = tags[k]
298 324 if self.map.get(v, SKIPREV) != SKIPREV:
299 325 ctags[k] = self.map[v]
300 326
301 327 if c and ctags:
302 328 nrev = self.dest.puttags(ctags)
303 329 # write another hash correspondence to override the previous
304 330 # one so we don't end up with extra tag heads
305 331 if nrev:
306 332 self.map[c] = nrev
307 333
308 334 self.writeauthormap()
309 335 finally:
310 336 self.cleanup()
311 337
312 338 def cleanup(self):
313 339 try:
314 340 self.dest.after()
315 341 finally:
316 342 self.source.after()
317 343 self.map.close()
318 344
319 345 def convert(ui, src, dest=None, revmapfile=None, **opts):
320 346 global orig_encoding
321 347 orig_encoding = encoding.encoding
322 348 encoding.encoding = 'UTF-8'
323 349
324 350 if not dest:
325 351 dest = hg.defaultdest(src) + "-hg"
326 352 ui.status(_("assuming destination %s\n") % dest)
327 353
328 354 destc = convertsink(ui, dest, opts.get('dest_type'))
329 355
330 356 try:
331 357 srcc = convertsource(ui, src, opts.get('source_type'),
332 358 opts.get('rev'))
333 359 except Exception:
334 360 for path in destc.created:
335 361 shutil.rmtree(path, True)
336 362 raise
337 363
338 364 fmap = opts.get('filemap')
339 365 if fmap:
340 366 srcc = filemap.filemap_source(ui, srcc, fmap)
341 367 destc.setfilemapmode(True)
342 368
343 369 if not revmapfile:
344 370 try:
345 371 revmapfile = destc.revmapfile()
346 372 except:
347 373 revmapfile = os.path.join(destc, "map")
348 374
349 375 c = converter(ui, srcc, destc, revmapfile, opts)
350 376 c.convert()
351 377
General Comments 0
You need to be logged in to leave comments. Login now