##// END OF EJS Templates
convert: parse sort mode sooner
Patrick Mezard -
r8689:9bc95f8e default
parent child Browse files
Show More
@@ -1,377 +1,382 b''
1 # convcmd - convert extension commands definition
1 # convcmd - convert extension commands definition
2 #
2 #
3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 #
4 #
5 # This software may be used and distributed according to the terms of the
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2, incorporated herein by reference.
6 # GNU General Public License version 2, incorporated herein by reference.
7
7
8 from common import NoRepo, MissingTool, SKIPREV, mapfile
8 from common import NoRepo, MissingTool, SKIPREV, mapfile
9 from cvs import convert_cvs
9 from cvs import convert_cvs
10 from darcs import darcs_source
10 from darcs import darcs_source
11 from git import convert_git
11 from git import convert_git
12 from hg import mercurial_source, mercurial_sink
12 from hg import mercurial_source, mercurial_sink
13 from subversion import svn_source, svn_sink
13 from subversion import svn_source, svn_sink
14 from monotone import monotone_source
14 from monotone import monotone_source
15 from gnuarch import gnuarch_source
15 from gnuarch import gnuarch_source
16 from bzr import bzr_source
16 from bzr import bzr_source
17 from p4 import p4_source
17 from p4 import p4_source
18 import filemap
18 import filemap
19
19
20 import os, shutil
20 import os, shutil
21 from mercurial import hg, util, encoding
21 from mercurial import hg, util, encoding
22 from mercurial.i18n import _
22 from mercurial.i18n import _
23
23
24 orig_encoding = 'ascii'
24 orig_encoding = 'ascii'
25
25
26 def recode(s):
26 def recode(s):
27 if isinstance(s, unicode):
27 if isinstance(s, unicode):
28 return s.encode(orig_encoding, 'replace')
28 return s.encode(orig_encoding, 'replace')
29 else:
29 else:
30 return s.decode('utf-8').encode(orig_encoding, 'replace')
30 return s.decode('utf-8').encode(orig_encoding, 'replace')
31
31
32 source_converters = [
32 source_converters = [
33 ('cvs', convert_cvs),
33 ('cvs', convert_cvs),
34 ('git', convert_git),
34 ('git', convert_git),
35 ('svn', svn_source),
35 ('svn', svn_source),
36 ('hg', mercurial_source),
36 ('hg', mercurial_source),
37 ('darcs', darcs_source),
37 ('darcs', darcs_source),
38 ('mtn', monotone_source),
38 ('mtn', monotone_source),
39 ('gnuarch', gnuarch_source),
39 ('gnuarch', gnuarch_source),
40 ('bzr', bzr_source),
40 ('bzr', bzr_source),
41 ('p4', p4_source),
41 ('p4', p4_source),
42 ]
42 ]
43
43
44 sink_converters = [
44 sink_converters = [
45 ('hg', mercurial_sink),
45 ('hg', mercurial_sink),
46 ('svn', svn_sink),
46 ('svn', svn_sink),
47 ]
47 ]
48
48
49 def convertsource(ui, path, type, rev):
49 def convertsource(ui, path, type, rev):
50 exceptions = []
50 exceptions = []
51 for name, source in source_converters:
51 for name, source in source_converters:
52 try:
52 try:
53 if not type or name == type:
53 if not type or name == type:
54 return source(ui, path, rev)
54 return source(ui, path, rev)
55 except (NoRepo, MissingTool), inst:
55 except (NoRepo, MissingTool), inst:
56 exceptions.append(inst)
56 exceptions.append(inst)
57 if not ui.quiet:
57 if not ui.quiet:
58 for inst in exceptions:
58 for inst in exceptions:
59 ui.write("%s\n" % inst)
59 ui.write("%s\n" % inst)
60 raise util.Abort(_('%s: missing or unsupported repository') % path)
60 raise util.Abort(_('%s: missing or unsupported repository') % path)
61
61
62 def convertsink(ui, path, type):
62 def convertsink(ui, path, type):
63 for name, sink in sink_converters:
63 for name, sink in sink_converters:
64 try:
64 try:
65 if not type or name == type:
65 if not type or name == type:
66 return sink(ui, path)
66 return sink(ui, path)
67 except NoRepo, inst:
67 except NoRepo, inst:
68 ui.note(_("convert: %s\n") % inst)
68 ui.note(_("convert: %s\n") % inst)
69 raise util.Abort(_('%s: unknown repository type') % path)
69 raise util.Abort(_('%s: unknown repository type') % path)
70
70
71 class converter(object):
71 class converter(object):
72 def __init__(self, ui, source, dest, revmapfile, opts):
72 def __init__(self, ui, source, dest, revmapfile, opts):
73
73
74 self.source = source
74 self.source = source
75 self.dest = dest
75 self.dest = dest
76 self.ui = ui
76 self.ui = ui
77 self.opts = opts
77 self.opts = opts
78 self.commitcache = {}
78 self.commitcache = {}
79 self.authors = {}
79 self.authors = {}
80 self.authorfile = None
80 self.authorfile = None
81
81
82 # Record converted revisions persistently: maps source revision
82 # Record converted revisions persistently: maps source revision
83 # ID to target revision ID (both strings). (This is how
83 # ID to target revision ID (both strings). (This is how
84 # incremental conversions work.)
84 # incremental conversions work.)
85 self.map = mapfile(ui, revmapfile)
85 self.map = mapfile(ui, revmapfile)
86
86
87 # Read first the dst author map if any
87 # Read first the dst author map if any
88 authorfile = self.dest.authorfile()
88 authorfile = self.dest.authorfile()
89 if authorfile and os.path.exists(authorfile):
89 if authorfile and os.path.exists(authorfile):
90 self.readauthormap(authorfile)
90 self.readauthormap(authorfile)
91 # Extend/Override with new author map if necessary
91 # Extend/Override with new author map if necessary
92 if opts.get('authors'):
92 if opts.get('authors'):
93 self.readauthormap(opts.get('authors'))
93 self.readauthormap(opts.get('authors'))
94 self.authorfile = self.dest.authorfile()
94 self.authorfile = self.dest.authorfile()
95
95
96 self.splicemap = mapfile(ui, opts.get('splicemap'))
96 self.splicemap = mapfile(ui, opts.get('splicemap'))
97 self.branchmap = mapfile(ui, opts.get('branchmap'))
97 self.branchmap = mapfile(ui, opts.get('branchmap'))
98
98
99 def walktree(self, heads):
99 def walktree(self, heads):
100 '''Return a mapping that identifies the uncommitted parents of every
100 '''Return a mapping that identifies the uncommitted parents of every
101 uncommitted changeset.'''
101 uncommitted changeset.'''
102 visit = heads
102 visit = heads
103 known = set()
103 known = set()
104 parents = {}
104 parents = {}
105 while visit:
105 while visit:
106 n = visit.pop(0)
106 n = visit.pop(0)
107 if n in known or n in self.map: continue
107 if n in known or n in self.map: continue
108 known.add(n)
108 known.add(n)
109 commit = self.cachecommit(n)
109 commit = self.cachecommit(n)
110 parents[n] = []
110 parents[n] = []
111 for p in commit.parents:
111 for p in commit.parents:
112 parents[n].append(p)
112 parents[n].append(p)
113 visit.append(p)
113 visit.append(p)
114
114
115 return parents
115 return parents
116
116
117 def toposort(self, parents):
117 def toposort(self, parents, sortmode):
118 '''Return an ordering such that every uncommitted changeset is
118 '''Return an ordering such that every uncommitted changeset is
119 preceded by all its uncommitted ancestors.'''
119 preceded by all its uncommitted ancestors.'''
120
120
121 def mapchildren(parents):
121 def mapchildren(parents):
122 """Return a (children, roots) tuple where 'children' maps parent
122 """Return a (children, roots) tuple where 'children' maps parent
123 revision identifiers to children ones, and 'roots' is the list of
123 revision identifiers to children ones, and 'roots' is the list of
124 revisions without parents. 'parents' must be a mapping of revision
124 revisions without parents. 'parents' must be a mapping of revision
125 identifier to its parents ones.
125 identifier to its parents ones.
126 """
126 """
127 visit = parents.keys()
127 visit = parents.keys()
128 seen = set()
128 seen = set()
129 children = {}
129 children = {}
130 roots = []
130 roots = []
131
131
132 while visit:
132 while visit:
133 n = visit.pop(0)
133 n = visit.pop(0)
134 if n in seen:
134 if n in seen:
135 continue
135 continue
136 seen.add(n)
136 seen.add(n)
137 # Ensure that nodes without parents are present in the
137 # Ensure that nodes without parents are present in the
138 # 'children' mapping.
138 # 'children' mapping.
139 children.setdefault(n, [])
139 children.setdefault(n, [])
140 hasparent = False
140 hasparent = False
141 for p in parents[n]:
141 for p in parents[n]:
142 if not p in self.map:
142 if not p in self.map:
143 visit.append(p)
143 visit.append(p)
144 hasparent = True
144 hasparent = True
145 children.setdefault(p, []).append(n)
145 children.setdefault(p, []).append(n)
146 if not hasparent:
146 if not hasparent:
147 roots.append(n)
147 roots.append(n)
148
148
149 return children, roots
149 return children, roots
150
150
151 # Sort functions are supposed to take a list of revisions which
151 # Sort functions are supposed to take a list of revisions which
152 # can be converted immediately and pick one
152 # can be converted immediately and pick one
153
153
154 def makebranchsorter():
154 def makebranchsorter():
155 """If the previously converted revision has a child in the
155 """If the previously converted revision has a child in the
156 eligible revisions list, pick it. Return the list head
156 eligible revisions list, pick it. Return the list head
157 otherwise. Branch sort attempts to minimize branch
157 otherwise. Branch sort attempts to minimize branch
158 switching, which is harmful for Mercurial backend
158 switching, which is harmful for Mercurial backend
159 compression.
159 compression.
160 """
160 """
161 prev = [None]
161 prev = [None]
162 def picknext(nodes):
162 def picknext(nodes):
163 next = nodes[0]
163 next = nodes[0]
164 for n in nodes:
164 for n in nodes:
165 if prev[0] in parents[n]:
165 if prev[0] in parents[n]:
166 next = n
166 next = n
167 break
167 break
168 prev[0] = next
168 prev[0] = next
169 return next
169 return next
170 return picknext
170 return picknext
171
171
172 def makedatesorter():
172 def makedatesorter():
173 """Sort revisions by date."""
173 """Sort revisions by date."""
174 dates = {}
174 dates = {}
175 def getdate(n):
175 def getdate(n):
176 if n not in dates:
176 if n not in dates:
177 dates[n] = util.parsedate(self.commitcache[n].date)
177 dates[n] = util.parsedate(self.commitcache[n].date)
178 return dates[n]
178 return dates[n]
179
179
180 def picknext(nodes):
180 def picknext(nodes):
181 return min([(getdate(n), n) for n in nodes])[1]
181 return min([(getdate(n), n) for n in nodes])[1]
182
182
183 return picknext
183 return picknext
184
184
185 if self.opts.get('datesort'):
185 if sortmode == 'branchsort':
186 picknext = makebranchsorter()
187 elif sortmode == 'datesort':
186 picknext = makedatesorter()
188 picknext = makedatesorter()
187 else:
189 else:
188 picknext = makebranchsorter()
190 raise util.Abort(_('unknown sort mode: %s') % sortmode)
189
191
190 children, actives = mapchildren(parents)
192 children, actives = mapchildren(parents)
191
193
192 s = []
194 s = []
193 pendings = {}
195 pendings = {}
194 while actives:
196 while actives:
195 n = picknext(actives)
197 n = picknext(actives)
196 actives.remove(n)
198 actives.remove(n)
197 s.append(n)
199 s.append(n)
198
200
199 # Update dependents list
201 # Update dependents list
200 for c in children.get(n, []):
202 for c in children.get(n, []):
201 if c not in pendings:
203 if c not in pendings:
202 pendings[c] = [p for p in parents[c] if p not in self.map]
204 pendings[c] = [p for p in parents[c] if p not in self.map]
203 try:
205 try:
204 pendings[c].remove(n)
206 pendings[c].remove(n)
205 except ValueError:
207 except ValueError:
206 raise util.Abort(_('cycle detected between %s and %s')
208 raise util.Abort(_('cycle detected between %s and %s')
207 % (recode(c), recode(n)))
209 % (recode(c), recode(n)))
208 if not pendings[c]:
210 if not pendings[c]:
209 # Parents are converted, node is eligible
211 # Parents are converted, node is eligible
210 actives.insert(0, c)
212 actives.insert(0, c)
211 pendings[c] = None
213 pendings[c] = None
212
214
213 if len(s) != len(parents):
215 if len(s) != len(parents):
214 raise util.Abort(_("not all revisions were sorted"))
216 raise util.Abort(_("not all revisions were sorted"))
215
217
216 return s
218 return s
217
219
218 def writeauthormap(self):
220 def writeauthormap(self):
219 authorfile = self.authorfile
221 authorfile = self.authorfile
220 if authorfile:
222 if authorfile:
221 self.ui.status(_('Writing author map file %s\n') % authorfile)
223 self.ui.status(_('Writing author map file %s\n') % authorfile)
222 ofile = open(authorfile, 'w+')
224 ofile = open(authorfile, 'w+')
223 for author in self.authors:
225 for author in self.authors:
224 ofile.write("%s=%s\n" % (author, self.authors[author]))
226 ofile.write("%s=%s\n" % (author, self.authors[author]))
225 ofile.close()
227 ofile.close()
226
228
227 def readauthormap(self, authorfile):
229 def readauthormap(self, authorfile):
228 afile = open(authorfile, 'r')
230 afile = open(authorfile, 'r')
229 for line in afile:
231 for line in afile:
230
232
231 line = line.strip()
233 line = line.strip()
232 if not line or line.startswith('#'):
234 if not line or line.startswith('#'):
233 continue
235 continue
234
236
235 try:
237 try:
236 srcauthor, dstauthor = line.split('=', 1)
238 srcauthor, dstauthor = line.split('=', 1)
237 except ValueError:
239 except ValueError:
238 msg = _('Ignoring bad line in author map file %s: %s\n')
240 msg = _('Ignoring bad line in author map file %s: %s\n')
239 self.ui.warn(msg % (authorfile, line.rstrip()))
241 self.ui.warn(msg % (authorfile, line.rstrip()))
240 continue
242 continue
241
243
242 srcauthor = srcauthor.strip()
244 srcauthor = srcauthor.strip()
243 dstauthor = dstauthor.strip()
245 dstauthor = dstauthor.strip()
244 if self.authors.get(srcauthor) in (None, dstauthor):
246 if self.authors.get(srcauthor) in (None, dstauthor):
245 msg = _('mapping author %s to %s\n')
247 msg = _('mapping author %s to %s\n')
246 self.ui.debug(msg % (srcauthor, dstauthor))
248 self.ui.debug(msg % (srcauthor, dstauthor))
247 self.authors[srcauthor] = dstauthor
249 self.authors[srcauthor] = dstauthor
248 continue
250 continue
249
251
250 m = _('overriding mapping for author %s, was %s, will be %s\n')
252 m = _('overriding mapping for author %s, was %s, will be %s\n')
251 self.ui.status(m % (srcauthor, self.authors[srcauthor], dstauthor))
253 self.ui.status(m % (srcauthor, self.authors[srcauthor], dstauthor))
252
254
253 afile.close()
255 afile.close()
254
256
255 def cachecommit(self, rev):
257 def cachecommit(self, rev):
256 commit = self.source.getcommit(rev)
258 commit = self.source.getcommit(rev)
257 commit.author = self.authors.get(commit.author, commit.author)
259 commit.author = self.authors.get(commit.author, commit.author)
258 commit.branch = self.branchmap.get(commit.branch, commit.branch)
260 commit.branch = self.branchmap.get(commit.branch, commit.branch)
259 self.commitcache[rev] = commit
261 self.commitcache[rev] = commit
260 return commit
262 return commit
261
263
262 def copy(self, rev):
264 def copy(self, rev):
263 commit = self.commitcache[rev]
265 commit = self.commitcache[rev]
264
266
265 changes = self.source.getchanges(rev)
267 changes = self.source.getchanges(rev)
266 if isinstance(changes, basestring):
268 if isinstance(changes, basestring):
267 if changes == SKIPREV:
269 if changes == SKIPREV:
268 dest = SKIPREV
270 dest = SKIPREV
269 else:
271 else:
270 dest = self.map[changes]
272 dest = self.map[changes]
271 self.map[rev] = dest
273 self.map[rev] = dest
272 return
274 return
273 files, copies = changes
275 files, copies = changes
274 pbranches = []
276 pbranches = []
275 if commit.parents:
277 if commit.parents:
276 for prev in commit.parents:
278 for prev in commit.parents:
277 if prev not in self.commitcache:
279 if prev not in self.commitcache:
278 self.cachecommit(prev)
280 self.cachecommit(prev)
279 pbranches.append((self.map[prev],
281 pbranches.append((self.map[prev],
280 self.commitcache[prev].branch))
282 self.commitcache[prev].branch))
281 self.dest.setbranch(commit.branch, pbranches)
283 self.dest.setbranch(commit.branch, pbranches)
282 try:
284 try:
283 parents = self.splicemap[rev].replace(',', ' ').split()
285 parents = self.splicemap[rev].replace(',', ' ').split()
284 self.ui.status(_('spliced in %s as parents of %s\n') %
286 self.ui.status(_('spliced in %s as parents of %s\n') %
285 (parents, rev))
287 (parents, rev))
286 parents = [self.map.get(p, p) for p in parents]
288 parents = [self.map.get(p, p) for p in parents]
287 except KeyError:
289 except KeyError:
288 parents = [b[0] for b in pbranches]
290 parents = [b[0] for b in pbranches]
289 newnode = self.dest.putcommit(files, copies, parents, commit, self.source)
291 newnode = self.dest.putcommit(files, copies, parents, commit, self.source)
290 self.source.converted(rev, newnode)
292 self.source.converted(rev, newnode)
291 self.map[rev] = newnode
293 self.map[rev] = newnode
292
294
293 def convert(self):
295 def convert(self, sortmode):
294
295 try:
296 try:
296 self.source.before()
297 self.source.before()
297 self.dest.before()
298 self.dest.before()
298 self.source.setrevmap(self.map)
299 self.source.setrevmap(self.map)
299 self.ui.status(_("scanning source...\n"))
300 self.ui.status(_("scanning source...\n"))
300 heads = self.source.getheads()
301 heads = self.source.getheads()
301 parents = self.walktree(heads)
302 parents = self.walktree(heads)
302 self.ui.status(_("sorting...\n"))
303 self.ui.status(_("sorting...\n"))
303 t = self.toposort(parents)
304 t = self.toposort(parents, sortmode)
304 num = len(t)
305 num = len(t)
305 c = None
306 c = None
306
307
307 self.ui.status(_("converting...\n"))
308 self.ui.status(_("converting...\n"))
308 for c in t:
309 for c in t:
309 num -= 1
310 num -= 1
310 desc = self.commitcache[c].desc
311 desc = self.commitcache[c].desc
311 if "\n" in desc:
312 if "\n" in desc:
312 desc = desc.splitlines()[0]
313 desc = desc.splitlines()[0]
313 # convert log message to local encoding without using
314 # convert log message to local encoding without using
314 # tolocal() because convert() sets encoding.encoding to
315 # tolocal() because convert() sets encoding.encoding to
315 # 'utf-8'
316 # 'utf-8'
316 self.ui.status("%d %s\n" % (num, recode(desc)))
317 self.ui.status("%d %s\n" % (num, recode(desc)))
317 self.ui.note(_("source: %s\n") % recode(c))
318 self.ui.note(_("source: %s\n") % recode(c))
318 self.copy(c)
319 self.copy(c)
319
320
320 tags = self.source.gettags()
321 tags = self.source.gettags()
321 ctags = {}
322 ctags = {}
322 for k in tags:
323 for k in tags:
323 v = tags[k]
324 v = tags[k]
324 if self.map.get(v, SKIPREV) != SKIPREV:
325 if self.map.get(v, SKIPREV) != SKIPREV:
325 ctags[k] = self.map[v]
326 ctags[k] = self.map[v]
326
327
327 if c and ctags:
328 if c and ctags:
328 nrev = self.dest.puttags(ctags)
329 nrev = self.dest.puttags(ctags)
329 # write another hash correspondence to override the previous
330 # write another hash correspondence to override the previous
330 # one so we don't end up with extra tag heads
331 # one so we don't end up with extra tag heads
331 if nrev:
332 if nrev:
332 self.map[c] = nrev
333 self.map[c] = nrev
333
334
334 self.writeauthormap()
335 self.writeauthormap()
335 finally:
336 finally:
336 self.cleanup()
337 self.cleanup()
337
338
338 def cleanup(self):
339 def cleanup(self):
339 try:
340 try:
340 self.dest.after()
341 self.dest.after()
341 finally:
342 finally:
342 self.source.after()
343 self.source.after()
343 self.map.close()
344 self.map.close()
344
345
345 def convert(ui, src, dest=None, revmapfile=None, **opts):
346 def convert(ui, src, dest=None, revmapfile=None, **opts):
346 global orig_encoding
347 global orig_encoding
347 orig_encoding = encoding.encoding
348 orig_encoding = encoding.encoding
348 encoding.encoding = 'UTF-8'
349 encoding.encoding = 'UTF-8'
349
350
350 if not dest:
351 if not dest:
351 dest = hg.defaultdest(src) + "-hg"
352 dest = hg.defaultdest(src) + "-hg"
352 ui.status(_("assuming destination %s\n") % dest)
353 ui.status(_("assuming destination %s\n") % dest)
353
354
354 destc = convertsink(ui, dest, opts.get('dest_type'))
355 destc = convertsink(ui, dest, opts.get('dest_type'))
355
356
356 try:
357 try:
357 srcc = convertsource(ui, src, opts.get('source_type'),
358 srcc = convertsource(ui, src, opts.get('source_type'),
358 opts.get('rev'))
359 opts.get('rev'))
359 except Exception:
360 except Exception:
360 for path in destc.created:
361 for path in destc.created:
361 shutil.rmtree(path, True)
362 shutil.rmtree(path, True)
362 raise
363 raise
363
364
365 sortmode = 'branchsort'
366 if opts.get('datesort'):
367 sortmode = 'datesort'
368
364 fmap = opts.get('filemap')
369 fmap = opts.get('filemap')
365 if fmap:
370 if fmap:
366 srcc = filemap.filemap_source(ui, srcc, fmap)
371 srcc = filemap.filemap_source(ui, srcc, fmap)
367 destc.setfilemapmode(True)
372 destc.setfilemapmode(True)
368
373
369 if not revmapfile:
374 if not revmapfile:
370 try:
375 try:
371 revmapfile = destc.revmapfile()
376 revmapfile = destc.revmapfile()
372 except:
377 except:
373 revmapfile = os.path.join(destc, "map")
378 revmapfile = os.path.join(destc, "map")
374
379
375 c = converter(ui, srcc, destc, revmapfile, opts)
380 c = converter(ui, srcc, destc, revmapfile, opts)
376 c.convert()
381 c.convert(sortmode)
377
382
General Comments 0
You need to be logged in to leave comments. Login now