##// END OF EJS Templates
convert: add support for deterministic progress bar on scanning phase...
Augie Fackler -
r22411:c497e39d default
parent child Browse files
Show More
@@ -1,452 +1,459
1 1 # common.py - common code for the convert extension
2 2 #
3 3 # Copyright 2005-2009 Matt Mackall <mpm@selenic.com> and others
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 import base64, errno, subprocess, os, datetime, re
9 9 import cPickle as pickle
10 10 from mercurial import util
11 11 from mercurial.i18n import _
12 12
13 13 propertycache = util.propertycache
14 14
def encodeargs(args):
    """Serialize ``args`` into a single-line base64 string.

    ``args`` is pickled and the pickle is base64-encoded. The result
    contains no line breaks.
    """
    # b64encode() never inserts line breaks, so unlike encodestring() its
    # output does not need a separate newline-stripping pass.
    return base64.b64encode(pickle.dumps(args))
23 23
def decodeargs(s):
    """Decode a base64 string and return the unpickled object it holds."""
    # NOTE(review): pickle.loads() can execute arbitrary code when fed
    # attacker-controlled data. Presumably ``s`` is always produced by
    # encodeargs() in a trusted process -- confirm before reusing this
    # helper on any external input.
    return pickle.loads(base64.b64decode(s))
27 27
class MissingTool(Exception):
    """Raised by checktool() when a tool is missing and abort is false."""
30 30
def checktool(exe, name=None, abort=True):
    """Make sure the external program ``exe`` can be found.

    ``name`` is the label used in the error message and defaults to
    ``exe``. When the tool is missing, util.Abort is raised if ``abort``
    is true, MissingTool otherwise.
    """
    if util.findexe(exe):
        return
    if abort:
        exc = util.Abort
    else:
        exc = MissingTool
    raise exc(_('cannot find required "%s" tool') % (name or exe))
36 36
class NoRepo(Exception):
    """Raised by a source or sink when the path is not a valid repository."""
39 39
# Marker stored in the revision map, in place of a sink revision id, for
# source revisions that were skipped rather than converted.
SKIPREV = 'SKIP'
41 41
class commit(object):
    """Plain record describing one changeset being converted.

    author, date and desc are the changeset metadata (author falls back
    to 'unknown' and date to '0 0' when empty); parents is the list of
    parent revision identifiers; branch, rev and sortkey are optional;
    extra is a dictionary of additional metadata.
    """

    def __init__(self, author, date, desc, parents, branch=None, rev=None,
                 extra=None, sortkey=None):
        # A literal {} default would be created once and shared by every
        # commit built without an explicit 'extra', so mutating one
        # commit's extra would leak into the others.
        if extra is None:
            extra = {}
        self.author = author or 'unknown'
        self.date = date or '0 0'
        self.desc = desc
        self.parents = parents
        self.branch = branch
        self.rev = rev
        self.extra = extra
        self.sortkey = sortkey
53 53
class converter_source(object):
    """Interface implemented by every conversion source."""

    def __init__(self, ui, path=None, rev=None):
        """Set up the source.

        Implementations raise NoRepo("message") when path is not a valid
        repository.
        """
        self.ui = ui
        self.path = path
        self.rev = rev

        self.encoding = 'utf-8'

    def checkhexformat(self, revstr, mapname='splicemap'):
        """Abort unless revstr is a 40 character hexadecimal string.

        Mercurial and git both use that format for revision identifiers.
        mapname only appears in the error message.
        """
        if re.match(r'[0-9a-fA-F]{40,40}$', revstr):
            return
        raise util.Abort(_('%s entry %s is not a valid revision'
                           ' identifier') % (mapname, revstr))

    def before(self):
        """Hook with no default behavior."""

    def after(self):
        """Hook with no default behavior."""

    def setrevmap(self, revmap):
        """Receive the map of already-converted revisions."""

    def getheads(self):
        """Return the list of heads of this repository."""
        raise NotImplementedError

    def getfile(self, name, rev):
        """Return a (data, mode) pair for file name at rev.

        data is the file content as a string, or None when the file is
        missing/deleted in rev. mode is one of '', 'x' or 'l'. rev is the
        identifier returned by a previous call to getchanges().
        """
        raise NotImplementedError

    def getchanges(self, version, full):
        """Return a (files, copies) tuple for version.

        files is a sorted list of (filename, id) tuples covering every
        file changed between version and its first parent as returned by
        getcommit(); when full is set, every file of the revision is
        listed. id is the source revision id of the file.

        copies is a dictionary of dest: source.
        """
        raise NotImplementedError

    def getcommit(self, version):
        """Return the commit object describing version."""
        raise NotImplementedError

    def numcommits(self):
        """Return how many commits this source holds.

        None means the number is unknown.
        """
        return None

    def gettags(self):
        """Return a dictionary of tag name: revision.

        Tag names must be UTF-8 strings.
        """
        raise NotImplementedError

    def recode(self, s, encoding=None):
        """Return s as a UTF-8 encoded byte string.

        Unicode input is simply encoded. Byte strings are decoded with
        encoding (defaulting to self.encoding, then 'utf-8'), then with
        latin-1, and finally with encoding again in "replace" mode.
        """
        encoding = encoding or self.encoding or 'utf-8'

        if isinstance(s, unicode):
            return s.encode("utf-8")
        for codec in (encoding, "latin-1"):
            try:
                return s.decode(codec).encode("utf-8")
            except UnicodeError:
                pass
        return s.decode(encoding, "replace").encode("utf-8")

    def getchangedfiles(self, rev, i):
        """Return the files that differ between rev and its parent i.

        i is an index into the parents of rev, or None when rev has no
        parents. The result is the list of files that are different in
        rev and that parent.

        Only --filemap support needs this method.
        """
        raise NotImplementedError

    def converted(self, rev, sinkrev):
        '''Be told that rev has been converted to sinkrev.'''

    def hasnativeorder(self):
        """Tell whether the source has a meaningful, native revision
        order. Mercurial revisions, for instance, are stored
        sequentially, whereas Darcs has no such global ordering.
        """
        return False

    def hasnativeclose(self):
        """Tell whether the source is able to close branches."""
        return False

    def lookuprev(self, rev):
        """Resolve rev to an identifier in the format used by getcommit().

        Return None when rev is not a meaningful revision reference in
        this source.
        """
        return None

    def getbookmarks(self):
        """Return a dictionary of bookmark name: revision.

        Bookmark names are to be UTF-8 strings.
        """
        return {}

    def checkrevformat(self, revstr, mapname='splicemap'):
        """Return true when revstr, a string describing a revision in
        this source control system, is correctly formatted.
        """
        return True
182 189
class converter_sink(object):
    """Interface implemented by every conversion sink (target)."""

    def __init__(self, ui, path):
        """Set up the sink.

        Implementations raise NoRepo("message") when path is not a valid
        repository.

        created is a list of paths to remove if a fatal error occurs
        later.
        """
        self.ui = ui
        self.path = path
        self.created = []

    def revmapfile(self):
        """Return the path of the file holding lines of the form
        source_rev_id sink_rev_id
        which map equivalent revision identifiers for each system."""
        raise NotImplementedError

    def authorfile(self):
        """Return the path of the file holding lines of the form
        srcauthor=dstauthor
        which map equivalent author identifiers for each system."""
        return None

    def putcommit(self, files, copies, parents, commit, source, revmap, full):
        """Create a revision holding the changed files in 'files' with
        the given parents.

        'commit' is a commit object carrying at least the author, date
        and message of the changeset. 'files' is a list of
        (path, version) tuples, 'copies' a dictionary mapping
        destinations to sources, 'source' the source repository and
        'revmap' a mapfile of source revisions to converted revisions.
        Only getfile() and lookuprev() should be called on 'source'.
        'full' means that 'files' is complete and every other file should
        be removed.

        Note that the sink repository is not told to update itself to a
        particular revision (or even what that revision would be) before
        it receives the file data.
        """
        raise NotImplementedError

    def puttags(self, tags):
        """Store tags in the sink.

        tags: {tagname: sink_rev_id, ...} where tagname is an UTF-8
        string. Return a (tag_revision, tag_parent_revision) pair, or
        (None, None) if nothing was changed.
        """
        raise NotImplementedError

    def setbranch(self, branch, pbranches):
        """Select the current branch name; called before the first
        putcommit on the branch.

        branch: branch name for subsequent commits
        pbranches: (converted parent revision, parent branch) tuples
        """

    def setfilemapmode(self, active):
        """Be told that a filemap is in use.

        Some converter_sources (svn in particular) can claim that a file
        was changed in a revision even if there was no change. This
        method tells the destination that a filemap is in use and that
        it should filter empty revisions.
        """

    def before(self):
        """Hook with no default behavior."""

    def after(self):
        """Hook with no default behavior."""

    def putbookmarks(self, bookmarks):
        """Store bookmarks in the sink.

        bookmarks: {bookmarkname: sink_rev_id, ...}
        where bookmarkname is an UTF-8 string.
        """

    def hascommitfrommap(self, rev):
        """Return False when a rev mentioned in a filemap is known not
        to be present."""
        raise NotImplementedError

    def hascommitforsplicemap(self, rev):
        """Serve the special needs of splicemap handling; not for general
        use. Return True if the sink contains rev, abort on some special
        cases."""
        raise NotImplementedError
275 282
class commandline(object):
    """Helper that builds shell command lines for one external program
    and runs them."""

    def __init__(self, ui, command):
        self.ui = ui
        self.command = command

    def prerun(self):
        """Hook invoked just before a command is started."""

    def postrun(self):
        """Hook invoked once the command has been started (or failed to)."""

    def _cmdline(self, cmd, *args, **kwargs):
        """Return the shell-quoted command line as a single string.

        Keyword arguments become options: one-letter keys give '-k value',
        longer keys give '--long-key=value' (underscores turned into
        dashes). A value that cannot be concatenated to a string leaves
        the bare option. stderr is redirected to os.devnull unless the ui
        is in debug mode.
        """
        parts = [self.command, cmd]
        parts.extend(args)
        for key, value in kwargs.iteritems():
            short = len(key) == 1
            if short:
                parts.append('-' + key)
            else:
                parts.append('--' + key.replace('_', '-'))
            try:
                if short:
                    parts.append('' + value)
                else:
                    parts[-1] += '=' + value
            except TypeError:
                # Non-string value: keep the option without a value.
                pass
        quoted = [util.shellquote(part) for part in parts]
        if not self.ui.debugflag:
            quoted += ['2>', os.devnull]
        return ' '.join(quoted)

    def _run(self, cmd, *args, **kwargs):
        """Start the command and return the Popen object, with its
        stdout connected to a pipe."""
        def spawn(cmdline):
            return subprocess.Popen(cmdline, shell=True, bufsize=-1,
                                    close_fds=util.closefds,
                                    stdout=subprocess.PIPE)
        return self._dorun(spawn, cmd, *args, **kwargs)

    def _run2(self, cmd, *args, **kwargs):
        """Start the command through util.popen2 and return its result."""
        return self._dorun(util.popen2, cmd, *args, **kwargs)

    def _dorun(self, openfunc, cmd, *args, **kwargs):
        """Build the command line, log it, and open it with openfunc
        between the prerun() and postrun() hooks."""
        cmdline = self._cmdline(cmd, *args, **kwargs)
        self.ui.debug('running: %s\n' % (cmdline,))
        self.prerun()
        try:
            return openfunc(cmdline)
        finally:
            self.postrun()

    def run(self, cmd, *args, **kwargs):
        """Run the command to completion; return (output, returncode)."""
        proc = self._run(cmd, *args, **kwargs)
        output = proc.communicate()[0]
        self.ui.debug(output)
        return output, proc.returncode

    def runlines(self, cmd, *args, **kwargs):
        """Like run(), but output is returned as a list of lines."""
        proc = self._run(cmd, *args, **kwargs)
        lines = proc.stdout.readlines()
        proc.wait()
        self.ui.debug(''.join(lines))
        return lines, proc.returncode

    def checkexit(self, status, output=''):
        """Abort if status is non-zero, warning with output first if any."""
        if not status:
            return
        if output:
            self.ui.warn(_('%s error:\n') % self.command)
            self.ui.warn(output)
        msg = util.explainexit(status)[0]
        raise util.Abort('%s %s' % (self.command, msg))

    def run0(self, cmd, *args, **kwargs):
        """Run the command and return its output; abort on failure."""
        output, status = self.run(cmd, *args, **kwargs)
        self.checkexit(status, output)
        return output

    def runlines0(self, cmd, *args, **kwargs):
        """Run the command and return its output lines; abort on failure."""
        lines, status = self.runlines(cmd, *args, **kwargs)
        self.checkexit(status, ''.join(lines))
        return lines

    @propertycache
    def argmax(self):
        """Upper bound used for the length of a command line."""
        try:
            argmax = os.sysconf("SC_ARG_MAX")
        except (AttributeError, ValueError):
            # POSIX requires at least 4096 bytes for ARG_MAX
            argmax = 4096

        # Windows shells impose their own limits on command line length,
        # down to 2047 bytes for cmd.exe under Windows NT/2k and 2500 bytes
        # for older 4nt.exe. See http://support.microsoft.com/kb/830473 for
        # details about cmd.exe limitations.

        # Since ARG_MAX is for command line _and_ environment, lower our limit
        # (and make happy Windows shells while doing this).
        return argmax // 2 - 1

    def _limit_arglist(self, arglist, cmd, *args, **kwargs):
        """Yield successive slices of arglist, each small enough to be
        appended to the command line without exceeding argmax.

        Every yielded list holds at least one entry, even an oversized one.
        """
        budget = self.argmax - len(self._cmdline(cmd, *args, **kwargs))
        used = 0
        batch = []
        for fn in arglist:
            cost = len(fn) + 3
            if batch and used + cost >= budget:
                yield batch
                batch = [fn]
                used = cost
            else:
                batch.append(fn)
                used += cost
        if batch:
            yield batch

    def xargs(self, arglist, cmd, *args, **kwargs):
        """Run the command once per batch of arglist, aborting on failure."""
        for batch in self._limit_arglist(arglist, cmd, *args, **kwargs):
            self.run0(cmd, *(list(args) + batch), **kwargs)
396 403
397 404 class mapfile(dict):
398 405 def __init__(self, ui, path):
399 406 super(mapfile, self).__init__()
400 407 self.ui = ui
401 408 self.path = path
402 409 self.fp = None
403 410 self.order = []
404 411 self._read()
405 412
406 413 def _read(self):
407 414 if not self.path:
408 415 return
409 416 try:
410 417 fp = open(self.path, 'r')
411 418 except IOError, err:
412 419 if err.errno != errno.ENOENT:
413 420 raise
414 421 return
415 422 for i, line in enumerate(fp):
416 423 line = line.splitlines()[0].rstrip()
417 424 if not line:
418 425 # Ignore blank lines
419 426 continue
420 427 try:
421 428 key, value = line.rsplit(' ', 1)
422 429 except ValueError:
423 430 raise util.Abort(
424 431 _('syntax error in %s(%d): key/value pair expected')
425 432 % (self.path, i + 1))
426 433 if key not in self:
427 434 self.order.append(key)
428 435 super(mapfile, self).__setitem__(key, value)
429 436 fp.close()
430 437
431 438 def __setitem__(self, key, value):
432 439 if self.fp is None:
433 440 try:
434 441 self.fp = open(self.path, 'a')
435 442 except IOError, err:
436 443 raise util.Abort(_('could not open map file %r: %s') %
437 444 (self.path, err.strerror))
438 445 self.fp.write('%s %s\n' % (key, value))
439 446 self.fp.flush()
440 447 super(mapfile, self).__setitem__(key, value)
441 448
442 449 def close(self):
443 450 if self.fp:
444 451 self.fp.close()
445 452 self.fp = None
446 453
def makedatetimestamp(t):
    """Return a (t, tz) pair for the timestamp t.

    Like util.makedate(), but for time t instead of the current time.
    tz is the number of seconds between UTC and local time at t.
    """
    utc = datetime.datetime.utcfromtimestamp(t)
    local = datetime.datetime.fromtimestamp(t)
    offset = utc - local
    return t, offset.days * 86400 + offset.seconds
@@ -1,532 +1,534
1 1 # convcmd - convert extension commands definition
2 2 #
3 3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from common import NoRepo, MissingTool, SKIPREV, mapfile
9 9 from cvs import convert_cvs
10 10 from darcs import darcs_source
11 11 from git import convert_git
12 12 from hg import mercurial_source, mercurial_sink
13 13 from subversion import svn_source, svn_sink
14 14 from monotone import monotone_source
15 15 from gnuarch import gnuarch_source
16 16 from bzr import bzr_source
17 17 from p4 import p4_source
18 18 import filemap
19 19
20 20 import os, shutil, shlex
21 21 from mercurial import hg, util, encoding
22 22 from mercurial.i18n import _
23 23
# Encoding used by recode() for text shown to the user. convert() replaces
# this with the value of encoding.encoding before switching that to UTF-8.
orig_encoding = 'ascii'
25 25
def recode(s):
    """Return s encoded in orig_encoding, replacing unencodable characters.

    Byte strings are decoded as UTF-8 first.
    """
    if not isinstance(s, unicode):
        s = s.decode('utf-8')
    return s.encode(orig_encoding, 'replace')
31 31
# (type name, source constructor, default sort mode) triples. When no source
# type is requested, convertsource() tries them in this order.
source_converters = [
    ('cvs', convert_cvs, 'branchsort'),
    ('git', convert_git, 'branchsort'),
    ('svn', svn_source, 'branchsort'),
    ('hg', mercurial_source, 'sourcesort'),
    ('darcs', darcs_source, 'branchsort'),
    ('mtn', monotone_source, 'branchsort'),
    ('gnuarch', gnuarch_source, 'branchsort'),
    ('bzr', bzr_source, 'branchsort'),
    ('p4', p4_source, 'branchsort'),
    ]

# (type name, sink constructor) pairs. When no destination type is
# requested, convertsink() tries them in this order.
sink_converters = [
    ('hg', mercurial_sink),
    ('svn', svn_sink),
    ]
48 48
49 49 def convertsource(ui, path, type, rev):
50 50 exceptions = []
51 51 if type and type not in [s[0] for s in source_converters]:
52 52 raise util.Abort(_('%s: invalid source repository type') % type)
53 53 for name, source, sortmode in source_converters:
54 54 try:
55 55 if not type or name == type:
56 56 return source(ui, path, rev), sortmode
57 57 except (NoRepo, MissingTool), inst:
58 58 exceptions.append(inst)
59 59 if not ui.quiet:
60 60 for inst in exceptions:
61 61 ui.write("%s\n" % inst)
62 62 raise util.Abort(_('%s: missing or unsupported repository') % path)
63 63
64 64 def convertsink(ui, path, type):
65 65 if type and type not in [s[0] for s in sink_converters]:
66 66 raise util.Abort(_('%s: invalid destination repository type') % type)
67 67 for name, sink in sink_converters:
68 68 try:
69 69 if not type or name == type:
70 70 return sink(ui, path)
71 71 except NoRepo, inst:
72 72 ui.note(_("convert: %s\n") % inst)
73 73 except MissingTool, inst:
74 74 raise util.Abort('%s\n' % inst)
75 75 raise util.Abort(_('%s: unknown repository type') % path)
76 76
class progresssource(object):
    """Wrapper around a source that reports progress on each getfile().

    filecount is the total shown in the 'getting files' progress topic.
    """

    def __init__(self, ui, source, filecount):
        self.ui = ui
        self.source = source
        self.filecount = filecount
        self.retrieved = 0

    def getfile(self, file, rev):
        """Count the retrieval, update progress, and delegate to the
        wrapped source."""
        self.retrieved += 1
        self.ui.progress(_('getting files'), self.retrieved,
                         item=file, total=self.filecount)
        return self.source.getfile(file, rev)

    def lookuprev(self, rev):
        """Delegate to the wrapped source."""
        return self.source.lookuprev(rev)

    def close(self):
        """End the 'getting files' progress topic."""
        self.ui.progress(_('getting files'), None)
95 95
class converter(object):
    """Drive the conversion of revisions from a source into a sink."""

    def __init__(self, ui, source, dest, revmapfile, opts):
        """Load the revision map, author maps, splicemap and branchmap.

        source and dest are the conversion source and sink, revmapfile
        the path of the persistent revision map, and opts the command
        options (the keys read here are 'authormap', 'splicemap' and
        'branchmap').
        """
        self.source = source
        self.dest = dest
        self.ui = ui
        self.opts = opts
        self.commitcache = {}
        self.authors = {}
        self.authorfile = None

        # Record converted revisions persistently: maps source revision
        # ID to target revision ID (both strings).  (This is how
        # incremental conversions work.)
        self.map = mapfile(ui, revmapfile)

        # Read first the dst author map if any
        authorfile = self.dest.authorfile()
        if authorfile and os.path.exists(authorfile):
            self.readauthormap(authorfile)
        # Extend/Override with new author map if necessary
        if opts.get('authormap'):
            self.readauthormap(opts.get('authormap'))
            self.authorfile = self.dest.authorfile()

        self.splicemap = self.parsesplicemap(opts.get('splicemap'))
        self.branchmap = mapfile(ui, opts.get('branchmap'))

    def parsesplicemap(self, path):
        """Check and validate the splicemap format and return a
        child/parents dictionary.

        Format checking has two parts:
        1. a generic format which is the same across all source types
        2. a source specific check, implemented by the checkrevformat
           method of the source (hg.py, subversion.py etc.)

        A false path yields an empty dictionary. Aborts on a syntax
        error or when the file cannot be read.
        """

        if not path:
            return {}
        m = {}
        try:
            fp = open(path, 'r')
            # Close the file whether parsing completes or aborts.
            try:
                for i, line in enumerate(fp):
                    line = line.splitlines()[0].rstrip()
                    if not line:
                        # Ignore blank lines
                        continue
                    # split line on whitespace and commas
                    lex = shlex.shlex(line, posix=True)
                    lex.whitespace_split = True
                    lex.whitespace += ','
                    line = list(lex)
                    # check number of parents
                    if not (2 <= len(line) <= 3):
                        raise util.Abort(
                            _('syntax error in %s(%d): child parent1'
                              '[,parent2] expected') % (path, i + 1))
                    for part in line:
                        self.source.checkrevformat(part)
                    child, p1, p2 = line[0], line[1:2], line[2:]
                    if p1 == p2:
                        m[child] = p1
                    else:
                        m[child] = p1 + p2
            finally:
                fp.close()
        # if file does not exist or error reading, exit
        except IOError:
            raise util.Abort(_('splicemap file not found or error reading %s:')
                             % path)
        return m

    def walktree(self, heads):
        '''Return a mapping that identifies the uncommitted parents of every
        uncommitted changeset.

        heads is consumed as the work queue. Revisions whose map entry is
        SKIPREV or already present in the sink are not visited.'''
        visit = heads
        known = set()
        parents = {}
        # May be None when the source cannot tell; it is passed through
        # as the progress total either way.
        numcommits = self.source.numcommits()
        while visit:
            n = visit.pop(0)
            if n in known:
                continue
            if n in self.map:
                m = self.map[n]
                if m == SKIPREV or self.dest.hascommitfrommap(m):
                    continue
            known.add(n)
            self.ui.progress(_('scanning'), len(known), unit=_('revisions'),
                             total=numcommits)
            commit = self.cachecommit(n)
            parents[n] = []
            for p in commit.parents:
                parents[n].append(p)
                visit.append(p)
        self.ui.progress(_('scanning'), None)

        return parents

    def mergesplicemap(self, parents, splicemap):
        """A splicemap redefines child/parent relationships. Check the
        map contains valid revision identifiers and merge the new
        links in the source graph.

        parents is updated in place.
        """
        for c in sorted(splicemap):
            if c not in parents:
                if not self.dest.hascommitforsplicemap(self.map.get(c, c)):
                    # Could be in source but not converted during this run
                    self.ui.warn(_('splice map revision %s is not being '
                                   'converted, ignoring\n') % c)
                continue
            pc = []
            for p in splicemap[c]:
                # We do not have to wait for nodes already in dest.
                if self.dest.hascommitforsplicemap(self.map.get(p, p)):
                    continue
                # Parent is not in dest and not being converted, not good
                if p not in parents:
                    raise util.Abort(_('unknown splice map parent: %s') % p)
                pc.append(p)
            parents[c] = pc

    def toposort(self, parents, sortmode):
        '''Return an ordering such that every uncommitted changeset is
        preceded by all its uncommitted ancestors.

        sortmode is one of 'branchsort', 'datesort', 'sourcesort' or
        'closesort'; anything else aborts.'''

        def mapchildren(parents):
            """Return a (children, roots) tuple where 'children' maps parent
            revision identifiers to children ones, and 'roots' is the list of
            revisions without parents. 'parents' must be a mapping of revision
            identifier to its parents ones.
            """
            visit = sorted(parents)
            seen = set()
            children = {}
            roots = []

            while visit:
                n = visit.pop(0)
                if n in seen:
                    continue
                seen.add(n)
                # Ensure that nodes without parents are present in the
                # 'children' mapping.
                children.setdefault(n, [])
                hasparent = False
                for p in parents[n]:
                    if p not in self.map:
                        visit.append(p)
                        hasparent = True
                    children.setdefault(p, []).append(n)
                if not hasparent:
                    roots.append(n)

            return children, roots

        # Sort functions are supposed to take a list of revisions which
        # can be converted immediately and pick one

        def makebranchsorter():
            """If the previously converted revision has a child in the
            eligible revisions list, pick it. Return the list head
            otherwise. Branch sort attempts to minimize branch
            switching, which is harmful for Mercurial backend
            compression.
            """
            # One-element list so that picknext() can rebind the value.
            prev = [None]
            def picknext(nodes):
                next = nodes[0]
                for n in nodes:
                    if prev[0] in parents[n]:
                        next = n
                        break
                prev[0] = next
                return next
            return picknext

        def makesourcesorter():
            """Source specific sort."""
            keyfn = lambda n: self.commitcache[n].sortkey
            def picknext(nodes):
                return sorted(nodes, key=keyfn)[0]
            return picknext

        def makeclosesorter():
            """Close order sort."""
            keyfn = lambda n: ('close' not in self.commitcache[n].extra,
                               self.commitcache[n].sortkey)
            def picknext(nodes):
                return sorted(nodes, key=keyfn)[0]
            return picknext

        def makedatesorter():
            """Sort revisions by date."""
            dates = {}
            def getdate(n):
                if n not in dates:
                    dates[n] = util.parsedate(self.commitcache[n].date)
                return dates[n]

            def picknext(nodes):
                return min([(getdate(n), n) for n in nodes])[1]

            return picknext

        if sortmode == 'branchsort':
            picknext = makebranchsorter()
        elif sortmode == 'datesort':
            picknext = makedatesorter()
        elif sortmode == 'sourcesort':
            picknext = makesourcesorter()
        elif sortmode == 'closesort':
            picknext = makeclosesorter()
        else:
            raise util.Abort(_('unknown sort mode: %s') % sortmode)

        children, actives = mapchildren(parents)

        s = []
        pendings = {}
        while actives:
            n = picknext(actives)
            actives.remove(n)
            s.append(n)

            # Update dependents list
            for c in children.get(n, []):
                if c not in pendings:
                    pendings[c] = [p for p in parents[c] if p not in self.map]
                try:
                    pendings[c].remove(n)
                except ValueError:
                    raise util.Abort(_('cycle detected between %s and %s')
                                       % (recode(c), recode(n)))
                if not pendings[c]:
                    # Parents are converted, node is eligible
                    actives.insert(0, c)
                    pendings[c] = None

        if len(s) != len(parents):
            raise util.Abort(_("not all revisions were sorted"))

        return s

    def writeauthormap(self):
        """Write self.authors to self.authorfile as 'src=dst' lines.

        Does nothing when no author file is set.
        """
        authorfile = self.authorfile
        if authorfile:
            self.ui.status(_('writing author map file %s\n') % authorfile)
            ofile = open(authorfile, 'w+')
            try:
                for author in self.authors:
                    ofile.write("%s=%s\n" % (author, self.authors[author]))
            finally:
                ofile.close()

    def readauthormap(self, authorfile):
        """Merge the 'srcauthor=dstauthor' lines of authorfile into
        self.authors.

        Blank lines and lines starting with '#' are skipped; lines
        without '=' are reported and ignored. An entry that differs from
        an already known mapping replaces it.
        """
        afile = open(authorfile, 'r')
        try:
            for line in afile:

                line = line.strip()
                if not line or line.startswith('#'):
                    continue

                try:
                    srcauthor, dstauthor = line.split('=', 1)
                except ValueError:
                    msg = _('ignoring bad line in author map file %s: %s\n')
                    self.ui.warn(msg % (authorfile, line.rstrip()))
                    continue

                srcauthor = srcauthor.strip()
                dstauthor = dstauthor.strip()
                previous = self.authors.get(srcauthor)
                if previous in (None, dstauthor):
                    msg = _('mapping author %s to %s\n')
                    self.ui.debug(msg % (srcauthor, dstauthor))
                else:
                    m = _('overriding mapping for author %s, was %s, '
                          'will be %s\n')
                    self.ui.status(m % (srcauthor, previous, dstauthor))
                # Store the mapping in both cases, so that an announced
                # override really takes effect.
                self.authors[srcauthor] = dstauthor
        finally:
            afile.close()

    def cachecommit(self, rev):
        """Fetch the commit for rev from the source, apply the author and
        branch maps to it, remember it in self.commitcache and return it.
        """
        commit = self.source.getcommit(rev)
        commit.author = self.authors.get(commit.author, commit.author)
        # If commit.branch is None, this commit is coming from the source
        # repository's default branch and destined for the default branch in the
        # destination repository. For such commits, passing a literal "None"
        # string to branchmap.get() below allows the user to map "None" to an
        # alternate default branch in the destination repository.
        commit.branch = self.branchmap.get(str(commit.branch), commit.branch)
        self.commitcache[rev] = commit
        return commit

    def copy(self, rev):
        """Convert the single revision rev and record the result in the
        revision map.

        rev must already be in self.commitcache.
        """
        commit = self.commitcache[rev]
        full = self.opts.get('full')
        changes = self.source.getchanges(rev, full)
        if isinstance(changes, basestring):
            # The source returned a string instead of (files, copies):
            # either SKIPREV or a revision whose map entry is reused.
            if changes == SKIPREV:
                dest = SKIPREV
            else:
                dest = self.map[changes]
            self.map[rev] = dest
            return
        files, copies = changes
        pbranches = []
        if commit.parents:
            for prev in commit.parents:
                if prev not in self.commitcache:
                    self.cachecommit(prev)
                pbranches.append((self.map[prev],
                                  self.commitcache[prev].branch))
        self.dest.setbranch(commit.branch, pbranches)
        try:
            parents = self.splicemap[rev]
            self.ui.status(_('spliced in %s as parents of %s\n') %
                           (parents, rev))
            parents = [self.map.get(p, p) for p in parents]
        except KeyError:
            parents = [b[0] for b in pbranches]
        source = progresssource(self.ui, self.source, len(files))
        newnode = self.dest.putcommit(files, copies, parents, commit,
                                      source, self.map, full)
        source.close()
        self.source.converted(rev, newnode)
        self.map[rev] = newnode

    def convert(self, sortmode):
        """Run the whole conversion: scan, sort, convert every pending
        revision, then transfer tags and bookmarks and write the author
        map. cleanup() is always called at the end.
        """
        try:
            self.source.before()
            self.dest.before()
            self.source.setrevmap(self.map)
            self.ui.status(_("scanning source...\n"))
            heads = self.source.getheads()
            parents = self.walktree(heads)
            self.mergesplicemap(parents, self.splicemap)
            self.ui.status(_("sorting...\n"))
            t = self.toposort(parents, sortmode)
            num = len(t)
            # Stays None when there is nothing to convert, which also
            # disables the tag and bookmark updates below.
            c = None

            self.ui.status(_("converting...\n"))
            for i, c in enumerate(t):
                num -= 1
                desc = self.commitcache[c].desc
                if "\n" in desc:
                    desc = desc.splitlines()[0]
                # convert log message to local encoding without using
                # tolocal() because the encoding.encoding convert()
                # uses is 'utf-8'
                self.ui.status("%d %s\n" % (num, recode(desc)))
                self.ui.note(_("source: %s\n") % recode(c))
                self.ui.progress(_('converting'), i, unit=_('revisions'),
                                 total=len(t))
                self.copy(c)
            self.ui.progress(_('converting'), None)

            tags = self.source.gettags()
            ctags = {}
            for k in tags:
                v = tags[k]
                if self.map.get(v, SKIPREV) != SKIPREV:
                    ctags[k] = self.map[v]

            if c and ctags:
                nrev, tagsparent = self.dest.puttags(ctags)
                if nrev and tagsparent:
                    # write another hash correspondence to override the previous
                    # one so we don't end up with extra tag heads
                    tagsparents = [e for e in self.map.iteritems()
                                   if e[1] == tagsparent]
                    if tagsparents:
                        self.map[tagsparents[0][0]] = nrev

            bookmarks = self.source.getbookmarks()
            cbookmarks = {}
            for k in bookmarks:
                v = bookmarks[k]
                if self.map.get(v, SKIPREV) != SKIPREV:
                    cbookmarks[k] = self.map[v]

            if c and cbookmarks:
                self.dest.putbookmarks(cbookmarks)

            self.writeauthormap()
        finally:
            self.cleanup()

    def cleanup(self):
        """Run the after() hooks of sink and source, then close the
        revision map."""
        try:
            self.dest.after()
        finally:
            self.source.after()
        self.map.close()
488 490
def convert(ui, src, dest=None, revmapfile=None, **opts):
    """Entry point of the convert command: convert src into dest.

    The current encoding.encoding is saved in orig_encoding and replaced
    by UTF-8. dest defaults to the default destination name for src with
    a "-hg" suffix, and revmapfile to the one provided by the sink.
    Aborts when several sort modes are requested or when the chosen one
    is not supported by the source.
    """
    global orig_encoding
    orig_encoding = encoding.encoding
    encoding.encoding = 'UTF-8'

    # support --authors as an alias for --authormap
    if not opts.get('authormap'):
        opts['authormap'] = opts.get('authors')

    if not dest:
        dest = hg.defaultdest(src) + "-hg"
        ui.status(_("assuming destination %s\n") % dest)

    destc = convertsink(ui, dest, opts.get('dest_type'))

    try:
        srcc, defaultsort = convertsource(ui, src, opts.get('source_type'),
                                          opts.get('rev'))
    except Exception:
        # Remove whatever the sink created before propagating the error.
        for path in destc.created:
            shutil.rmtree(path, True)
        raise

    sortmodes = ('branchsort', 'datesort', 'sourcesort', 'closesort')
    requested = [m for m in sortmodes if opts.get(m)]
    if len(requested) > 1:
        raise util.Abort(_('more than one sort mode specified'))
    if requested:
        sortmode = requested[0]
    else:
        sortmode = defaultsort
    if sortmode == 'sourcesort' and not srcc.hasnativeorder():
        raise util.Abort(_('--sourcesort is not supported by this data source'))
    if sortmode == 'closesort' and not srcc.hasnativeclose():
        raise util.Abort(_('--closesort is not supported by this data source'))

    fmap = opts.get('filemap')
    if fmap:
        srcc = filemap.filemap_source(ui, srcc, fmap)
        destc.setfilemapmode(True)

    if not revmapfile:
        revmapfile = destc.revmapfile()

    c = converter(ui, srcc, destc, revmapfile, opts)
    c.convert(sortmode)
532 534
General Comments 0
You need to be logged in to leave comments. Login now