##// END OF EJS Templates
convert: split converter into convertsource and convertsink
Brendan Cully -
r4763:8e9d3fae default
parent child Browse files
Show More
@@ -1,334 +1,341
1 1 # convert.py Foreign SCM converter
2 2 #
3 3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms
6 6 # of the GNU General Public License, incorporated herein by reference.
7 7
8 from common import NoRepo
8 from common import NoRepo, converter_source, converter_sink
9 9 from cvs import convert_cvs
10 10 from git import convert_git
11 11 from hg import convert_mercurial
12 12
13 13 import os, shutil
14 14 from mercurial import hg, ui, util, commands
15 15
16 16 commands.norepo += " convert"
17 17
18 18 converters = [convert_cvs, convert_git, convert_mercurial]
19 19
20 def converter(ui, path, rev=None):
20 def convertsource(ui, path, rev=None):
21 for c in converters:
22 try:
23 converter = c(ui, path, rev=rev)
24 if not isinstance(converter, converter_source):
25 raise util.Abort('%s: cannot read from this repository type' % path)
26 return converter
27 except NoRepo:
28 pass
29 raise util.Abort('%s: unknown repository type' % path)
30
31 def convertsink(ui, path):
21 32 if not os.path.isdir(path):
22 33 raise util.Abort("%s: not a directory" % path)
23 34 for c in converters:
24 35 try:
25 if rev:
26 return c(ui, path, rev=rev)
27 else:
28 return c(ui, path)
36 converter = c(ui, path)
37 if not isinstance(converter, converter_sink):
38 raise util.Abort('%s: cannot write to this repository type' % path)
39 return converter
29 40 except NoRepo:
30 41 pass
31 raise util.Abort("%s: unknown repository type" % path)
42 raise util.Abort('%s: unknown repository type' % path)
32 43
33 44 class convert(object):
34 45 def __init__(self, ui, source, dest, mapfile, opts):
35 46
36 47 self.source = source
37 48 self.dest = dest
38 49 self.ui = ui
39 50 self.opts = opts
40 51 self.commitcache = {}
41 52 self.mapfile = mapfile
42 53 self.mapfilefd = None
43 54 self.authors = {}
44 55 self.authorfile = None
45 56
46 57 self.map = {}
47 58 try:
48 59 origmapfile = open(self.mapfile, 'r')
49 60 for l in origmapfile:
50 61 sv, dv = l[:-1].split()
51 62 self.map[sv] = dv
52 63 origmapfile.close()
53 64 except IOError:
54 65 pass
55 66
56 67 # First read the destination's author map, if any
57 68 authorfile = self.dest.authorfile()
58 69 if authorfile and os.path.exists(authorfile):
59 70 self.readauthormap(authorfile)
60 71 # Extend/Override with new author map if necessary
61 72 if opts.get('authors'):
62 73 self.readauthormap(opts.get('authors'))
63 74 self.authorfile = self.dest.authorfile()
64 75
65 76 def walktree(self, heads):
66 77 '''Return a mapping that identifies the uncommitted parents of every
67 78 uncommitted changeset.'''
68 79 visit = heads
69 80 known = {}
70 81 parents = {}
71 82 while visit:
72 83 n = visit.pop(0)
73 84 if n in known or n in self.map: continue
74 85 known[n] = 1
75 86 self.commitcache[n] = self.source.getcommit(n)
76 87 cp = self.commitcache[n].parents
77 88 parents[n] = []
78 89 for p in cp:
79 90 parents[n].append(p)
80 91 visit.append(p)
81 92
82 93 return parents
83 94
84 95 def toposort(self, parents):
85 96 '''Return an ordering such that every uncommitted changeset is
86 97 preceded by all its uncommitted ancestors.'''
87 98 visit = parents.keys()
88 99 seen = {}
89 100 children = {}
90 101
91 102 while visit:
92 103 n = visit.pop(0)
93 104 if n in seen: continue
94 105 seen[n] = 1
95 106 # Ensure that nodes without parents are present in the 'children'
96 107 # mapping.
97 108 children.setdefault(n, [])
98 109 for p in parents[n]:
99 110 if not p in self.map:
100 111 visit.append(p)
101 112 children.setdefault(p, []).append(n)
102 113
103 114 s = []
104 115 removed = {}
105 116 visit = children.keys()
106 117 while visit:
107 118 n = visit.pop(0)
108 119 if n in removed: continue
109 120 dep = 0
110 121 if n in parents:
111 122 for p in parents[n]:
112 123 if p in self.map: continue
113 124 if p not in removed:
114 125 # we're still dependent
115 126 visit.append(n)
116 127 dep = 1
117 128 break
118 129
119 130 if not dep:
120 131 # all n's parents are in the list
121 132 removed[n] = 1
122 133 if n not in self.map:
123 134 s.append(n)
124 135 if n in children:
125 136 for c in children[n]:
126 137 visit.insert(0, c)
127 138
128 139 if self.opts.get('datesort'):
129 140 depth = {}
130 141 for n in s:
131 142 depth[n] = 0
132 143 pl = [p for p in self.commitcache[n].parents
133 144 if p not in self.map]
134 145 if pl:
135 146 depth[n] = max([depth[p] for p in pl]) + 1
136 147
137 148 s = [(depth[n], self.commitcache[n].date, n) for n in s]
138 149 s.sort()
139 150 s = [e[2] for e in s]
140 151
141 152 return s
142 153
143 154 def mapentry(self, src, dst):
144 155 if self.mapfilefd is None:
145 156 try:
146 157 self.mapfilefd = open(self.mapfile, "a")
147 158 except IOError, (errno, strerror):
148 159 raise util.Abort("Could not open map file %s: %s, %s\n" % (self.mapfile, errno, strerror))
149 160 self.map[src] = dst
150 161 self.mapfilefd.write("%s %s\n" % (src, dst))
151 162 self.mapfilefd.flush()
152 163
153 164 def writeauthormap(self):
154 165 authorfile = self.authorfile
155 166 if authorfile:
156 167 self.ui.status('Writing author map file %s\n' % authorfile)
157 168 ofile = open(authorfile, 'w+')
158 169 for author in self.authors:
159 170 ofile.write("%s=%s\n" % (author, self.authors[author]))
160 171 ofile.close()
161 172
162 173 def readauthormap(self, authorfile):
163 174 afile = open(authorfile, 'r')
164 175 for line in afile:
165 176 try:
166 177 srcauthor = line.split('=')[0].strip()
167 178 dstauthor = line.split('=')[1].strip()
168 179 if srcauthor in self.authors and dstauthor != self.authors[srcauthor]:
169 180 self.ui.status(
170 181 'Overriding mapping for author %s, was %s, will be %s\n'
171 182 % (srcauthor, self.authors[srcauthor], dstauthor))
172 183 else:
173 184 self.ui.debug('Mapping author %s to %s\n'
174 185 % (srcauthor, dstauthor))
175 186 self.authors[srcauthor] = dstauthor
176 187 except IndexError:
177 188 self.ui.warn(
178 189 'Ignoring bad line in author file map %s: %s\n'
179 190 % (authorfile, line))
180 191 afile.close()
181 192
182 193 def copy(self, rev):
183 194 c = self.commitcache[rev]
184 195 files = self.source.getchanges(rev)
185 196
186 197 for f, v in files:
187 198 try:
188 199 data = self.source.getfile(f, v)
189 200 except IOError, inst:
190 201 self.dest.delfile(f)
191 202 else:
192 203 e = self.source.getmode(f, v)
193 204 self.dest.putfile(f, e, data)
194 205
195 206 r = [self.map[v] for v in c.parents]
196 207 f = [f for f, v in files]
197 208 newnode = self.dest.putcommit(f, r, c)
198 209 self.mapentry(rev, newnode)
199 210
200 211 def convert(self):
201 212 try:
202 213 self.ui.status("scanning source...\n")
203 214 heads = self.source.getheads()
204 215 parents = self.walktree(heads)
205 216 self.ui.status("sorting...\n")
206 217 t = self.toposort(parents)
207 218 num = len(t)
208 219 c = None
209 220
210 221 self.ui.status("converting...\n")
211 222 for c in t:
212 223 num -= 1
213 224 desc = self.commitcache[c].desc
214 225 if "\n" in desc:
215 226 desc = desc.splitlines()[0]
216 227 author = self.commitcache[c].author
217 228 author = self.authors.get(author, author)
218 229 self.commitcache[c].author = author
219 230 self.ui.status("%d %s\n" % (num, desc))
220 231 self.copy(c)
221 232
222 233 tags = self.source.gettags()
223 234 ctags = {}
224 235 for k in tags:
225 236 v = tags[k]
226 237 if v in self.map:
227 238 ctags[k] = self.map[v]
228 239
229 240 if c and ctags:
230 241 nrev = self.dest.puttags(ctags)
231 242 # write another hash correspondence to override the previous
232 243 # one so we don't end up with extra tag heads
233 244 if nrev:
234 245 self.mapentry(c, nrev)
235 246
236 247 self.writeauthormap()
237 248 finally:
238 249 self.cleanup()
239 250
240 251 def cleanup(self):
241 252 if self.mapfilefd:
242 253 self.mapfilefd.close()
243 254
244 255 def _convert(ui, src, dest=None, mapfile=None, **opts):
245 256 '''Convert a foreign SCM repository to a Mercurial one.
246 257
247 258 Accepted source formats:
248 259 - GIT
249 260 - CVS
250 261
251 262 Accepted destination formats:
252 263 - Mercurial
253 264
254 265 If no revision is given, all revisions will be converted. Otherwise,
255 266 convert will only import up to the named revision (given in a format
256 267 understood by the source).
257 268
258 269 If destination isn't given, a new Mercurial repo named <src>-hg will
259 270 be created. If <mapfile> isn't given, it will be put in a default
260 271 location (<dest>/.hg/shamap by default)
261 272
262 273 The <mapfile> is a simple text file that maps each source commit ID to
263 274 the destination ID for that revision, like so:
264 275 <source ID> <destination ID>
265 276
266 277 If the file doesn't exist, it's automatically created. It's updated
267 278 on each commit copied, so convert-repo can be interrupted and can
268 279 be run repeatedly to copy new commits.
269 280
270 281 The [username mapping] file is a simple text file that maps each source
271 282 commit author to a destination commit author. It is handy for source SCMs
272 283 that use unix logins to identify authors (eg: CVS). One line per author
273 284 mapping and the line format is:
274 285 srcauthor=whatever string you want
275 286 '''
276 287
277 288 if not dest:
278 289 dest = src + "-hg"
279 290 ui.status("assuming destination %s\n" % dest)
280 291
281 292 # Try to be smart and initialize things when required
282 293 created = False
283 294 if os.path.isdir(dest):
284 295 if len(os.listdir(dest)) > 0:
285 296 try:
286 297 hg.repository(ui, dest)
287 298 ui.status("destination %s is a Mercurial repository\n" % dest)
288 299 except hg.RepoError:
289 300 raise util.Abort(
290 301 "destination directory %s is not empty.\n"
291 302 "Please specify an empty directory to be initialized\n"
292 303 "or an already initialized mercurial repository"
293 304 % dest)
294 305 else:
295 306 ui.status("initializing destination %s repository\n" % dest)
296 307 hg.repository(ui, dest, create=True)
297 308 created = True
298 309 elif os.path.exists(dest):
299 310 raise util.Abort("destination %s exists and is not a directory" % dest)
300 311 else:
301 312 ui.status("initializing destination %s repository\n" % dest)
302 313 hg.repository(ui, dest, create=True)
303 314 created = True
304 315
305 destc = converter(ui, dest)
306 if not hasattr(destc, "putcommit"):
307 raise util.Abort("%s: can't write to this repo type" % src)
316 destc = convertsink(ui, dest)
308 317
309 318 try:
310 srcc = converter(ui, src, rev=opts.get('rev'))
311 if not hasattr(srcc, "getcommit"):
312 raise util.Abort("%s: can't read from this repo type" % src)
319 srcc = convertsource(ui, src, rev=opts.get('rev'))
313 320 except Exception:
314 321 if created:
315 322 shutil.rmtree(dest, True)
316 323 raise
317 324
318 325 if not mapfile:
319 326 try:
320 327 mapfile = destc.mapfile()
321 328 except:
322 329 mapfile = os.path.join(destc, "map")
323 330
324 331 c = convert(ui, srcc, destc, mapfile, opts)
325 332 c.convert()
326 333
327 334 cmdtable = {
328 335 "convert":
329 336 (_convert,
330 337 [('A', 'authors', '', 'username mapping filename'),
331 338 ('r', 'rev', '', 'import up to target revision REV'),
332 339 ('', 'datesort', None, 'try to sort changesets by date')],
333 340 'hg convert [OPTION]... SOURCE [DEST [MAPFILE]]'),
334 341 }
@@ -1,94 +1,94
1 1 # hg backend for convert extension
2 2
3 3 import os, time
4 4 from mercurial import hg
5 5
6 6 from common import NoRepo, converter_sink
7 7
8 8 class convert_mercurial(converter_sink):
9 def __init__(self, ui, path, rev=None):
9 def __init__(self, ui, path):
10 10 self.path = path
11 11 self.ui = ui
12 12 try:
13 13 self.repo = hg.repository(self.ui, path)
14 14 except:
15 15 raise NoRepo("could open hg repo %s" % path)
16 16
17 17 def mapfile(self):
18 18 return os.path.join(self.path, ".hg", "shamap")
19 19
20 20 def authorfile(self):
21 21 return os.path.join(self.path, ".hg", "authormap")
22 22
23 23 def getheads(self):
24 24 h = self.repo.changelog.heads()
25 25 return [ hg.hex(x) for x in h ]
26 26
27 27 def putfile(self, f, e, data):
28 28 self.repo.wwrite(f, data, e)
29 29 if self.repo.dirstate.state(f) == '?':
30 30 self.repo.dirstate.update([f], "a")
31 31
32 32 def delfile(self, f):
33 33 try:
34 34 os.unlink(self.repo.wjoin(f))
35 35 #self.repo.remove([f])
36 36 except:
37 37 pass
38 38
39 39 def putcommit(self, files, parents, commit):
40 40 seen = {}
41 41 pl = []
42 42 for p in parents:
43 43 if p not in seen:
44 44 pl.append(p)
45 45 seen[p] = 1
46 46 parents = pl
47 47
48 48 if len(parents) < 2: parents.append("0" * 40)
49 49 if len(parents) < 2: parents.append("0" * 40)
50 50 p2 = parents.pop(0)
51 51
52 52 text = commit.desc
53 53 extra = {}
54 54 try:
55 55 extra["branch"] = commit.branch
56 56 except AttributeError:
57 57 pass
58 58
59 59 while parents:
60 60 p1 = p2
61 61 p2 = parents.pop(0)
62 62 a = self.repo.rawcommit(files, text, commit.author, commit.date,
63 63 hg.bin(p1), hg.bin(p2), extra=extra)
64 64 text = "(octopus merge fixup)\n"
65 65 p2 = hg.hex(self.repo.changelog.tip())
66 66
67 67 return p2
68 68
69 69 def puttags(self, tags):
70 70 try:
71 71 old = self.repo.wfile(".hgtags").read()
72 72 oldlines = old.splitlines(1)
73 73 oldlines.sort()
74 74 except:
75 75 oldlines = []
76 76
77 77 k = tags.keys()
78 78 k.sort()
79 79 newlines = []
80 80 for tag in k:
81 81 newlines.append("%s %s\n" % (tags[tag], tag))
82 82
83 83 newlines.sort()
84 84
85 85 if newlines != oldlines:
86 86 self.ui.status("updating tags\n")
87 87 f = self.repo.wfile(".hgtags", "w")
88 88 f.write("".join(newlines))
89 89 f.close()
90 90 if not oldlines: self.repo.add([".hgtags"])
91 91 date = "%s 0" % int(time.mktime(time.gmtime()))
92 92 self.repo.rawcommit([".hgtags"], "update tags", "convert-repo",
93 93 date, self.repo.changelog.tip(), hg.nullid)
94 94 return hg.hex(self.repo.changelog.tip())
General Comments 0
You need to be logged in to leave comments. Login now