##// END OF EJS Templates
convert: look up copies in getchanges instead of getcommit...
Brendan Cully -
r5121:ef338e34 default
parent child Browse files
Show More
@@ -1,441 +1,442
1 1 # convert.py Foreign SCM converter
2 2 #
3 3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms
6 6 # of the GNU General Public License, incorporated herein by reference.
7 7
8 8 from common import NoRepo, converter_source, converter_sink
9 9 from cvs import convert_cvs
10 10 from git import convert_git
11 11 from hg import mercurial_source, mercurial_sink
12 12 from subversion import convert_svn
13 13
14 14 import os, shlex, shutil
15 15 from mercurial import hg, ui, util, commands
16 16 from mercurial.i18n import _
17 17
18 18 commands.norepo += " convert"
19 19
20 20 converters = [convert_cvs, convert_git, convert_svn, mercurial_source,
21 21 mercurial_sink]
22 22
23 23 def convertsource(ui, path, **opts):
24 24 for c in converters:
25 25 try:
26 26 return c.getcommit and c(ui, path, **opts)
27 27 except (AttributeError, NoRepo):
28 28 pass
29 29 raise util.Abort('%s: unknown repository type' % path)
30 30
31 31 def convertsink(ui, path):
32 32 if not os.path.isdir(path):
33 33 raise util.Abort("%s: not a directory" % path)
34 34 for c in converters:
35 35 try:
36 36 return c.putcommit and c(ui, path)
37 37 except (AttributeError, NoRepo):
38 38 pass
39 39 raise util.Abort('%s: unknown repository type' % path)
40 40
41 41 class convert(object):
42 42 def __init__(self, ui, source, dest, revmapfile, filemapper, opts):
43 43
44 44 self.source = source
45 45 self.dest = dest
46 46 self.ui = ui
47 47 self.opts = opts
48 48 self.commitcache = {}
49 49 self.revmapfile = revmapfile
50 50 self.revmapfilefd = None
51 51 self.authors = {}
52 52 self.authorfile = None
53 53 self.mapfile = filemapper
54 54
55 55 self.map = {}
56 56 try:
57 57 origrevmapfile = open(self.revmapfile, 'r')
58 58 for l in origrevmapfile:
59 59 sv, dv = l[:-1].split()
60 60 self.map[sv] = dv
61 61 origrevmapfile.close()
62 62 except IOError:
63 63 pass
64 64
65 65 # Read first the dst author map if any
66 66 authorfile = self.dest.authorfile()
67 67 if authorfile and os.path.exists(authorfile):
68 68 self.readauthormap(authorfile)
69 69 # Extend/Override with new author map if necessary
70 70 if opts.get('authors'):
71 71 self.readauthormap(opts.get('authors'))
72 72 self.authorfile = self.dest.authorfile()
73 73
74 74 def walktree(self, heads):
75 75 '''Return a mapping that identifies the uncommitted parents of every
76 76 uncommitted changeset.'''
77 77 visit = heads
78 78 known = {}
79 79 parents = {}
80 80 while visit:
81 81 n = visit.pop(0)
82 82 if n in known or n in self.map: continue
83 83 known[n] = 1
84 84 self.commitcache[n] = self.source.getcommit(n)
85 85 cp = self.commitcache[n].parents
86 86 parents[n] = []
87 87 for p in cp:
88 88 parents[n].append(p)
89 89 visit.append(p)
90 90
91 91 return parents
92 92
93 93 def toposort(self, parents):
94 94 '''Return an ordering such that every uncommitted changeset is
95 95 preceded by all its uncommitted ancestors.'''
96 96 visit = parents.keys()
97 97 seen = {}
98 98 children = {}
99 99
100 100 while visit:
101 101 n = visit.pop(0)
102 102 if n in seen: continue
103 103 seen[n] = 1
104 104 # Ensure that nodes without parents are present in the 'children'
105 105 # mapping.
106 106 children.setdefault(n, [])
107 107 for p in parents[n]:
108 108 if not p in self.map:
109 109 visit.append(p)
110 110 children.setdefault(p, []).append(n)
111 111
112 112 s = []
113 113 removed = {}
114 114 visit = children.keys()
115 115 while visit:
116 116 n = visit.pop(0)
117 117 if n in removed: continue
118 118 dep = 0
119 119 if n in parents:
120 120 for p in parents[n]:
121 121 if p in self.map: continue
122 122 if p not in removed:
123 123 # we're still dependent
124 124 visit.append(n)
125 125 dep = 1
126 126 break
127 127
128 128 if not dep:
129 129 # all n's parents are in the list
130 130 removed[n] = 1
131 131 if n not in self.map:
132 132 s.append(n)
133 133 if n in children:
134 134 for c in children[n]:
135 135 visit.insert(0, c)
136 136
137 137 if self.opts.get('datesort'):
138 138 depth = {}
139 139 for n in s:
140 140 depth[n] = 0
141 141 pl = [p for p in self.commitcache[n].parents
142 142 if p not in self.map]
143 143 if pl:
144 144 depth[n] = max([depth[p] for p in pl]) + 1
145 145
146 146 s = [(depth[n], self.commitcache[n].date, n) for n in s]
147 147 s.sort()
148 148 s = [e[2] for e in s]
149 149
150 150 return s
151 151
152 152 def mapentry(self, src, dst):
153 153 if self.revmapfilefd is None:
154 154 try:
155 155 self.revmapfilefd = open(self.revmapfile, "a")
156 156 except IOError, (errno, strerror):
157 157 raise util.Abort("Could not open map file %s: %s, %s\n" % (self.revmapfile, errno, strerror))
158 158 self.map[src] = dst
159 159 self.revmapfilefd.write("%s %s\n" % (src, dst))
160 160 self.revmapfilefd.flush()
161 161
162 162 def writeauthormap(self):
163 163 authorfile = self.authorfile
164 164 if authorfile:
165 165 self.ui.status('Writing author map file %s\n' % authorfile)
166 166 ofile = open(authorfile, 'w+')
167 167 for author in self.authors:
168 168 ofile.write("%s=%s\n" % (author, self.authors[author]))
169 169 ofile.close()
170 170
171 171 def readauthormap(self, authorfile):
172 172 afile = open(authorfile, 'r')
173 173 for line in afile:
174 174 try:
175 175 srcauthor = line.split('=')[0].strip()
176 176 dstauthor = line.split('=')[1].strip()
177 177 if srcauthor in self.authors and dstauthor != self.authors[srcauthor]:
178 178 self.ui.status(
179 179 'Overriding mapping for author %s, was %s, will be %s\n'
180 180 % (srcauthor, self.authors[srcauthor], dstauthor))
181 181 else:
182 182 self.ui.debug('Mapping author %s to %s\n'
183 183 % (srcauthor, dstauthor))
184 184 self.authors[srcauthor] = dstauthor
185 185 except IndexError:
186 186 self.ui.warn(
187 187 'Ignoring bad line in author file map %s: %s\n'
188 188 % (authorfile, line))
189 189 afile.close()
190 190
191 191 def copy(self, rev):
192 192 commit = self.commitcache[rev]
193 193 do_copies = hasattr(self.dest, 'copyfile')
194 194 filenames = []
195 195
196 for f, v in self.source.getchanges(rev):
196 files, copies = self.source.getchanges(rev)
197 for f, v in files:
197 198 newf = self.mapfile(f)
198 199 if not newf:
199 200 continue
200 201 filenames.append(newf)
201 202 try:
202 203 data = self.source.getfile(f, v)
203 204 except IOError, inst:
204 205 self.dest.delfile(newf)
205 206 else:
206 207 e = self.source.getmode(f, v)
207 208 self.dest.putfile(newf, e, data)
208 209 if do_copies:
209 if f in commit.copies:
210 copyf = self.mapfile(commit.copies[f])
210 if f in copies:
211 copyf = self.mapfile(copies[f])
211 212 if copyf:
212 213 # Merely marks that a copy happened.
213 214 self.dest.copyfile(copyf, newf)
214 215
215 216 parents = [self.map[r] for r in commit.parents]
216 217 newnode = self.dest.putcommit(filenames, parents, commit)
217 218 self.mapentry(rev, newnode)
218 219
219 220 def convert(self):
220 221 try:
221 222 self.dest.before()
222 223 self.source.setrevmap(self.map)
223 224 self.ui.status("scanning source...\n")
224 225 heads = self.source.getheads()
225 226 parents = self.walktree(heads)
226 227 self.ui.status("sorting...\n")
227 228 t = self.toposort(parents)
228 229 num = len(t)
229 230 c = None
230 231
231 232 self.ui.status("converting...\n")
232 233 for c in t:
233 234 num -= 1
234 235 desc = self.commitcache[c].desc
235 236 if "\n" in desc:
236 237 desc = desc.splitlines()[0]
237 238 author = self.commitcache[c].author
238 239 author = self.authors.get(author, author)
239 240 self.commitcache[c].author = author
240 241 self.ui.status("%d %s\n" % (num, desc))
241 242 self.copy(c)
242 243
243 244 tags = self.source.gettags()
244 245 ctags = {}
245 246 for k in tags:
246 247 v = tags[k]
247 248 if v in self.map:
248 249 ctags[k] = self.map[v]
249 250
250 251 if c and ctags:
251 252 nrev = self.dest.puttags(ctags)
252 253 # write another hash correspondence to override the previous
253 254 # one so we don't end up with extra tag heads
254 255 if nrev:
255 256 self.mapentry(c, nrev)
256 257
257 258 self.writeauthormap()
258 259 finally:
259 260 self.cleanup()
260 261
261 262 def cleanup(self):
262 263 self.dest.after()
263 264 if self.revmapfilefd:
264 265 self.revmapfilefd.close()
265 266
266 267 def rpairs(name):
267 268 e = len(name)
268 269 while e != -1:
269 270 yield name[:e], name[e+1:]
270 271 e = name.rfind('/', 0, e)
271 272
272 273 class filemapper(object):
273 274 '''Map and filter filenames when importing.
274 275 A name can be mapped to itself, a new name, or None (omit from new
275 276 repository).'''
276 277
277 278 def __init__(self, ui, path=None):
278 279 self.ui = ui
279 280 self.include = {}
280 281 self.exclude = {}
281 282 self.rename = {}
282 283 if path:
283 284 if self.parse(path):
284 285 raise util.Abort(_('errors in filemap'))
285 286
286 287 def parse(self, path):
287 288 errs = 0
288 289 def check(name, mapping, listname):
289 290 if name in mapping:
290 291 self.ui.warn(_('%s:%d: %r already in %s list\n') %
291 292 (lex.infile, lex.lineno, name, listname))
292 293 return 1
293 294 return 0
294 295 lex = shlex.shlex(open(path), path, True)
295 296 lex.wordchars += '!@#$%^&*()-=+[]{}|;:,./<>?'
296 297 cmd = lex.get_token()
297 298 while cmd:
298 299 if cmd == 'include':
299 300 name = lex.get_token()
300 301 errs += check(name, self.exclude, 'exclude')
301 302 self.include[name] = name
302 303 elif cmd == 'exclude':
303 304 name = lex.get_token()
304 305 errs += check(name, self.include, 'include')
305 306 errs += check(name, self.rename, 'rename')
306 307 self.exclude[name] = name
307 308 elif cmd == 'rename':
308 309 src = lex.get_token()
309 310 dest = lex.get_token()
310 311 errs += check(src, self.exclude, 'exclude')
311 312 self.rename[src] = dest
312 313 elif cmd == 'source':
313 314 errs += self.parse(lex.get_token())
314 315 else:
315 316 self.ui.warn(_('%s:%d: unknown directive %r\n') %
316 317 (lex.infile, lex.lineno, cmd))
317 318 errs += 1
318 319 cmd = lex.get_token()
319 320 return errs
320 321
321 322 def lookup(self, name, mapping):
322 323 for pre, suf in rpairs(name):
323 324 try:
324 325 return mapping[pre], pre, suf
325 326 except KeyError, err:
326 327 pass
327 328 return '', name, ''
328 329
329 330 def __call__(self, name):
330 331 if self.include:
331 332 inc = self.lookup(name, self.include)[0]
332 333 else:
333 334 inc = name
334 335 if self.exclude:
335 336 exc = self.lookup(name, self.exclude)[0]
336 337 else:
337 338 exc = ''
338 339 if not inc or exc:
339 340 return None
340 341 newpre, pre, suf = self.lookup(name, self.rename)
341 342 if newpre:
342 343 if suf:
343 344 return newpre + '/' + suf
344 345 return newpre
345 346 return name
346 347
347 348 def _convert(ui, src, dest=None, revmapfile=None, **opts):
348 349 """Convert a foreign SCM repository to a Mercurial one.
349 350
350 351 Accepted source formats:
351 352 - GIT
352 353 - CVS
353 354 - SVN
354 355
355 356 Accepted destination formats:
356 357 - Mercurial
357 358
358 359 If no revision is given, all revisions will be converted. Otherwise,
359 360 convert will only import up to the named revision (given in a format
360 361 understood by the source).
361 362
362 363 If no destination directory name is specified, it defaults to the
363 364 basename of the source with '-hg' appended. If the destination
364 365 repository doesn't exist, it will be created.
365 366
366 367 If <revmapfile> isn't given, it will be put in a default location
367 368 (<dest>/.hg/shamap by default). The <revmapfile> is a simple text
368 369 file that maps each source commit ID to the destination ID for
369 370 that revision, like so:
370 371 <source ID> <destination ID>
371 372
372 373 If the file doesn't exist, it's automatically created. It's updated
373 374 on each commit copied, so convert-repo can be interrupted and can
374 375 be run repeatedly to copy new commits.
375 376
376 377 The [username mapping] file is a simple text file that maps each source
377 378 commit author to a destination commit author. It is handy for source SCMs
378 379 that use unix logins to identify authors (eg: CVS). One line per author
379 380 mapping and the line format is:
380 381 srcauthor=whatever string you want
381 382 """
382 383
383 384 util._encoding = 'UTF-8'
384 385
385 386 if not dest:
386 387 dest = hg.defaultdest(src) + "-hg"
387 388 ui.status("assuming destination %s\n" % dest)
388 389
389 390 # Try to be smart and initialize things when required
390 391 created = False
391 392 if os.path.isdir(dest):
392 393 if len(os.listdir(dest)) > 0:
393 394 try:
394 395 hg.repository(ui, dest)
395 396 ui.status("destination %s is a Mercurial repository\n" % dest)
396 397 except hg.RepoError:
397 398 raise util.Abort(
398 399 "destination directory %s is not empty.\n"
399 400 "Please specify an empty directory to be initialized\n"
400 401 "or an already initialized mercurial repository"
401 402 % dest)
402 403 else:
403 404 ui.status("initializing destination %s repository\n" % dest)
404 405 hg.repository(ui, dest, create=True)
405 406 created = True
406 407 elif os.path.exists(dest):
407 408 raise util.Abort("destination %s exists and is not a directory" % dest)
408 409 else:
409 410 ui.status("initializing destination %s repository\n" % dest)
410 411 hg.repository(ui, dest, create=True)
411 412 created = True
412 413
413 414 destc = convertsink(ui, dest)
414 415
415 416 try:
416 417 srcc = convertsource(ui, src, rev=opts.get('rev'))
417 418 except Exception:
418 419 if created:
419 420 shutil.rmtree(dest, True)
420 421 raise
421 422
422 423 if not revmapfile:
423 424 try:
424 425 revmapfile = destc.revmapfile()
425 426 except:
426 427 revmapfile = os.path.join(destc, "map")
427 428
428 429
429 430 c = convert(ui, srcc, destc, revmapfile, filemapper(ui, opts['filemap']),
430 431 opts)
431 432 c.convert()
432 433
433 434 cmdtable = {
434 435 "convert":
435 436 (_convert,
436 437 [('A', 'authors', '', 'username mapping filename'),
437 438 ('', 'filemap', '', 'remap file names using contents of file'),
438 439 ('r', 'rev', '', 'import up to target revision REV'),
439 440 ('', 'datesort', None, 'try to sort changesets by date')],
440 441 'hg convert [OPTION]... SOURCE [DEST [MAPFILE]]'),
441 442 }
@@ -1,120 +1,120
1 1 # common code for the convert extension
2 2
3 3 class NoRepo(Exception): pass
4 4
5 5 class commit(object):
6 def __init__(self, author, date, desc, parents, branch=None, rev=None,
7 copies={}):
6 def __init__(self, author, date, desc, parents, branch=None, rev=None):
8 7 self.author = author
9 8 self.date = date
10 9 self.desc = desc
11 10 self.parents = parents
12 11 self.branch = branch
13 12 self.rev = rev
14 self.copies = copies
15 13
16 14 class converter_source(object):
17 15 """Conversion source interface"""
18 16
19 17 def __init__(self, ui, path, rev=None):
20 18 """Initialize conversion source (or raise NoRepo("message")
21 19 exception if path is not a valid repository)"""
22 20 self.ui = ui
23 21 self.path = path
24 22 self.rev = rev
25 23
26 24 self.encoding = 'utf-8'
27 25
28 26 def setrevmap(self, revmap):
29 27 """set the map of already-converted revisions"""
30 28 pass
31 29
32 30 def getheads(self):
33 31 """Return a list of this repository's heads"""
34 32 raise NotImplementedError()
35 33
36 34 def getfile(self, name, rev):
37 35 """Return file contents as a string"""
38 36 raise NotImplementedError()
39 37
40 38 def getmode(self, name, rev):
41 39 """Return file mode, eg. '', 'x', or 'l'"""
42 40 raise NotImplementedError()
43 41
44 42 def getchanges(self, version):
45 """Return sorted list of (filename, id) tuples for all files changed in rev.
43 """Returns a tuple of (files, copies)
44 Files is a sorted list of (filename, id) tuples for all files changed
45 in version, where id is the source revision id of the file.
46 46
47 id just tells us which revision to return in getfile(), e.g. in
48 git it's an object hash."""
47 copies is a dictionary of dest: source
48 """
49 49 raise NotImplementedError()
50 50
51 51 def getcommit(self, version):
52 52 """Return the commit object for version"""
53 53 raise NotImplementedError()
54 54
55 55 def gettags(self):
56 56 """Return the tags as a dictionary of name: revision"""
57 57 raise NotImplementedError()
58 58
59 59 def recode(self, s, encoding=None):
60 60 if not encoding:
61 61 encoding = self.encoding or 'utf-8'
62 62
63 63 try:
64 64 return s.decode(encoding).encode("utf-8")
65 65 except:
66 66 try:
67 67 return s.decode("latin-1").encode("utf-8")
68 68 except:
69 69 return s.decode(encoding, "replace").encode("utf-8")
70 70
71 71 class converter_sink(object):
72 72 """Conversion sink (target) interface"""
73 73
74 74 def __init__(self, ui, path):
75 75 """Initialize conversion sink (or raise NoRepo("message")
76 76 exception if path is not a valid repository)"""
77 77 raise NotImplementedError()
78 78
79 79 def getheads(self):
80 80 """Return a list of this repository's heads"""
81 81 raise NotImplementedError()
82 82
83 83 def revmapfile(self):
84 84 """Path to a file that will contain lines
85 85 source_rev_id sink_rev_id
86 86 mapping equivalent revision identifiers for each system."""
87 87 raise NotImplementedError()
88 88
89 89 def authorfile(self):
90 90 """Path to a file that will contain lines
91 91 srcauthor=dstauthor
92 92 mapping equivalent authors identifiers for each system."""
93 93 return None
94 94
95 95 def putfile(self, f, e, data):
96 96 """Put file for next putcommit().
97 97 f: path to file
98 98 e: '', 'x', or 'l' (regular file, executable, or symlink)
99 99 data: file contents"""
100 100 raise NotImplementedError()
101 101
102 102 def delfile(self, f):
103 103 """Delete file for next putcommit().
104 104 f: path to file"""
105 105 raise NotImplementedError()
106 106
107 107 def putcommit(self, files, parents, commit):
108 108 """Create a revision with all changed files listed in 'files'
109 109 and having listed parents. 'commit' is a commit object containing
110 110 at a minimum the author, date, and message for this changeset.
111 111 Called after putfile() and delfile() calls. Note that the sink
112 112 repository is not told to update itself to a particular revision
113 113 (or even what that revision would be) before it receives the
114 114 file data."""
115 115 raise NotImplementedError()
116 116
117 117 def puttags(self, tags):
118 118 """Put tags into sink.
119 119 tags: {tagname: sink_rev_id, ...}"""
120 120 raise NotImplementedError()
@@ -1,259 +1,259
1 1 # CVS conversion code inspired by hg-cvs-import and git-cvsimport
2 2
3 3 import os, locale, re, socket
4 4 from mercurial import util
5 5
6 6 from common import NoRepo, commit, converter_source
7 7
8 8 class convert_cvs(converter_source):
9 9 def __init__(self, ui, path, rev=None):
10 10 super(convert_cvs, self).__init__(ui, path, rev=rev)
11 11
12 12 cvs = os.path.join(path, "CVS")
13 13 if not os.path.exists(cvs):
14 14 raise NoRepo("couldn't open CVS repo %s" % path)
15 15
16 16 self.changeset = {}
17 17 self.files = {}
18 18 self.tags = {}
19 19 self.lastbranch = {}
20 20 self.parent = {}
21 21 self.socket = None
22 22 self.cvsroot = file(os.path.join(cvs, "Root")).read()[:-1]
23 23 self.cvsrepo = file(os.path.join(cvs, "Repository")).read()[:-1]
24 24 self.encoding = locale.getpreferredencoding()
25 25 self._parse()
26 26 self._connect()
27 27
28 28 def _parse(self):
29 29 if self.changeset:
30 30 return
31 31
32 32 maxrev = 0
33 33 cmd = 'cvsps -A -u --cvs-direct -q'
34 34 if self.rev:
35 35 # TODO: handle tags
36 36 try:
37 37 # patchset number?
38 38 maxrev = int(self.rev)
39 39 except ValueError:
40 40 try:
41 41 # date
42 42 util.parsedate(self.rev, ['%Y/%m/%d %H:%M:%S'])
43 43 cmd = "%s -d '1970/01/01 00:00:01' -d '%s'" % (cmd, self.rev)
44 44 except util.Abort:
45 45 raise util.Abort('revision %s is not a patchset number or date' % self.rev)
46 46
47 47 d = os.getcwd()
48 48 try:
49 49 os.chdir(self.path)
50 50 id = None
51 51 state = 0
52 52 for l in os.popen(cmd):
53 53 if state == 0: # header
54 54 if l.startswith("PatchSet"):
55 55 id = l[9:-2]
56 56 if maxrev and int(id) > maxrev:
57 57 state = 3
58 58 elif l.startswith("Date"):
59 59 date = util.parsedate(l[6:-1], ["%Y/%m/%d %H:%M:%S"])
60 60 date = util.datestr(date)
61 61 elif l.startswith("Branch"):
62 62 branch = l[8:-1]
63 63 self.parent[id] = self.lastbranch.get(branch, 'bad')
64 64 self.lastbranch[branch] = id
65 65 elif l.startswith("Ancestor branch"):
66 66 ancestor = l[17:-1]
67 67 self.parent[id] = self.lastbranch[ancestor]
68 68 elif l.startswith("Author"):
69 69 author = self.recode(l[8:-1])
70 70 elif l.startswith("Tag:") or l.startswith("Tags:"):
71 71 t = l[l.index(':')+1:]
72 72 t = [ut.strip() for ut in t.split(',')]
73 73 if (len(t) > 1) or (t[0] and (t[0] != "(none)")):
74 74 self.tags.update(dict.fromkeys(t, id))
75 75 elif l.startswith("Log:"):
76 76 state = 1
77 77 log = ""
78 78 elif state == 1: # log
79 79 if l == "Members: \n":
80 80 files = {}
81 81 log = self.recode(log[:-1])
82 82 state = 2
83 83 else:
84 84 log += l
85 85 elif state == 2:
86 86 if l == "\n": #
87 87 state = 0
88 88 p = [self.parent[id]]
89 89 if id == "1":
90 90 p = []
91 91 if branch == "HEAD":
92 92 branch = ""
93 93 c = commit(author=author, date=date, parents=p,
94 94 desc=log, branch=branch)
95 95 self.changeset[id] = c
96 96 self.files[id] = files
97 97 else:
98 98 colon = l.rfind(':')
99 99 file = l[1:colon]
100 100 rev = l[colon+1:-2]
101 101 rev = rev.split("->")[1]
102 102 files[file] = rev
103 103 elif state == 3:
104 104 continue
105 105
106 106 self.heads = self.lastbranch.values()
107 107 finally:
108 108 os.chdir(d)
109 109
110 110 def _connect(self):
111 111 root = self.cvsroot
112 112 conntype = None
113 113 user, host = None, None
114 114 cmd = ['cvs', 'server']
115 115
116 116 self.ui.status("connecting to %s\n" % root)
117 117
118 118 if root.startswith(":pserver:"):
119 119 root = root[9:]
120 120 m = re.match(r'(?:(.*?)(?::(.*?))?@)?([^:\/]*)(?::(\d*))?(.*)',
121 121 root)
122 122 if m:
123 123 conntype = "pserver"
124 124 user, passw, serv, port, root = m.groups()
125 125 if not user:
126 126 user = "anonymous"
127 127 rr = ":pserver:" + user + "@" + serv + ":" + root
128 128 if port:
129 129 rr2, port = "-", int(port)
130 130 else:
131 131 rr2, port = rr, 2401
132 132 rr += str(port)
133 133
134 134 if not passw:
135 135 passw = "A"
136 136 pf = open(os.path.join(os.environ["HOME"], ".cvspass"))
137 137 for l in pf:
138 138 # :pserver:cvs@mea.tmt.tele.fi:/cvsroot/zmailer Ah<Z
139 139 m = re.match(r'(/\d+\s+/)?(.*)', l)
140 140 l = m.group(2)
141 141 w, p = l.split(' ', 1)
142 142 if w in [rr, rr2]:
143 143 passw = p
144 144 break
145 145 pf.close()
146 146
147 147 sck = socket.socket()
148 148 sck.connect((serv, port))
149 149 sck.send("\n".join(["BEGIN AUTH REQUEST", root, user, passw,
150 150 "END AUTH REQUEST", ""]))
151 151 if sck.recv(128) != "I LOVE YOU\n":
152 152 raise NoRepo("CVS pserver authentication failed")
153 153
154 154 self.writep = self.readp = sck.makefile('r+')
155 155
156 156 if not conntype and root.startswith(":local:"):
157 157 conntype = "local"
158 158 root = root[7:]
159 159
160 160 if not conntype:
161 161 # :ext:user@host/home/user/path/to/cvsroot
162 162 if root.startswith(":ext:"):
163 163 root = root[5:]
164 164 m = re.match(r'(?:([^@:/]+)@)?([^:/]+):?(.*)', root)
165 165 if not m:
166 166 conntype = "local"
167 167 else:
168 168 conntype = "rsh"
169 169 user, host, root = m.group(1), m.group(2), m.group(3)
170 170
171 171 if conntype != "pserver":
172 172 if conntype == "rsh":
173 173 rsh = os.environ.get("CVS_RSH" or "rsh")
174 174 if user:
175 175 cmd = [rsh, '-l', user, host] + cmd
176 176 else:
177 177 cmd = [rsh, host] + cmd
178 178
179 179 self.writep, self.readp = os.popen2(cmd)
180 180
181 181 self.realroot = root
182 182
183 183 self.writep.write("Root %s\n" % root)
184 184 self.writep.write("Valid-responses ok error Valid-requests Mode"
185 185 " M Mbinary E Checked-in Created Updated"
186 186 " Merged Removed\n")
187 187 self.writep.write("valid-requests\n")
188 188 self.writep.flush()
189 189 r = self.readp.readline()
190 190 if not r.startswith("Valid-requests"):
191 191 raise util.Abort("server sucks")
192 192 if "UseUnchanged" in r:
193 193 self.writep.write("UseUnchanged\n")
194 194 self.writep.flush()
195 195 r = self.readp.readline()
196 196
197 197 def getheads(self):
198 198 return self.heads
199 199
200 200 def _getfile(self, name, rev):
201 201 if rev.endswith("(DEAD)"):
202 202 raise IOError
203 203
204 204 args = ("-N -P -kk -r %s --" % rev).split()
205 205 args.append(os.path.join(self.cvsrepo, name))
206 206 for x in args:
207 207 self.writep.write("Argument %s\n" % x)
208 208 self.writep.write("Directory .\n%s\nco\n" % self.realroot)
209 209 self.writep.flush()
210 210
211 211 data = ""
212 212 while 1:
213 213 line = self.readp.readline()
214 214 if line.startswith("Created ") or line.startswith("Updated "):
215 215 self.readp.readline() # path
216 216 self.readp.readline() # entries
217 217 mode = self.readp.readline()[:-1]
218 218 count = int(self.readp.readline()[:-1])
219 219 data = self.readp.read(count)
220 220 elif line.startswith(" "):
221 221 data += line[1:]
222 222 elif line.startswith("M "):
223 223 pass
224 224 elif line.startswith("Mbinary "):
225 225 count = int(self.readp.readline()[:-1])
226 226 data = self.readp.read(count)
227 227 else:
228 228 if line == "ok\n":
229 229 return (data, "x" in mode and "x" or "")
230 230 elif line.startswith("E "):
231 231 self.ui.warn("cvs server: %s\n" % line[2:])
232 232 elif line.startswith("Remove"):
233 233 l = self.readp.readline()
234 234 l = self.readp.readline()
235 235 if l != "ok\n":
236 236 raise util.Abort("unknown CVS response: %s" % l)
237 237 else:
238 238 raise util.Abort("unknown CVS response: %s" % line)
239 239
240 240 def getfile(self, file, rev):
241 241 data, mode = self._getfile(file, rev)
242 242 self.modecache[(file, rev)] = mode
243 243 return data
244 244
245 245 def getmode(self, file, rev):
246 246 return self.modecache[(file, rev)]
247 247
248 248 def getchanges(self, rev):
249 249 self.modecache = {}
250 250 files = self.files[rev]
251 251 cl = files.items()
252 252 cl.sort()
253 return cl
253 return (cl, {})
254 254
255 255 def getcommit(self, rev):
256 256 return self.changeset[rev]
257 257
258 258 def gettags(self):
259 259 return self.tags
@@ -1,101 +1,101
1 1 # git support for the convert extension
2 2
3 3 import os
4 4
5 5 from common import NoRepo, commit, converter_source
6 6
7 7 class convert_git(converter_source):
8 8 def gitcmd(self, s):
9 9 return os.popen('GIT_DIR=%s %s' % (self.path, s))
10 10
11 11 def __init__(self, ui, path, rev=None):
12 12 super(convert_git, self).__init__(ui, path, rev=rev)
13 13
14 14 if os.path.isdir(path + "/.git"):
15 15 path += "/.git"
16 16 if not os.path.exists(path + "/objects"):
17 17 raise NoRepo("couldn't open GIT repo %s" % path)
18 18 self.path = path
19 19
20 20 def getheads(self):
21 21 if not self.rev:
22 22 return self.gitcmd('git-rev-parse --branches').read().splitlines()
23 23 else:
24 24 fh = self.gitcmd("git-rev-parse --verify %s" % self.rev)
25 25 return [fh.read()[:-1]]
26 26
27 27 def catfile(self, rev, type):
28 28 if rev == "0" * 40: raise IOError()
29 29 fh = self.gitcmd("git-cat-file %s %s 2>/dev/null" % (type, rev))
30 30 return fh.read()
31 31
32 32 def getfile(self, name, rev):
33 33 return self.catfile(rev, "blob")
34 34
35 35 def getmode(self, name, rev):
36 36 return self.modecache[(name, rev)]
37 37
38 38 def getchanges(self, version):
39 39 self.modecache = {}
40 40 fh = self.gitcmd("git-diff-tree --root -m -r %s" % version)
41 41 changes = []
42 42 for l in fh:
43 43 if "\t" not in l: continue
44 44 m, f = l[:-1].split("\t")
45 45 m = m.split()
46 46 h = m[3]
47 47 p = (m[1] == "100755")
48 48 s = (m[1] == "120000")
49 49 self.modecache[(f, h)] = (p and "x") or (s and "l") or ""
50 50 changes.append((f, h))
51 return changes
51 return (changes, {})
52 52
53 53 def getcommit(self, version):
54 54 c = self.catfile(version, "commit") # read the commit hash
55 55 end = c.find("\n\n")
56 56 message = c[end+2:]
57 57 message = self.recode(message)
58 58 l = c[:end].splitlines()
59 59 manifest = l[0].split()[1]
60 60 parents = []
61 61 for e in l[1:]:
62 62 n, v = e.split(" ", 1)
63 63 if n == "author":
64 64 p = v.split()
65 65 tm, tz = p[-2:]
66 66 author = " ".join(p[:-2])
67 67 if author[0] == "<": author = author[1:-1]
68 68 author = self.recode(author)
69 69 if n == "committer":
70 70 p = v.split()
71 71 tm, tz = p[-2:]
72 72 committer = " ".join(p[:-2])
73 73 if committer[0] == "<": committer = committer[1:-1]
74 74 committer = self.recode(committer)
75 75 message += "\ncommitter: %s\n" % committer
76 76 if n == "parent": parents.append(v)
77 77
78 78 tzs, tzh, tzm = tz[-5:-4] + "1", tz[-4:-2], tz[-2:]
79 79 tz = -int(tzs) * (int(tzh) * 3600 + int(tzm))
80 80 date = tm + " " + str(tz)
81 81 author = author or "unknown"
82 82
83 83 c = commit(parents=parents, date=date, author=author, desc=message,
84 84 rev=version)
85 85 return c
86 86
87 87 def gettags(self):
88 88 tags = {}
89 89 fh = self.gitcmd('git-ls-remote --tags "%s" 2>/dev/null' % self.path)
90 90 prefix = 'refs/tags/'
91 91 for line in fh:
92 92 line = line.strip()
93 93 if not line.endswith("^{}"):
94 94 continue
95 95 node, tag = line.split(None, 1)
96 96 if not tag.startswith(prefix):
97 97 continue
98 98 tag = tag[len(prefix):-3]
99 99 tags[tag] = node
100 100
101 101 return tags
@@ -1,175 +1,175
1 1 # hg backend for convert extension
2 2
3 3 # Note for hg->hg conversion: Old versions of Mercurial didn't trim
4 4 # the whitespace from the ends of commit messages, but new versions
5 5 # do. Changesets created by those older versions, then converted, may
6 6 # thus have different hashes for changesets that are otherwise
7 7 # identical.
8 8
9 9
10 10 import os, time
11 11 from mercurial.i18n import _
12 12 from mercurial.node import *
13 13 from mercurial import hg, lock, revlog, util
14 14
15 15 from common import NoRepo, commit, converter_source, converter_sink
16 16
17 17 class mercurial_sink(converter_sink):
18 18 def __init__(self, ui, path):
19 19 self.path = path
20 20 self.ui = ui
21 21 try:
22 22 self.repo = hg.repository(self.ui, path)
23 23 except:
24 24 raise NoRepo("could not open hg repo %s as sink" % path)
25 25 self.lock = None
26 26 self.wlock = None
27 27 self.branchnames = ui.configbool('convert', 'hg.usebranchnames', True)
28 28
29 29 def before(self):
30 30 self.wlock = self.repo.wlock()
31 31 self.lock = self.repo.lock()
32 32
33 33 def after(self):
34 34 self.lock = None
35 35 self.wlock = None
36 36
37 37 def revmapfile(self):
38 38 return os.path.join(self.path, ".hg", "shamap")
39 39
40 40 def authorfile(self):
41 41 return os.path.join(self.path, ".hg", "authormap")
42 42
43 43 def getheads(self):
44 44 h = self.repo.changelog.heads()
45 45 return [ hex(x) for x in h ]
46 46
47 47 def putfile(self, f, e, data):
48 48 self.repo.wwrite(f, data, e)
49 49 if f not in self.repo.dirstate:
50 50 self.repo.dirstate.add(f)
51 51
52 52 def copyfile(self, source, dest):
53 53 self.repo.copy(source, dest)
54 54
55 55 def delfile(self, f):
56 56 try:
57 57 os.unlink(self.repo.wjoin(f))
58 58 #self.repo.remove([f])
59 59 except:
60 60 pass
61 61
62 62 def putcommit(self, files, parents, commit):
63 63 if not files:
64 64 return hex(self.repo.changelog.tip())
65 65
66 66 seen = {hex(nullid): 1}
67 67 pl = []
68 68 for p in parents:
69 69 if p not in seen:
70 70 pl.append(p)
71 71 seen[p] = 1
72 72 parents = pl
73 73
74 74 if len(parents) < 2: parents.append("0" * 40)
75 75 if len(parents) < 2: parents.append("0" * 40)
76 76 p2 = parents.pop(0)
77 77
78 78 text = commit.desc
79 79 extra = {}
80 80 if self.branchnames and commit.branch:
81 81 extra['branch'] = commit.branch
82 82 if commit.rev:
83 83 extra['convert_revision'] = commit.rev
84 84
85 85 while parents:
86 86 p1 = p2
87 87 p2 = parents.pop(0)
88 88 a = self.repo.rawcommit(files, text, commit.author, commit.date,
89 89 bin(p1), bin(p2), extra=extra)
90 90 self.repo.dirstate.invalidate()
91 91 text = "(octopus merge fixup)\n"
92 92 p2 = hg.hex(self.repo.changelog.tip())
93 93
94 94 return p2
95 95
96 96 def puttags(self, tags):
97 97 try:
98 98 old = self.repo.wfile(".hgtags").read()
99 99 oldlines = old.splitlines(1)
100 100 oldlines.sort()
101 101 except:
102 102 oldlines = []
103 103
104 104 k = tags.keys()
105 105 k.sort()
106 106 newlines = []
107 107 for tag in k:
108 108 newlines.append("%s %s\n" % (tags[tag], tag))
109 109
110 110 newlines.sort()
111 111
112 112 if newlines != oldlines:
113 113 self.ui.status("updating tags\n")
114 114 f = self.repo.wfile(".hgtags", "w")
115 115 f.write("".join(newlines))
116 116 f.close()
117 117 if not oldlines: self.repo.add([".hgtags"])
118 118 date = "%s 0" % int(time.mktime(time.gmtime()))
119 119 self.repo.rawcommit([".hgtags"], "update tags", "convert-repo",
120 120 date, self.repo.changelog.tip(), nullid)
121 121 return hex(self.repo.changelog.tip())
122 122
123 123 class mercurial_source(converter_source):
124 124 def __init__(self, ui, path, rev=None):
125 125 converter_source.__init__(self, ui, path, rev)
126 126 self.repo = hg.repository(self.ui, path)
127 127 self.lastrev = None
128 128 self.lastctx = None
129 129
130 130 def changectx(self, rev):
131 131 if self.lastrev != rev:
132 132 self.lastctx = self.repo.changectx(rev)
133 133 self.lastrev = rev
134 134 return self.lastctx
135 135
136 136 def getheads(self):
137 137 return [hex(node) for node in self.repo.heads()]
138 138
139 139 def getfile(self, name, rev):
140 140 try:
141 141 return self.changectx(rev).filectx(name).data()
142 142 except revlog.LookupError, err:
143 143 raise IOError(err)
144 144
145 145 def getmode(self, name, rev):
146 146 m = self.changectx(rev).manifest()
147 147 return (m.execf(name) and 'x' or '') + (m.linkf(name) and 'l' or '')
148 148
149 149 def getchanges(self, rev):
150 150 ctx = self.changectx(rev)
151 151 m, a, r = self.repo.status(ctx.parents()[0].node(), ctx.node())[:3]
152 152 changes = [(name, rev) for name in m + a + r]
153 153 changes.sort()
154 return changes
154 return (changes, self.getcopies(ctx))
155 155
156 156 def getcopies(self, ctx):
157 157 added = self.repo.status(ctx.parents()[0].node(), ctx.node())[1]
158 158 copies = {}
159 159 for name in added:
160 160 try:
161 161 copies[name] = ctx.filectx(name).renamed()[0]
162 162 except TypeError:
163 163 pass
164 164 return copies
165 165
166 166 def getcommit(self, rev):
167 167 ctx = self.changectx(rev)
168 168 parents = [hex(p.node()) for p in ctx.parents() if p.node() != nullid]
169 169 return commit(author=ctx.user(), date=util.datestr(ctx.date()),
170 170 desc=ctx.description(), parents=parents,
171 branch=ctx.branch(), copies=self.getcopies(ctx))
171 branch=ctx.branch())
172 172
173 173 def gettags(self):
174 174 tags = [t for t in self.repo.tagslist() if t[0] != 'tip']
175 175 return dict([(name, hex(node)) for name, node in tags])
@@ -1,641 +1,643
1 1 # Subversion 1.4/1.5 Python API backend
2 2 #
3 3 # Copyright(C) 2007 Daniel Holth et al
4 4 #
5 5 # Configuration options:
6 6 #
7 7 # convert.svn.trunk
8 8 # Relative path to the trunk (default: "trunk")
9 9 # convert.svn.branches
10 10 # Relative path to tree of branches (default: "branches")
11 11 #
12 12 # Set these in a hgrc, or on the command line as follows:
13 13 #
14 14 # hg convert --config convert.svn.trunk=wackoname [...]
15 15
16 16 import locale
17 17 import os
18 18 import cPickle as pickle
19 19 from mercurial import util
20 20
21 21 # Subversion stuff. Works best with very recent Python SVN bindings
22 22 # e.g. SVN 1.5 or backports. Thanks to the bzr folks for enhancing
23 23 # these bindings.
24 24
25 25 from cStringIO import StringIO
26 26
27 27 from common import NoRepo, commit, converter_source
28 28
29 29 try:
30 30 from svn.core import SubversionException, Pool
31 31 import svn
32 32 import svn.client
33 33 import svn.core
34 34 import svn.ra
35 35 import svn.delta
36 36 import transport
37 37 except ImportError:
38 38 pass
39 39
40 40 def geturl(path):
41 41 try:
42 42 return svn.client.url_from_path(svn.core.svn_path_canonicalize(path))
43 43 except SubversionException:
44 44 pass
45 45 if os.path.isdir(path):
46 46 return 'file://%s' % os.path.normpath(os.path.abspath(path))
47 47 return path
48 48
49 49 def optrev(number):
50 50 optrev = svn.core.svn_opt_revision_t()
51 51 optrev.kind = svn.core.svn_opt_revision_number
52 52 optrev.value.number = number
53 53 return optrev
54 54
55 55 class changedpath(object):
56 56 def __init__(self, p):
57 57 self.copyfrom_path = p.copyfrom_path
58 58 self.copyfrom_rev = p.copyfrom_rev
59 59 self.action = p.action
60 60
61 61 # SVN conversion code stolen from bzr-svn and tailor
62 62 class convert_svn(converter_source):
63 63 def __init__(self, ui, url, rev=None):
64 64 super(convert_svn, self).__init__(ui, url, rev=rev)
65 65
66 66 try:
67 67 SubversionException
68 68 except NameError:
69 69 msg = 'subversion python bindings could not be loaded\n'
70 70 ui.warn(msg)
71 71 raise NoRepo(msg)
72 72
73 73 self.encoding = locale.getpreferredencoding()
74 74 self.lastrevs = {}
75 75
76 76 latest = None
77 77 if rev:
78 78 try:
79 79 latest = int(rev)
80 80 except ValueError:
81 81 raise util.Abort('svn: revision %s is not an integer' % rev)
82 82 try:
83 83 # Support file://path@rev syntax. Useful e.g. to convert
84 84 # deleted branches.
85 85 at = url.rfind('@')
86 86 if at >= 0:
87 87 latest = int(url[at+1:])
88 88 url = url[:at]
89 89 except ValueError, e:
90 90 pass
91 91 self.url = geturl(url)
92 92 self.encoding = 'UTF-8' # Subversion is always nominal UTF-8
93 93 try:
94 94 self.transport = transport.SvnRaTransport(url=self.url)
95 95 self.ra = self.transport.ra
96 96 self.ctx = self.transport.client
97 97 self.base = svn.ra.get_repos_root(self.ra)
98 98 self.module = self.url[len(self.base):]
99 99 self.modulemap = {} # revision, module
100 100 self.commits = {}
101 self.files = {}
101 self.paths = {}
102 102 self.uuid = svn.ra.get_uuid(self.ra).decode(self.encoding)
103 103 except SubversionException, e:
104 104 raise NoRepo("couldn't open SVN repo %s" % self.url)
105 105
106 106 try:
107 107 self.get_blacklist()
108 108 except IOError, e:
109 109 pass
110 110
111 111 self.last_changed = self.latest(self.module, latest)
112 112
113 113 self.head = self.revid(self.last_changed)
114 114
115 115 def setrevmap(self, revmap):
116 116 lastrevs = {}
117 117 for revid in revmap.keys():
118 118 uuid, module, revnum = self.revsplit(revid)
119 119 lastrevnum = lastrevs.setdefault(module, revnum)
120 120 if revnum > lastrevnum:
121 121 lastrevs[module] = revnum
122 122 self.lastrevs = lastrevs
123 123
124 124 def exists(self, path, optrev):
125 125 try:
126 126 return svn.client.ls(self.url.rstrip('/') + '/' + path,
127 127 optrev, False, self.ctx)
128 128 except SubversionException, err:
129 129 return []
130 130
131 131 def getheads(self):
132 132 # detect standard /branches, /tags, /trunk layout
133 133 rev = optrev(self.last_changed)
134 134 rpath = self.url.strip('/')
135 135 cfgtrunk = self.ui.config('convert', 'svn.trunk')
136 136 cfgbranches = self.ui.config('convert', 'svn.branches')
137 137 trunk = (cfgtrunk or 'trunk').strip('/')
138 138 branches = (cfgbranches or 'branches').strip('/')
139 139 if self.exists(trunk, rev) and self.exists(branches, rev):
140 140 self.ui.note('found trunk at %r and branches at %r\n' %
141 141 (trunk, branches))
142 142 oldmodule = self.module
143 143 self.module += '/' + trunk
144 144 lt = self.latest(self.module, self.last_changed)
145 145 self.head = self.revid(lt)
146 146 self.heads = [self.head]
147 147 branchnames = svn.client.ls(rpath + '/' + branches, rev, False,
148 148 self.ctx)
149 149 for branch in branchnames.keys():
150 150 if oldmodule:
151 151 module = '/' + oldmodule + '/' + branches + '/' + branch
152 152 else:
153 153 module = '/' + branches + '/' + branch
154 154 brevnum = self.latest(module, self.last_changed)
155 155 brev = self.revid(brevnum, module)
156 156 self.ui.note('found branch %s at %d\n' % (branch, brevnum))
157 157 self.heads.append(brev)
158 158 elif cfgtrunk or cfgbranches:
159 159 raise util.Abort(_('trunk/branch layout expected, '
160 160 'but not found'))
161 161 else:
162 162 self.ui.note('working with one branch\n')
163 163 self.heads = [self.head]
164 164 return self.heads
165 165
166 166 def getfile(self, file, rev):
167 167 data, mode = self._getfile(file, rev)
168 168 self.modecache[(file, rev)] = mode
169 169 return data
170 170
171 171 def getmode(self, file, rev):
172 172 return self.modecache[(file, rev)]
173 173
174 174 def getchanges(self, rev):
175 175 self.modecache = {}
176 files = self.files[rev]
177 cl = files
178 cl.sort()
176 (paths, parents) = self.paths[rev]
177 files, copies = self.expandpaths(rev, paths, parents)
178 files.sort()
179 files = zip(files, [rev] * len(files))
180
179 181 # caller caches the result, so free it here to release memory
180 del self.files[rev]
181 return cl
182 del self.paths[rev]
183 return (files, copies)
182 184
183 185 def getcommit(self, rev):
184 186 if rev not in self.commits:
185 187 uuid, module, revnum = self.revsplit(rev)
186 188 self.module = module
187 189 self.reparent(module)
188 190 stop = self.lastrevs.get(module, 0)
189 191 self._fetch_revisions(from_revnum=revnum, to_revnum=stop)
190 192 commit = self.commits[rev]
191 193 # caller caches the result, so free it here to release memory
192 194 del self.commits[rev]
193 195 return commit
194 196
195 197 def get_log(self, paths, start, end, limit=0, discover_changed_paths=True,
196 198 strict_node_history=False):
197 199 '''wrapper for svn.ra.get_log.
198 200 on a large repository, svn.ra.get_log pins huge amounts of
199 201 memory that cannot be recovered. work around it by forking
200 202 and writing results over a pipe.'''
201 203
202 204 def child(fp):
203 205 protocol = -1
204 206 def receiver(orig_paths, revnum, author, date, message, pool):
205 207 if orig_paths is not None:
206 208 for k, v in orig_paths.iteritems():
207 209 orig_paths[k] = changedpath(v)
208 210 pickle.dump((orig_paths, revnum, author, date, message),
209 211 fp, protocol)
210 212
211 213 try:
212 214 # Use an ra of our own so that our parent can consume
213 215 # our results without confusing the server.
214 216 t = transport.SvnRaTransport(url=self.url)
215 217 svn.ra.get_log(t.ra, paths, start, end, limit,
216 218 discover_changed_paths,
217 219 strict_node_history,
218 220 receiver)
219 221 except SubversionException, (_, num):
220 222 self.ui.print_exc()
221 223 pickle.dump(num, fp, protocol)
222 224 else:
223 225 pickle.dump(None, fp, protocol)
224 226 fp.close()
225 227
226 228 def parent(fp):
227 229 while True:
228 230 entry = pickle.load(fp)
229 231 try:
230 232 orig_paths, revnum, author, date, message = entry
231 233 except:
232 234 if entry is None:
233 235 break
234 236 raise SubversionException("child raised exception", entry)
235 237 yield entry
236 238
237 239 rfd, wfd = os.pipe()
238 240 pid = os.fork()
239 241 if pid:
240 242 os.close(wfd)
241 243 for p in parent(os.fdopen(rfd, 'rb')):
242 244 yield p
243 245 ret = os.waitpid(pid, 0)[1]
244 246 if ret:
245 247 raise util.Abort(_('get_log %s') % util.explain_exit(ret))
246 248 else:
247 249 os.close(rfd)
248 250 child(os.fdopen(wfd, 'wb'))
249 251 os._exit(0)
250 252
251 253 def gettags(self):
252 254 tags = {}
253 255 start = self.revnum(self.head)
254 256 try:
255 257 for entry in self.get_log(['/tags'], 0, start):
256 258 orig_paths, revnum, author, date, message = entry
257 259 for path in orig_paths:
258 260 if not path.startswith('/tags/'):
259 261 continue
260 262 ent = orig_paths[path]
261 263 source = ent.copyfrom_path
262 264 rev = ent.copyfrom_rev
263 265 tag = path.split('/', 2)[2]
264 266 tags[tag] = self.revid(rev, module=source)
265 267 except SubversionException, (_, num):
266 268 self.ui.note('no tags found at revision %d\n' % start)
267 269 return tags
268 270
269 271 # -- helper functions --
270 272
271 273 def revid(self, revnum, module=None):
272 274 if not module:
273 275 module = self.module
274 276 return (u"svn:%s%s@%s" % (self.uuid, module, revnum)).decode(self.encoding)
275 277
276 278 def revnum(self, rev):
277 279 return int(rev.split('@')[-1])
278 280
279 281 def revsplit(self, rev):
280 282 url, revnum = rev.encode(self.encoding).split('@', 1)
281 283 revnum = int(revnum)
282 284 parts = url.split('/', 1)
283 285 uuid = parts.pop(0)[4:]
284 286 mod = ''
285 287 if parts:
286 288 mod = '/' + parts[0]
287 289 return uuid, mod, revnum
288 290
289 291 def latest(self, path, stop=0):
290 292 'find the latest revision affecting path, up to stop'
291 293 if not stop:
292 294 stop = svn.ra.get_latest_revnum(self.ra)
293 295 try:
294 296 self.reparent('')
295 297 dirent = svn.ra.stat(self.ra, path.strip('/'), stop)
296 298 self.reparent(self.module)
297 299 except SubversionException:
298 300 dirent = None
299 301 if not dirent:
300 302 print self.base, path
301 303 raise util.Abort('%s not found up to revision %d' % (path, stop))
302 304
303 305 return dirent.created_rev
304 306
305 307 def get_blacklist(self):
306 308 """Avoid certain revision numbers.
307 309 It is not uncommon for two nearby revisions to cancel each other
308 310 out, e.g. 'I copied trunk into a subdirectory of itself instead
309 311 of making a branch'. The converted repository is significantly
310 312 smaller if we ignore such revisions."""
311 313 self.blacklist = set()
312 314 blacklist = self.blacklist
313 315 for line in file("blacklist.txt", "r"):
314 316 if not line.startswith("#"):
315 317 try:
316 318 svn_rev = int(line.strip())
317 319 blacklist.add(svn_rev)
318 320 except ValueError, e:
319 321 pass # not an integer or a comment
320 322
321 323 def is_blacklisted(self, svn_rev):
322 324 return svn_rev in self.blacklist
323 325
324 326 def reparent(self, module):
325 327 svn_url = self.base + module
326 328 self.ui.debug("reparent to %s\n" % svn_url.encode(self.encoding))
327 329 svn.ra.reparent(self.ra, svn_url.encode(self.encoding))
328 330
329 331 def expandpaths(self, rev, paths, parents):
330 332 def get_entry_from_path(path, module=self.module):
331 333 # Given the repository url of this wc, say
332 334 # "http://server/plone/CMFPlone/branches/Plone-2_0-branch"
333 335 # extract the "entry" portion (a relative path) from what
334 336 # svn log --xml says, ie
335 337 # "/CMFPlone/branches/Plone-2_0-branch/tests/PloneTestCase.py"
336 338 # that is to say "tests/PloneTestCase.py"
337 339 if path.startswith(module):
338 340 relative = path[len(module):]
339 341 if relative.startswith('/'):
340 342 return relative[1:]
341 343 else:
342 344 return relative
343 345
344 346 # The path is outside our tracked tree...
345 347 self.ui.debug('%r is not under %r, ignoring\n' % (path, module))
346 348 return None
347 349
348 350 entries = []
349 351 copyfrom = {} # Map of entrypath, revision for finding source of deleted revisions.
350 352 copies = {}
351 353 revnum = self.revnum(rev)
352 354
355 if revnum in self.modulemap:
356 new_module = self.modulemap[revnum]
357 if new_module != self.module:
358 self.module = new_module
359 self.reparent(self.module)
360
353 361 for path, ent in paths:
354 # self.ui.write("path %s\n" % path)
362 self.ui.write("path %s\n" % path)
355 363 entrypath = get_entry_from_path(path, module=self.module)
356 364 entry = entrypath.decode(self.encoding)
357 365
358 366 kind = svn.ra.check_path(self.ra, entrypath, revnum)
359 367 if kind == svn.core.svn_node_file:
360 368 if ent.copyfrom_path:
361 369 copyfrom_path = get_entry_from_path(ent.copyfrom_path)
362 370 if copyfrom_path:
363 371 self.ui.debug("Copied to %s from %s@%s\n" % (entry, copyfrom_path, ent.copyfrom_rev))
364 372 # It's probably important for hg that the source
365 373 # exists in the revision's parent, not just the
366 374 # ent.copyfrom_rev
367 375 fromkind = svn.ra.check_path(self.ra, copyfrom_path, ent.copyfrom_rev)
368 376 if fromkind != 0:
369 377 copies[self.recode(entry)] = self.recode(copyfrom_path)
370 378 entries.append(self.recode(entry))
371 379 elif kind == 0: # gone, but had better be a deleted *file*
372 380 self.ui.debug("gone from %s\n" % ent.copyfrom_rev)
373 381
374 382 # if a branch is created but entries are removed in the same
375 383 # changeset, get the right fromrev
376 384 if parents:
377 385 uuid, old_module, fromrev = self.revsplit(parents[0])
378 386 else:
379 387 fromrev = revnum - 1
380 388 # might always need to be revnum - 1 in these 3 lines?
381 389 old_module = self.modulemap.get(fromrev, self.module)
382 390
383 391 basepath = old_module + "/" + get_entry_from_path(path, module=self.module)
384 392 entrypath = old_module + "/" + get_entry_from_path(path, module=self.module)
385 393
386 394 def lookup_parts(p):
387 395 rc = None
388 396 parts = p.split("/")
389 397 for i in range(len(parts)):
390 398 part = "/".join(parts[:i])
391 399 info = part, copyfrom.get(part, None)
392 400 if info[1] is not None:
393 401 self.ui.debug("Found parent directory %s\n" % info[1])
394 402 rc = info
395 403 return rc
396 404
397 405 self.ui.debug("base, entry %s %s\n" % (basepath, entrypath))
398 406
399 407 frompath, froment = lookup_parts(entrypath) or (None, revnum - 1)
400 408
401 409 # need to remove fragment from lookup_parts and replace with copyfrom_path
402 410 if frompath is not None:
403 411 self.ui.debug("munge-o-matic\n")
404 412 self.ui.debug(entrypath + '\n')
405 413 self.ui.debug(entrypath[len(frompath):] + '\n')
406 414 entrypath = froment.copyfrom_path + entrypath[len(frompath):]
407 415 fromrev = froment.copyfrom_rev
408 416 self.ui.debug("Info: %s %s %s %s\n" % (frompath, froment, ent, entrypath))
409 417
410 418 fromkind = svn.ra.check_path(self.ra, entrypath, fromrev)
411 419 if fromkind == svn.core.svn_node_file: # a deleted file
412 420 entries.append(self.recode(entry))
413 421 elif fromkind == svn.core.svn_node_dir:
414 422 # print "Deleted/moved non-file:", revnum, path, ent
415 423 # children = self._find_children(path, revnum - 1)
416 424 # print "find children %s@%d from %d action %s" % (path, revnum, ent.copyfrom_rev, ent.action)
417 425 # Sometimes this is tricky. For example: in
418 426 # The Subversion Repository revision 6940 a dir
419 427 # was copied and one of its files was deleted
420 428 # from the new location in the same commit. This
421 429 # code can't deal with that yet.
422 430 if ent.action == 'C':
423 431 children = self._find_children(path, fromrev)
424 432 else:
425 433 oroot = entrypath.strip('/')
426 434 nroot = path.strip('/')
427 435 children = self._find_children(oroot, fromrev)
428 436 children = [s.replace(oroot,nroot) for s in children]
429 437 # Mark all [files, not directories] as deleted.
430 438 for child in children:
431 439 # Can we move a child directory and its
432 440 # parent in the same commit? (probably can). Could
433 441 # cause problems if instead of revnum -1,
434 442 # we have to look in (copyfrom_path, revnum - 1)
435 443 entrypath = get_entry_from_path("/" + child, module=old_module)
436 444 if entrypath:
437 445 entry = self.recode(entrypath.decode(self.encoding))
438 446 if entry in copies:
439 447 # deleted file within a copy
440 448 del copies[entry]
441 449 else:
442 450 entries.append(entry)
443 451 else:
444 452 self.ui.debug('unknown path in revision %d: %s\n' % \
445 453 (revnum, path))
446 454 elif kind == svn.core.svn_node_dir:
447 455 # Should probably synthesize normal file entries
448 456 # and handle as above to clean up copy/rename handling.
449 457
450 458 # If the directory just had a prop change,
451 459 # then we shouldn't need to look for its children.
452 460 # Also this could create duplicate entries. Not sure
453 461 # whether this will matter. Maybe should make entries a set.
454 462 # print "Changed directory", revnum, path, ent.action, ent.copyfrom_path, ent.copyfrom_rev
455 463 # This will fail if a directory was copied
456 464 # from another branch and then some of its files
457 465 # were deleted in the same transaction.
458 466 children = self._find_children(path, revnum)
459 467 children.sort()
460 468 for child in children:
461 469 # Can we move a child directory and its
462 470 # parent in the same commit? (probably can). Could
463 471 # cause problems if instead of revnum -1,
464 472 # we have to look in (copyfrom_path, revnum - 1)
465 473 entrypath = get_entry_from_path("/" + child, module=self.module)
466 474 # print child, self.module, entrypath
467 475 if entrypath:
468 476 # Need to filter out directories here...
469 477 kind = svn.ra.check_path(self.ra, entrypath, revnum)
470 478 if kind != svn.core.svn_node_dir:
471 479 entries.append(self.recode(entrypath))
472 480
473 481 # Copies here (must copy all from source)
474 482 # Probably not a real problem for us if
475 483 # source does not exist
476 484
477 485 # Can do this with the copy command "hg copy"
478 486 # if ent.copyfrom_path:
479 487 # copyfrom_entry = get_entry_from_path(ent.copyfrom_path.decode(self.encoding),
480 488 # module=self.module)
481 489 # copyto_entry = entrypath
482 490 #
483 491 # print "copy directory", copyfrom_entry, 'to', copyto_entry
484 492 #
485 493 # copies.append((copyfrom_entry, copyto_entry))
486 494
487 495 if ent.copyfrom_path:
488 496 copyfrom_path = ent.copyfrom_path.decode(self.encoding)
489 497 copyfrom_entry = get_entry_from_path(copyfrom_path, module=self.module)
490 498 if copyfrom_entry:
491 499 copyfrom[path] = ent
492 500 self.ui.debug("mark %s came from %s\n" % (path, copyfrom[path]))
493 501
494 502 # Good, /probably/ a regular copy. Really should check
495 503 # to see whether the parent revision actually contains
496 504 # the directory in question.
497 505 children = self._find_children(self.recode(copyfrom_path), ent.copyfrom_rev)
498 506 children.sort()
499 507 for child in children:
500 508 entrypath = get_entry_from_path("/" + child, module=self.module)
501 509 if entrypath:
502 510 entry = entrypath.decode(self.encoding)
503 511 # print "COPY COPY From", copyfrom_entry, entry
504 512 copyto_path = path + entry[len(copyfrom_entry):]
505 513 copyto_entry = get_entry_from_path(copyto_path, module=self.module)
506 514 # print "COPY", entry, "COPY To", copyto_entry
507 515 copies[self.recode(copyto_entry)] = self.recode(entry)
508 516 # copy from quux splort/quuxfile
509 517
510 518 return (entries, copies)
511 519
512 520 def _fetch_revisions(self, from_revnum = 0, to_revnum = 347):
513 521 self.child_cset = None
514 522 def parselogentry(orig_paths, revnum, author, date, message):
515 523 self.ui.debug("parsing revision %d (%d changes)\n" %
516 524 (revnum, len(orig_paths)))
517 525
518 526 if revnum in self.modulemap:
519 527 new_module = self.modulemap[revnum]
520 528 if new_module != self.module:
521 529 self.module = new_module
522 530 self.reparent(self.module)
523 531
524 532 rev = self.revid(revnum)
525 533 # branch log might return entries for a parent we already have
526 534 if (rev in self.commits or
527 535 (revnum < self.lastrevs.get(self.module, 0))):
528 536 return
529 537
530 538 parents = []
531 539 orig_paths = orig_paths.items()
532 540 orig_paths.sort()
533 541
534 542 # check whether this revision is the start of a branch
535 543 path, ent = orig_paths and orig_paths[0] or (None, None)
536 544 if ent and path == self.module:
537 545 if ent.copyfrom_path:
538 546 # ent.copyfrom_rev may not be the actual last revision
539 547 prev = self.latest(ent.copyfrom_path, ent.copyfrom_rev)
540 548 self.modulemap[prev] = ent.copyfrom_path
541 549 parents = [self.revid(prev, ent.copyfrom_path)]
542 550 self.ui.note('found parent of branch %s at %d: %s\n' % \
543 551 (self.module, prev, ent.copyfrom_path))
544 552 else:
545 553 self.ui.debug("No copyfrom path, don't know what to do.\n")
546 554
547 555 self.modulemap[revnum] = self.module # track backwards in time
548 556
549 557 paths = []
550 558 # filter out unrelated paths
551 559 for path, ent in orig_paths:
552 560 if not path.startswith(self.module):
553 561 self.ui.debug("boring@%s: %s\n" % (revnum, path))
554 562 continue
555 563 paths.append((path, ent))
556 564
557 entries, copies = self.expandpaths(rev, paths, parents)
558 # a list of (filename, id) where id lets us retrieve the file.
559 # eg in git, id is the object hash. for svn it'll be the
560 self.files[rev] = zip(entries, [rev] * len(entries))
561 if not entries:
562 return
565 self.paths[rev] = (paths, parents)
563 566
564 567 # Example SVN datetime. Includes microseconds.
565 568 # ISO-8601 conformant
566 569 # '2007-01-04T17:35:00.902377Z'
567 570 date = util.parsedate(date[:18] + " UTC", ["%Y-%m-%dT%H:%M:%S"])
568 571
569 572 log = message and self.recode(message)
570 573 author = author and self.recode(author) or ''
571 574 try:
572 575 branch = self.module.split("/")[-1]
573 576 if branch == 'trunk':
574 577 branch = ''
575 578 except IndexError:
576 579 branch = None
577 580
578 581 cset = commit(author=author,
579 582 date=util.datestr(date),
580 583 desc=log,
581 584 parents=parents,
582 copies=copies,
583 585 branch=branch,
584 586 rev=rev.encode('utf-8'))
585 587
586 588 self.commits[rev] = cset
587 589 if self.child_cset and not self.child_cset.parents:
588 590 self.child_cset.parents = [rev]
589 591 self.child_cset = cset
590 592
591 593 self.ui.note('fetching revision log for "%s" from %d to %d\n' %
592 594 (self.module, from_revnum, to_revnum))
593 595
594 596 try:
595 597 for entry in self.get_log([self.module], from_revnum, to_revnum):
596 598 orig_paths, revnum, author, date, message = entry
597 599 if self.is_blacklisted(revnum):
598 600 self.ui.note('skipping blacklisted revision %d\n' % revnum)
599 601 continue
600 602 if orig_paths is None:
601 603 self.ui.debug('revision %d has no entries\n' % revnum)
602 604 continue
603 605 parselogentry(orig_paths, revnum, author, date, message)
604 606 except SubversionException, (_, num):
605 607 if num == svn.core.SVN_ERR_FS_NO_SUCH_REVISION:
606 608 raise NoSuchRevision(branch=self,
607 609 revision="Revision number %d" % to_revnum)
608 610 raise
609 611
610 612 def _getfile(self, file, rev):
611 613 io = StringIO()
612 614 # TODO: ra.get_file transmits the whole file instead of diffs.
613 615 mode = ''
614 616 try:
615 617 revnum = self.revnum(rev)
616 618 if self.module != self.modulemap[revnum]:
617 619 self.module = self.modulemap[revnum]
618 620 self.reparent(self.module)
619 621 info = svn.ra.get_file(self.ra, file, revnum, io)
620 622 if isinstance(info, list):
621 623 info = info[-1]
622 624 mode = ("svn:executable" in info) and 'x' or ''
623 625 mode = ("svn:special" in info) and 'l' or mode
624 626 except SubversionException, e:
625 627 notfound = (svn.core.SVN_ERR_FS_NOT_FOUND,
626 628 svn.core.SVN_ERR_RA_DAV_PATH_NOT_FOUND)
627 629 if e.apr_err in notfound: # File not found
628 630 raise IOError()
629 631 raise
630 632 data = io.getvalue()
631 633 if mode == 'l':
632 634 link_prefix = "link "
633 635 if data.startswith(link_prefix):
634 636 data = data[len(link_prefix):]
635 637 return data, mode
636 638
637 639 def _find_children(self, path, revnum):
638 640 path = path.strip('/')
639 641 pool = Pool()
640 642 rpath = '/'.join([self.base, path]).strip('/')
641 643 return ['%s/%s' % (path, x) for x in svn.client.ls(rpath, optrev(revnum), True, self.ctx, pool).keys()]
General Comments 0
You need to be logged in to leave comments. Login now