##// END OF EJS Templates
convert: checkout svn root revisions...
Patrick Mezard -
r5956:094638b3 default
parent child Browse files
Show More
@@ -1,1008 +1,1018 b''
1 1 # Subversion 1.4/1.5 Python API backend
2 2 #
3 3 # Copyright(C) 2007 Daniel Holth et al
4 4 #
5 5 # Configuration options:
6 6 #
7 7 # convert.svn.trunk
8 8 # Relative path to the trunk (default: "trunk")
9 9 # convert.svn.branches
10 10 # Relative path to tree of branches (default: "branches")
11 11 # convert.svn.tags
12 12 # Relative path to tree of tags (default: "tags")
13 13 #
14 14 # Set these in a hgrc, or on the command line as follows:
15 15 #
16 16 # hg convert --config convert.svn.trunk=wackoname [...]
17 17
18 18 import locale
19 19 import os
20 20 import re
21 21 import sys
22 22 import cPickle as pickle
23 23 import tempfile
24 24
25 25 from mercurial import strutil, util
26 26 from mercurial.i18n import _
27 27
28 28 # Subversion stuff. Works best with very recent Python SVN bindings
29 29 # e.g. SVN 1.5 or backports. Thanks to the bzr folks for enhancing
30 30 # these bindings.
31 31
32 32 from cStringIO import StringIO
33 33
34 34 from common import NoRepo, commit, converter_source, encodeargs, decodeargs
35 35 from common import commandline, converter_sink, mapfile
36 36
37 37 try:
38 38 from svn.core import SubversionException, Pool
39 39 import svn
40 40 import svn.client
41 41 import svn.core
42 42 import svn.ra
43 43 import svn.delta
44 44 import transport
45 45 except ImportError:
46 46 pass
47 47
def geturl(path):
    """Return a URL for path.

    The Subversion client is asked first; when it raises
    SubversionException, an existing local directory is turned into a
    file:// URL and anything else is returned unchanged.
    """
    try:
        canonical = svn.core.svn_path_canonicalize(path)
        return svn.client.url_from_path(canonical)
    except SubversionException:
        pass
    if not os.path.isdir(path):
        return path
    path = os.path.normpath(os.path.abspath(path))
    if os.name == 'nt':
        # prefix a slash in front of the normalized path on Windows
        path = '/' + util.normpath(path)
    return 'file://%s' % path
59 59
def optrev(number):
    """Return an svn_opt_revision_t of kind "number" set to `number`."""
    revision = svn.core.svn_opt_revision_t()
    revision.kind = svn.core.svn_opt_revision_number
    revision.value.number = number
    return revision
65 65
class changedpath(object):
    """Plain-object copy of the three fields read from a changed-path entry.

    get_log_child() wraps every changed-path value in one of these before
    pickling the log entry.
    """
    def __init__(self, p):
        for field in ('copyfrom_path', 'copyfrom_rev', 'action'):
            setattr(self, field, getattr(p, field))
71 71
72 72 def get_log_child(fp, url, paths, start, end, limit=0, discover_changed_paths=True,
73 73 strict_node_history=False):
74 74 protocol = -1
75 75 def receiver(orig_paths, revnum, author, date, message, pool):
76 76 if orig_paths is not None:
77 77 for k, v in orig_paths.iteritems():
78 78 orig_paths[k] = changedpath(v)
79 79 pickle.dump((orig_paths, revnum, author, date, message),
80 80 fp, protocol)
81 81
82 82 try:
83 83 # Use an ra of our own so that our parent can consume
84 84 # our results without confusing the server.
85 85 t = transport.SvnRaTransport(url=url)
86 86 svn.ra.get_log(t.ra, paths, start, end, limit,
87 87 discover_changed_paths,
88 88 strict_node_history,
89 89 receiver)
90 90 except SubversionException, (inst, num):
91 91 pickle.dump(num, fp, protocol)
92 92 except IOError:
93 93 # Caller may interrupt the iteration
94 94 pickle.dump(None, fp, protocol)
95 95 else:
96 96 pickle.dump(None, fp, protocol)
97 97 fp.close()
98 98
def debugsvnlog(ui, **opts):
    """Fetch SVN log in a subprocess and channel them back to parent to
    avoid memory collection issues.

    The arguments are decoded from stdin and the log entries are written
    to stdout by get_log_child(); both streams are switched to binary mode
    first.
    """
    for stream in (sys.stdin, sys.stdout):
        util.set_binary(stream)
    args = decodeargs(sys.stdin.read())
    get_log_child(sys.stdout, *args)
107 107
class logstream:
    """Interruptible revision log iterator.

    Reads pickled records from the given stream. Five-item records are
    yielded as log entries, a None record ends the iteration, and any other
    record is reported as a SubversionException carrying that record.
    """
    def __init__(self, stdout):
        self._stdout = stdout

    def __iter__(self):
        while True:
            entry = pickle.load(self._stdout)
            if entry is None:
                # end-of-log marker
                break
            try:
                orig_paths, revnum, author, date, message = entry
            except (TypeError, ValueError):
                # Not a 5-item record. Only unpacking failures are caught
                # here so that KeyboardInterrupt/SystemExit are never
                # swallowed by this probe.
                raise SubversionException("child raised exception", entry)
            yield entry

    def close(self):
        """Close the underlying stream; calling it again is a no-op."""
        if self._stdout:
            self._stdout.close()
            self._stdout = None
128 128
def get_log(url, paths, start, end, limit=0, discover_changed_paths=True,
            strict_node_history=False):
    """Spawn "hg debugsvnlog" and return a logstream over its output.

    The arguments are encoded with encodeargs() and written to the child's
    stdin, which is then closed.
    """
    payload = encodeargs([url, paths, start, end, limit,
                          discover_changed_paths, strict_node_history])
    cmd = '%s debugsvnlog' % util.shellquote(util.hgexecutable())
    stdin, stdout = os.popen2(cmd, 'b')
    stdin.write(payload)
    stdin.close()
    return logstream(stdout)
140 140
141 141 # SVN conversion code stolen from bzr-svn and tailor
142 142 #
# Subversion looks like a versioned filesystem: branch structures
# are defined by convention and are not enforced by the tool. First,
# we define the potential branches (modules) as "trunk" and the
# child directories of "branches". Revisions are then identified by
# their module and revision number (and a repository identifier).
148 148 #
149 149 # The revision graph is really a tree (or a forest). By default, a
150 150 # revision parent is the previous revision in the same module. If the
151 151 # module directory is copied/moved from another module then the
152 152 # revision is the module root and its parent the source revision in
153 153 # the parent module. A revision has at most one parent.
154 154 #
155 155 class svn_source(converter_source):
def __init__(self, ui, url, rev=None):
    """Open the Subversion repository at url as a conversion source.

    url may carry a trailing "@rev" suffix; an explicit rev argument takes
    precedence over it. Raises NoRepo when the bindings are missing or the
    RA session cannot be opened, and util.Abort when rev is not an integer.
    """
    super(svn_source, self).__init__(ui, url, rev=rev)

    # The svn imports at the top of the file are wrapped in a try/except
    # ImportError, so the name is simply unbound when the bindings are
    # missing.
    try:
        SubversionException
    except NameError:
        raise NoRepo('Subversion python bindings could not be loaded')

    # NOTE(review): this value is overwritten with 'UTF-8' a few lines
    # below, before anything visible here reads it.
    self.encoding = locale.getpreferredencoding()
    self.lastrevs = {}

    latest = None
    try:
        # Support file://path@rev syntax. Useful e.g. to convert
        # deleted branches.
        at = url.rfind('@')
        if at >= 0:
            latest = int(url[at+1:])
            url = url[:at]
    except ValueError, e:
        # what follows the last '@' is not a number (e.g. user@host):
        # leave url untouched
        pass
    self.url = geturl(url)
    self.encoding = 'UTF-8' # Subversion is always nominal UTF-8
    try:
        self.transport = transport.SvnRaTransport(url=self.url)
        self.ra = self.transport.ra
        self.ctx = self.transport.client
        self.base = svn.ra.get_repos_root(self.ra)
        # module is the part of the URL below the repository root
        self.module = self.url[len(self.base):]
        self.commits = {}
        self.paths = {}
        self.uuid = svn.ra.get_uuid(self.ra).decode(self.encoding)
    except SubversionException, e:
        ui.print_exc()
        raise NoRepo("%s does not look like a Subversion repo" % self.url)

    # an explicit rev overrides any "@rev" parsed from the url
    if rev:
        try:
            latest = int(rev)
        except ValueError:
            raise util.Abort('svn: revision %s is not an integer' % rev)

    # get_blacklist() reads "blacklist.txt"; failing to read it is not
    # an error
    try:
        self.get_blacklist()
    except IOError, e:
        pass

    self.head = self.latest(self.module, latest)
    self.last_changed = self.revnum(self.head)

    self._changescache = None

    # Remember the working copy when url (with "@rev" stripped) is a
    # local directory containing .svn/entries; converted() writes its
    # map file there.
    if os.path.exists(os.path.join(url, '.svn/entries')):
        self.wc = url
    else:
        self.wc = None
    self.convertfp = None
213 213
def setrevmap(self, revmap):
    """Record, for each module in revmap, its highest revision number.

    The result is stored in self.lastrevs as {module: revnum}.
    """
    newest = {}
    for revid in revmap.iterkeys():
        module, revnum = self.revsplit(revid)[1:]
        if module not in newest or revnum > newest[module]:
            newest[module] = revnum
    self.lastrevs = newest
222 222
def exists(self, path, optrev):
    """Return True if listing path (relative to self.url) at optrev
    succeeds, False when the listing raises SubversionException."""
    target = self.url.rstrip('/') + '/' + path
    try:
        svn.client.ls(target, optrev, False, self.ctx)
    except SubversionException:
        return False
    return True
230 230
def getheads(self):
    """Return the list of head revision ids to convert.

    Looks for the trunk, tags and branches directories (names taken from
    the convert.svn.* settings, defaulting to "trunk", "tags" and
    "branches"). Also sets self.heads and self.tags, and moves
    self.module/self.head to the trunk when one is found.
    """

    def getcfgpath(name, rev):
        # Return the configured (or default) path for name if it exists
        # at rev. A missing default yields None; a missing explicitly
        # configured path aborts.
        cfgpath = self.ui.config('convert', 'svn.' + name)
        path = (cfgpath or name).strip('/')
        if not self.exists(path, rev):
            if cfgpath:
                raise util.Abort(_('expected %s to be at %r, but not found')
                                 % (name, path))
            return None
        self.ui.note(_('found %s at %r\n') % (name, path))
        return path

    rev = optrev(self.last_changed)
    oldmodule = ''
    trunk = getcfgpath('trunk', rev)
    tags = getcfgpath('tags', rev)
    branches = getcfgpath('branches', rev)

    # If the project has a trunk or branches, we will extract heads
    # from them. We keep the project root otherwise.
    if trunk:
        # oldmodule keeps the project root; tags and branches are
        # resolved against it below
        oldmodule = self.module or ''
        self.module += '/' + trunk
        self.head = self.latest(self.module, self.last_changed)

    # First head in the list is the module's head
    self.heads = [self.head]
    self.tags = '%s/%s' % (oldmodule , (tags or 'tags'))

    # Check if branches bring a few more heads to the list
    if branches:
        rpath = self.url.strip('/')
        branchnames = svn.client.ls(rpath + '/' + branches, rev, False,
                                    self.ctx)
        # NOTE(review): every entry listed under branches is treated as
        # a branch; nothing here checks that it is a directory.
        for branch in branchnames.keys():
            module = '%s/%s/%s' % (oldmodule, branches, branch)
            brevid = self.latest(module, self.last_changed)
            self.ui.note('found branch %s at %d\n' %
                         (branch, self.revnum(brevid)))
            self.heads.append(brevid)

    return self.heads
274 274
def getfile(self, file, rev):
    """Return the contents of file at rev and remember its mode in
    self.modecache for a later getmode() call."""
    contents, flags = self._getfile(file, rev)
    self.modecache[(file, rev)] = flags
    return contents
279 279
def getmode(self, file, rev):
    """Return the mode cached by getfile() for (file, rev).

    Raises KeyError when that pair has not been cached.
    """
    key = (file, rev)
    return self.modecache[key]
282 282
def getchanges(self, rev):
    """Return (files, copies) for rev.

    files is a sorted list of (path, rev) pairs and copies is a dict.
    Revisions with parents are expanded from their changed paths by
    expandpaths(); root revisions (no parents) are handled as a full
    checkout of every file in the module at that revision.

    A result stashed by getchangedfiles() for the same rev is returned
    as-is. The entry for rev is removed from self.paths, so each revision
    can be expanded only once.
    """
    if self._changescache and self._changescache[0] == rev:
        return self._changescache[1]
    self._changescache = None
    self.modecache = {}
    (paths, parents) = self.paths[rev]
    if parents:
        files, copies = self.expandpaths(rev, paths, parents)
    else:
        # Perform a full checkout on roots. expandpaths() must not be
        # called here: it reads parents[0] when handling deletions.
        uuid, module, revnum = self.revsplit(rev)
        entries = svn.client.ls(self.base + module, optrev(revnum),
                                True, self.ctx)
        files = [name for name in entries
                 if entries[name].kind == svn.core.svn_node_file]
        copies = {}

    files.sort()
    files = [(name, rev) for name in files]

    # caller caches the result, so free it here to release memory
    del self.paths[rev]
    return (files, copies)
296 306
def getchangedfiles(self, rev, i):
    """Return the names of the files changed in rev.

    The full getchanges() result is stashed in self._changescache so a
    following getchanges(rev) call can reuse it. The i argument is not
    used.
    """
    changes = self.getchanges(rev)
    self._changescache = (rev, changes)
    names = []
    for item in changes[0]:
        names.append(item[0])
    return names
301 311
def getcommit(self, rev):
    """Return the commit object for rev, fetching its log on demand.

    The commit is removed from self.commits before it is returned.
    """
    if rev not in self.commits:
        module, revnum = self.revsplit(rev)[1:]
        self.module = module
        self.reparent(module)
        # We assume that:
        # - requests for revisions after "stop" come from the
        # revision graph backward traversal. Cache all of them
        # down to stop, they will be used eventually.
        # - requests for revisions before "stop" come to get
        # isolated branches parents. Just fetch what is needed.
        stop = self.lastrevs.get(module, 0)
        if revnum < stop:
            stop = revnum + 1
        self._fetch_revisions(revnum, stop)
    # caller caches the result, so free it here to release memory
    return self.commits.pop(rev)
321 331
def gettags(self):
    """Return a dict mapping tag names to the revision ids they copy.

    The log of the tags directory is read from revision 0 up to the head
    revision. Every path below it that was created by a copy contributes
    one tag, named after the last path component; later log entries
    override earlier ones. A SubversionException while reading the log is
    reported as a note and the tags gathered so far are returned.
    """
    tags = {}
    start = self.revnum(self.head)
    prefix = self.tags + '/'
    try:
        stream = get_log(self.url, [self.tags], 0, start)
        try:
            for entry in stream:
                orig_paths = entry[0]
                if orig_paths is None:
                    # revision without changed-path information
                    continue
                for path in orig_paths:
                    if not path.startswith(prefix):
                        continue
                    ent = orig_paths[path]
                    if not ent.copyfrom_path:
                        # Not a copy: there is no source revision to
                        # point the tag at, and revid() would silently
                        # fall back to self.module.
                        continue
                    tag = path.split('/')[-1]
                    tags[tag] = self.revid(ent.copyfrom_rev,
                                           module=ent.copyfrom_path)
        finally:
            # always release the log stream
            stream.close()
    except SubversionException:
        self.ui.note('no tags found at revision %d\n' % start)
    return tags
339 349
def converted(self, rev, destrev):
    """Append "destrev revnum" to .svn/hg-shamap in the working copy.

    Does nothing when the source is not a working copy. The map file is
    opened in append mode on first use and flushed after every write.
    """
    if not self.wc:
        return
    fp = self.convertfp
    if fp is None:
        fp = open(os.path.join(self.wc, '.svn', 'hg-shamap'), 'a')
        self.convertfp = fp
    fp.write('%s %d\n' % (destrev, self.revnum(rev)))
    fp.flush()
348 358
349 359 # -- helper functions --
350 360
def revid(self, revnum, module=None):
    """Return the unicode revision id "svn:<uuid><module>@<revnum>".

    module defaults to self.module and is decoded with self.encoding.
    """
    module = module or self.module
    decoded = module.decode(self.encoding)
    return u"svn:%s%s@%s" % (self.uuid, decoded, revnum)
356 366
def revnum(self, rev):
    """Return the integer that follows the last '@' in rev."""
    number = rev[rev.rfind('@') + 1:]
    return int(number)
359 369
def revsplit(self, rev):
    """Split a revision id into (uuid, module, revnum).

    rev has the form produced by revid(): "svn:<uuid><module>@<revnum>".
    The revision number is whatever follows the *last* '@', matching
    revnum(), so module paths that themselves contain '@' are split
    correctly. module is '' or starts with '/'.

    Raises ValueError when rev has no '@' or the part after it is not an
    integer.
    """
    encoded = rev.encode(self.encoding)
    at = encoded.rfind('@')
    if at < 0:
        raise ValueError('malformed revision identifier: %r' % (rev,))
    url = encoded[:at]
    revnum = int(encoded[at + 1:])
    parts = url.split('/', 1)
    # drop the leading "svn:" from the first component
    uuid = parts.pop(0)[4:]
    mod = ''
    if parts:
        mod = '/' + parts[0]
    return uuid, mod, revnum
369 379
def latest(self, path, stop=0):
    """Find the latest revid affecting path, up to stop. It may return
    a revision in a different module, since a branch may be moved without
    a change being reported.

    stop defaults to the repository's latest revision number. Raises
    util.Abort when path cannot be stat'ed at stop.
    """
    if not stop:
        stop = svn.ra.get_latest_revnum(self.ra)
    try:
        self.reparent('')
        try:
            dirent = svn.ra.stat(self.ra, path.strip('/'), stop)
        finally:
            # Always move the session back to the module, even when
            # stat() fails: callers such as _fetch_revisions() swallow
            # the Abort raised below and keep using the session.
            self.reparent(self.module)
    except SubversionException:
        dirent = None
    if not dirent:
        raise util.Abort('%s not found up to revision %d' % (path, stop))

    # stat() gives us the previous revision on this line of development, but
    # it might be in *another module*. Fetch the log and detect renames down
    # to the latest revision.
    stream = get_log(self.url, [path], stop, dirent.created_rev)
    try:
        for entry in stream:
            paths, revnum, author, date, message = entry
            if revnum <= dirent.created_rev:
                break

            if paths is None:
                # no changed-path information for this revision
                continue

            for p in paths:
                if not path.startswith(p) or not paths[p].copyfrom_path:
                    continue
                # path lives under a copied directory: follow the copy
                newpath = paths[p].copyfrom_path + path[len(p):]
                self.ui.debug("branch renamed from %s to %s at %d\n" %
                              (path, newpath, revnum))
                path = newpath
                break
    finally:
        stream.close()

    return self.revid(dirent.created_rev, path)
408 418
def get_blacklist(self):
    """Avoid certain revision numbers.
    It is not uncommon for two nearby revisions to cancel each other
    out, e.g. 'I copied trunk into a subdirectory of itself instead
    of making a branch'. The converted repository is significantly
    smaller if we ignore such revisions.

    Revision numbers are read one per line from "blacklist.txt" in the
    current directory; lines starting with "#" and lines that are not
    integers are ignored. self.blacklist is set to an empty set before
    the file is opened, so it exists even when opening raises IOError.
    """
    self.blacklist = util.set()
    blacklist = self.blacklist
    fp = open("blacklist.txt", "r")
    try:
        for line in fp:
            if line.startswith("#"):
                continue
            try:
                blacklist.add(int(line.strip()))
            except ValueError:
                pass # not an integer or a comment
    finally:
        # do not rely on garbage collection to release the handle
        fp.close()
424 434
def is_blacklisted(self, svn_rev):
    """Return True if svn_rev is in the set built by get_blacklist()."""
    skipped = self.blacklist
    return svn_rev in skipped
427 437
def reparent(self, module):
    """Point the RA session at module, given relative to the repository
    root (self.base)."""
    target = (self.base + module).encode(self.encoding)
    self.ui.debug("reparent to %s\n" % target)
    svn.ra.reparent(self.ra, target)
432 442
def expandpaths(self, rev, paths, parents):
    """Expand the changed paths of rev into file entries and copies.

    paths is a sequence of (path, entry) pairs and parents the list of
    parent revision ids; parents[0] is read when a path has disappeared.
    Returns (entries, copies): entries is the de-duplicated list of
    affected file names relative to the module, copies maps a copy
    destination to its source. The RA session is reparented when rev
    belongs to a module other than self.module.
    """
    def get_entry_from_path(path, module=self.module):
        # NOTE(review): the default for module is evaluated here, before
        # self.module may be switched to rev's module a few lines below;
        # calls that omit module therefore use the previous module.
        # Given the repository url of this wc, say
        #   "http://server/plone/CMFPlone/branches/Plone-2_0-branch"
        # extract the "entry" portion (a relative path) from what
        # svn log --xml says, ie
        #   "/CMFPlone/branches/Plone-2_0-branch/tests/PloneTestCase.py"
        # that is to say "tests/PloneTestCase.py"
        if path.startswith(module):
            relative = path[len(module):]
            if relative.startswith('/'):
                return relative[1:]
            else:
                return relative

        # The path is outside our tracked tree...
        self.ui.debug('%r is not under %r, ignoring\n' % (path, module))
        return None

    entries = []
    copyfrom = {} # Map of entrypath, revision for finding source of deleted revisions.
    copies = {}

    new_module, revnum = self.revsplit(rev)[1:]
    if new_module != self.module:
        self.module = new_module
        self.reparent(self.module)

    for path, ent in paths:
        # NOTE(review): get_entry_from_path() returns None for a path
        # outside the module, which would make .decode() fail here;
        # presumably the caller has already filtered paths - confirm.
        entrypath = get_entry_from_path(path, module=self.module)
        entry = entrypath.decode(self.encoding)

        kind = svn.ra.check_path(self.ra, entrypath, revnum)
        if kind == svn.core.svn_node_file:
            if ent.copyfrom_path:
                copyfrom_path = get_entry_from_path(ent.copyfrom_path)
                if copyfrom_path:
                    self.ui.debug("Copied to %s from %s@%s\n" %
                                  (entrypath, copyfrom_path,
                                   ent.copyfrom_rev))
                    # It's probably important for hg that the source
                    # exists in the revision's parent, not just the
                    # ent.copyfrom_rev
                    fromkind = svn.ra.check_path(self.ra, copyfrom_path, ent.copyfrom_rev)
                    if fromkind != 0:
                        copies[self.recode(entry)] = self.recode(copyfrom_path)
            entries.append(self.recode(entry))
        elif kind == 0: # gone, but had better be a deleted *file*
            self.ui.debug("gone from %s\n" % ent.copyfrom_rev)

            # if a branch is created but entries are removed in the same
            # changeset, get the right fromrev
            # parents cannot be empty here, you cannot remove things from
            # a root revision.
            uuid, old_module, fromrev = self.revsplit(parents[0])

            # basepath and entrypath start out identical; only entrypath
            # is rewritten below
            basepath = old_module + "/" + get_entry_from_path(path, module=self.module)
            entrypath = old_module + "/" + get_entry_from_path(path, module=self.module)

            def lookup_parts(p):
                # Return (prefix, entry) for the longest proper prefix
                # of p recorded in copyfrom, or None when there is none.
                rc = None
                parts = p.split("/")
                for i in range(len(parts)):
                    part = "/".join(parts[:i])
                    info = part, copyfrom.get(part, None)
                    if info[1] is not None:
                        self.ui.debug("Found parent directory %s\n" % info[1])
                        rc = info
                return rc

            self.ui.debug("base, entry %s %s\n" % (basepath, entrypath))

            frompath, froment = lookup_parts(entrypath) or (None, revnum - 1)

            # need to remove fragment from lookup_parts and replace with copyfrom_path
            if frompath is not None:
                self.ui.debug("munge-o-matic\n")
                self.ui.debug(entrypath + '\n')
                self.ui.debug(entrypath[len(frompath):] + '\n')
                entrypath = froment.copyfrom_path + entrypath[len(frompath):]
                fromrev = froment.copyfrom_rev
                self.ui.debug("Info: %s %s %s %s\n" % (frompath, froment, ent, entrypath))

            # We could avoid the reparent calls if the module has not
            # changed, but it is probably not worth the pain.
            self.reparent('')
            fromkind = svn.ra.check_path(self.ra, entrypath.strip('/'), fromrev)
            self.reparent(self.module)

            if fromkind == svn.core.svn_node_file:   # a deleted file
                entries.append(self.recode(entry))
            elif fromkind == svn.core.svn_node_dir:
                # print "Deleted/moved non-file:", revnum, path, ent
                # children = self._find_children(path, revnum - 1)
                # print "find children %s@%d from %d action %s" % (path, revnum, ent.copyfrom_rev, ent.action)
                # Sometimes this is tricky. For example: in
                # The Subversion Repository revision 6940 a dir
                # was copied and one of its files was deleted
                # from the new location in the same commit. This
                # code can't deal with that yet.
                if ent.action == 'C':
                    children = self._find_children(path, fromrev)
                else:
                    # list the children at the old location and map
                    # their names onto the new one
                    oroot = entrypath.strip('/')
                    nroot = path.strip('/')
                    children = self._find_children(oroot, fromrev)
                    children = [s.replace(oroot,nroot) for s in children]
                # Mark all [files, not directories] as deleted.
                for child in children:
                    # Can we move a child directory and its
                    # parent in the same commit? (probably can). Could
                    # cause problems if instead of revnum -1,
                    # we have to look in (copyfrom_path, revnum - 1)
                    entrypath = get_entry_from_path("/" + child, module=old_module)
                    if entrypath:
                        entry = self.recode(entrypath.decode(self.encoding))
                        if entry in copies:
                            # deleted file within a copy
                            del copies[entry]
                        else:
                            entries.append(entry)
            else:
                self.ui.debug('unknown path in revision %d: %s\n' % \
                              (revnum, path))
        elif kind == svn.core.svn_node_dir:
            # Should probably synthesize normal file entries
            # and handle as above to clean up copy/rename handling.

            # If the directory just had a prop change,
            # then we shouldn't need to look for its children.
            if ent.action == 'M':
                continue

            # Also this could create duplicate entries. Not sure
            # whether this will matter. Maybe should make entries a set.
            # print "Changed directory", revnum, path, ent.action, ent.copyfrom_path, ent.copyfrom_rev
            # This will fail if a directory was copied
            # from another branch and then some of its files
            # were deleted in the same transaction.
            children = self._find_children(path, revnum)
            children.sort()
            for child in children:
                # Can we move a child directory and its
                # parent in the same commit? (probably can). Could
                # cause problems if instead of revnum -1,
                # we have to look in (copyfrom_path, revnum - 1)
                entrypath = get_entry_from_path("/" + child, module=self.module)
                # print child, self.module, entrypath
                if entrypath:
                    # Need to filter out directories here...
                    kind = svn.ra.check_path(self.ra, entrypath, revnum)
                    if kind != svn.core.svn_node_dir:
                        entries.append(self.recode(entrypath))

            # Copies here (must copy all from source)
            # Probably not a real problem for us if
            # source does not exist

            # Can do this with the copy command "hg copy"
            # if ent.copyfrom_path:
            #     copyfrom_entry = get_entry_from_path(ent.copyfrom_path.decode(self.encoding),
            #             module=self.module)
            #     copyto_entry = entrypath
            #
            #     print "copy directory", copyfrom_entry, 'to', copyto_entry
            #
            #     copies.append((copyfrom_entry, copyto_entry))

            if ent.copyfrom_path:
                copyfrom_path = ent.copyfrom_path.decode(self.encoding)
                copyfrom_entry = get_entry_from_path(copyfrom_path, module=self.module)
                if copyfrom_entry:
                    # remember the copied directory so that deletions
                    # below it can be traced back by lookup_parts()
                    copyfrom[path] = ent
                    self.ui.debug("mark %s came from %s\n" % (path, copyfrom[path]))

                    # Good, /probably/ a regular copy. Really should check
                    # to see whether the parent revision actually contains
                    # the directory in question.
                    children = self._find_children(self.recode(copyfrom_path), ent.copyfrom_rev)
                    children.sort()
                    for child in children:
                        entrypath = get_entry_from_path("/" + child, module=self.module)
                        if entrypath:
                            entry = entrypath.decode(self.encoding)
                            # print "COPY COPY From", copyfrom_entry, entry
                            copyto_path = path + entry[len(copyfrom_entry):]
                            copyto_entry = get_entry_from_path(copyto_path, module=self.module)
                            # print "COPY", entry, "COPY To", copyto_entry
                            copies[self.recode(copyto_entry)] = self.recode(entry)
                            # copy from quux splort/quuxfile

    return (util.unique(entries), copies)
625 635
def _fetch_revisions(self, from_revnum, to_revnum):
    """Read the log of self.module between the two revision numbers
    (newest first) and fill self.commits and self.paths.

    The arguments are swapped when given in ascending order. Reading
    stops early at the first revision that is a branch root.
    """
    if from_revnum < to_revnum:
        from_revnum, to_revnum = to_revnum, from_revnum

    # commit parsed from the previous (newer) log entry; its parent is
    # filled in when the next entry is parsed
    self.child_cset = None
    def parselogentry(orig_paths, revnum, author, date, message):
        """Return the parsed commit object or None, and True if
        the revision is a branch root.
        """
        self.ui.debug("parsing revision %d (%d changes)\n" %
                      (revnum, len(orig_paths)))

        rev = self.revid(revnum)
        # branch log might return entries for a parent we already have

        if (rev in self.commits or revnum < to_revnum):
            return None, False

        parents = []
        # check whether this revision is the start of a branch
        if self.module in orig_paths:
            ent = orig_paths[self.module]
            if ent.copyfrom_path:
                # ent.copyfrom_rev may not be the actual last revision
                previd = self.latest(ent.copyfrom_path, ent.copyfrom_rev)
                parents = [previd]
                prevmodule, prevnum = self.revsplit(previd)[1:]
                self.ui.note('found parent of branch %s at %d: %s\n' %
                             (self.module, prevnum, prevmodule))
            else:
                self.ui.debug("No copyfrom path, don't know what to do.\n")

        orig_paths = orig_paths.items()
        orig_paths.sort()
        paths = []
        # filter out unrelated paths
        for path, ent in orig_paths:
            if not path.startswith(self.module):
                self.ui.debug("boring@%s: %s\n" % (revnum, path))
                continue
            paths.append((path, ent))

        # Example SVN datetime. Includes microseconds.
        # ISO-8601 conformant
        # '2007-01-04T17:35:00.902377Z'
        # Only the first 19 characters (up to the seconds) are parsed.
        date = util.parsedate(date[:19] + " UTC", ["%Y-%m-%dT%H:%M:%S"])

        log = message and self.recode(message) or ''
        author = author and self.recode(author) or ''
        # the branch name is the last component of the module path;
        # "trunk" is reported as the empty name
        try:
            branch = self.module.split("/")[-1]
            if branch == 'trunk':
                branch = ''
        except IndexError:
            branch = None

        cset = commit(author=author,
                      date=util.datestr(date),
                      desc=log,
                      parents=parents,
                      branch=branch,
                      rev=rev.encode('utf-8'))

        self.commits[rev] = cset
        # The parents list is *shared* among self.paths and the
        # commit object. Both will be updated below.
        self.paths[rev] = (paths, cset.parents)
        # The log is walked newest first, so this revision is the parent
        # of the previously parsed one unless that one already has a
        # parent (i.e. it is a branch root).
        if self.child_cset and not self.child_cset.parents:
            self.child_cset.parents[:] = [rev]
        self.child_cset = cset
        return cset, len(parents) > 0

    self.ui.note('fetching revision log for "%s" from %d to %d\n' %
                 (self.module, from_revnum, to_revnum))

    try:
        # last commit parsed so far, i.e. the oldest one of the sequence
        firstcset = None
        stream = get_log(self.url, [self.module], from_revnum, to_revnum)
        try:
            for entry in stream:
                paths, revnum, author, date, message = entry
                if self.is_blacklisted(revnum):
                    self.ui.note('skipping blacklisted revision %d\n'
                                 % revnum)
                    continue
                if paths is None:
                    self.ui.debug('revision %d has no entries\n' % revnum)
                    continue
                cset, branched = parselogentry(paths, revnum, author,
                                               date, message)
                if cset:
                    firstcset = cset
                if branched:
                    break
        finally:
            stream.close()

        if firstcset and not firstcset.parents:
            # The first revision of the sequence (the last fetched one)
            # has invalid parents if not a branch root. Find the parent
            # revision now, if any.
            try:
                firstrevnum = self.revnum(firstcset.rev)
                if firstrevnum > 1:
                    latest = self.latest(self.module, firstrevnum - 1)
                    firstcset.parents.append(latest)
            except util.Abort:
                # latest() found nothing earlier: leave it parentless
                pass
    except SubversionException, (inst, num):
        if num == svn.core.SVN_ERR_FS_NO_SUCH_REVISION:
            # NOTE(review): NoSuchRevision is neither defined nor imported
            # in the code visible here; if that holds for the whole file,
            # this line raises NameError instead - confirm.
            raise NoSuchRevision(branch=self,
                                 revision="Revision number %d" % to_revnum)
        raise
739 749
740 750 def _getfile(self, file, rev):
741 751 io = StringIO()
742 752 # TODO: ra.get_file transmits the whole file instead of diffs.
743 753 mode = ''
744 754 try:
745 755 new_module, revnum = self.revsplit(rev)[1:]
746 756 if self.module != new_module:
747 757 self.module = new_module
748 758 self.reparent(self.module)
749 759 info = svn.ra.get_file(self.ra, file, revnum, io)
750 760 if isinstance(info, list):
751 761 info = info[-1]
752 762 mode = ("svn:executable" in info) and 'x' or ''
753 763 mode = ("svn:special" in info) and 'l' or mode
754 764 except SubversionException, e:
755 765 notfound = (svn.core.SVN_ERR_FS_NOT_FOUND,
756 766 svn.core.SVN_ERR_RA_DAV_PATH_NOT_FOUND)
757 767 if e.apr_err in notfound: # File not found
758 768 raise IOError()
759 769 raise
760 770 data = io.getvalue()
761 771 if mode == 'l':
762 772 link_prefix = "link "
763 773 if data.startswith(link_prefix):
764 774 data = data[len(link_prefix):]
765 775 return data, mode
766 776
def _find_children(self, path, revnum):
    """Return the recursive listing of path at revnum, each name prefixed
    with path (stripped of surrounding slashes)."""
    path = path.strip('/')
    pool = Pool()
    rpath = '/'.join([self.base, path]).strip('/')
    listing = svn.client.ls(rpath, optrev(revnum), True, self.ctx, pool)
    children = []
    for name in listing.keys():
        children.append('%s/%s' % (path, name))
    return children
772 782
# Shell script that svn_sink.__init__ writes to hooks/pre-revprop-change in
# a repository it has just created. It exits 0 for a modification of svn:log
# and for the addition of hg:convert-branch or hg:convert-rev; every other
# revision property change is refused with exit status 1.
pre_revprop_change = '''#!/bin/sh

REPOS="$1"
REV="$2"
USER="$3"
PROPNAME="$4"
ACTION="$5"

if [ "$ACTION" = "M" -a "$PROPNAME" = "svn:log" ]; then exit 0; fi
if [ "$ACTION" = "A" -a "$PROPNAME" = "hg:convert-branch" ]; then exit 0; fi
if [ "$ACTION" = "A" -a "$PROPNAME" = "hg:convert-rev" ]; then exit 0; fi

echo "Changing prohibited revision property" >&2
exit 1
'''
788 798
789 799 class svn_sink(converter_sink, commandline):
# Matches a "Committed revision N" line anywhere in multi-line text;
# group 1 captures the revision number. The trailing "." is unescaped
# and therefore matches any single character.
commit_re = re.compile(r'Committed revision (\d+).', re.M)
791 801
def prerun(self):
    """Change into the working copy, if one is set."""
    if not self.wc:
        return
    os.chdir(self.wc)
795 805
def postrun(self):
    """Change back to the directory saved in self.cwd, if a working copy
    is set."""
    if not self.wc:
        return
    os.chdir(self.cwd)
799 809
def join(self, name):
    """Return the path of name inside the working copy's .svn directory."""
    metadir = os.path.join(self.wc, '.svn')
    return os.path.join(metadir, name)
802 812
def revmapfile(self):
    """Return the path of the 'hg-shamap' file, as built by join()."""
    mapname = 'hg-shamap'
    return self.join(mapname)
805 815
def authorfile(self):
    """Return the path of the 'hg-authormap' file, as built by join()."""
    mapname = 'hg-authormap'
    return self.join(mapname)
808 818
def __init__(self, ui, path):
    """Set up a Subversion sink at path.

    When path is an existing working copy it is updated and used as-is.
    Otherwise a working copy named "<basename of path>-wc" is checked out
    in the current directory; when path's parent is an existing directory
    and path is not yet a repository, one is created there first.
    """
    converter_sink.__init__(self, ui, path)
    commandline.__init__(self, ui, 'svn')
    # operations queued by putfile(), delfile() and copyfile()
    self.delete = []
    self.setexec = []
    self.delexec = []
    self.copies = []
    self.wc = None
    # directory to return to in postrun()
    self.cwd = os.getcwd()

    path = os.path.realpath(path)

    # False, or the filesystem path of a repository created below
    created = False
    if os.path.isfile(os.path.join(path, '.svn', 'entries')):
        self.wc = path
        self.run0('update')
    else:
        wcpath = os.path.join(os.getcwd(), os.path.basename(path) + '-wc')

        if os.path.isdir(os.path.dirname(path)):
            # db/fs-type is used as the marker of an existing repository
            if not os.path.exists(os.path.join(path, 'db', 'fs-type')):
                ui.status(_('initializing svn repo %r\n') %
                          os.path.basename(path))
                commandline(ui, 'svnadmin').run0('create', path)
                created = path
            # from here on path is a file:// URL, no longer a filesystem
            # path
            path = util.normpath(path)
            if not path.startswith('/'):
                path = '/' + path
            path = 'file://' + path

        ui.status(_('initializing svn wc %r\n') % os.path.basename(wcpath))
        self.run0('checkout', path, wcpath)

        self.wc = wcpath
    self.opener = util.opener(self.wc)
    self.wopener = util.opener(self.wc)
    self.childmap = mapfile(ui, self.join('hg-childmap'))
    # is_exec is None when util.checkexec() reports no exec-bit support
    self.is_exec = util.checkexec(self.wc) and util.is_exec or None

    if created:
        # install the hook script in the new repository and flag it
        # executable
        hook = os.path.join(created, 'hooks', 'pre-revprop-change')
        fp = open(hook, 'w')
        fp.write(pre_revprop_change)
        fp.close()
        util.set_flags(hook, "x")

    # path is the working copy path or the file:// URL at this point
    xport = transport.SvnRaTransport(url=geturl(path))
    self.uuid = svn.ra.get_uuid(xport.ra)
857 867
def wjoin(self, *names):
    """Join names onto the working copy path."""
    parts = (self.wc,) + names
    return os.path.join(*parts)
860 870
def putfile(self, filename, flags, data):
    """Write one file into the working copy and queue executable changes.

    filename is relative to the working copy.  When flags contains 'l',
    a symlink whose target is data is created and nothing else is done.
    Otherwise data is written as the file content, flags is passed to
    util.set_flags(), and filename is appended to self.setexec or
    self.delexec when the 'x' flag differs from the previous state, so
    that putcommit can adjust the svn:executable property.
    """
    if 'l' in flags:
        self.wopener.symlink(data, filename)
    else:
        try:
            # Remove a symlink left at this location before writing
            # regular content.  The path must be the one inside the
            # working copy: the bare relative name would be resolved
            # against the process current directory instead.
            wpath = self.wjoin(filename)
            if os.path.islink(wpath):
                os.unlink(wpath)
        except OSError:
            pass
        fp = self.wopener(filename, 'w')
        try:
            fp.write(data)
        finally:
            # Close explicitly rather than rely on garbage collection.
            fp.close()

        if self.is_exec:
            was_exec = self.is_exec(self.wjoin(filename))
        else:
            # On filesystems not supporting execute-bit, there is no way
            # to know if it is set but asking subversion. Setting it
            # systematically is just as expensive and much simpler.
            was_exec = 'x' not in flags

        util.set_flags(self.wjoin(filename), flags)
        if was_exec:
            if 'x' not in flags:
                self.delexec.append(filename)
        else:
            if 'x' in flags:
                self.setexec.append(filename)
887 897
def delfile(self, name):
    """Queue name for deletion; the queue is consumed by putcommit."""
    pending = self.delete
    pending.append(name)
890 900
def copyfile(self, source, dest):
    """Queue a copy from source to dest; the queue is consumed by putcommit."""
    pair = [source, dest]
    self.copies.append(pair)
893 903
894 904 def _copyfile(self, source, dest):
895 905 # SVN's copy command pukes if the destination file exists, but
896 906 # our copyfile method expects to record a copy that has
897 907 # already occurred. Cross the semantic gap.
898 908 wdest = self.wjoin(dest)
899 909 exists = os.path.exists(wdest)
900 910 if exists:
901 911 fd, tempname = tempfile.mkstemp(
902 912 prefix='hg-copy-', dir=os.path.dirname(wdest))
903 913 os.close(fd)
904 914 os.unlink(tempname)
905 915 os.rename(wdest, tempname)
906 916 try:
907 917 self.run0('copy', source, dest)
908 918 finally:
909 919 if exists:
910 920 try:
911 921 os.unlink(wdest)
912 922 except OSError:
913 923 pass
914 924 os.rename(tempname, wdest)
915 925
def dirs_of(self, files):
    """Return the set of directory names related to files.

    A name is included when it is itself a directory in the working
    copy; in addition, every prefix of each name that ends just before
    a '/' position reported by strutil.rfindall is included.
    """
    found = set()
    for name in files:
        if os.path.isdir(self.wjoin(name)):
            found.add(name)
        found.update([name[:pos] for pos in strutil.rfindall(name, '/')])
    return found
924 934
def add_dirs(self, files):
    """Add the directories of files that have no '.svn/entries' file yet.

    The candidates come from self.dirs_of(files).  Those lacking
    '.svn/entries' are sorted and passed to self.xargs with the 'add'
    command (non_recursive, quiet).  Returns that list, which is empty
    when nothing had to be added.
    """
    missing = []
    for d in self.dirs_of(files):
        marker = self.wjoin(d, '.svn', 'entries')
        if not os.path.exists(marker):
            missing.append(d)
    if not missing:
        return missing
    missing.sort()
    self.xargs(missing, 'add', non_recursive=True, quiet=True)
    return missing
932 942
def add_files(self, files):
    """Pass files to self.xargs with the 'add' command (quiet) and return files.

    Nothing is run when files is empty.
    """
    if not files:
        return files
    self.xargs(files, 'add', quiet=True)
    return files
937 947
def tidy_dirs(self, names):
    """Run 'delete' on directories that contain nothing but '.svn'.

    The candidates come from self.dirs_of(names) and are visited in
    reverse sorted order, so a nested directory is examined before its
    parent.  Returns the list of directories that were deleted.
    """
    deleted = []
    for d in sorted(self.dirs_of(names), reverse=True):
        wd = self.wjoin(d)
        # os.listdir() returns a list, so the comparison has to be made
        # against a list; comparing with the bare string never matches.
        if os.listdir(wd) == ['.svn']:
            self.run0('delete', d)
            deleted.append(d)
    return deleted
948 958
def addchild(self, parent, child):
    """Record child as the entry for parent in self.childmap."""
    mapping = self.childmap
    mapping[parent] = child
951 961
def revid(self, rev):
    """Return the identifier built from the repository uuid and rev."""
    parts = (self.uuid, rev)
    return u"svn:%s@%s" % parts
954 964
def putcommit(self, files, parents, commit):
    """Commit the queued changes and return the new revision identifier.

    If any of parents already has an entry in self.childmap, the
    identifier built from that entry is returned immediately and
    nothing is committed.  Otherwise the queued copies, deletions and
    svn:executable changes are applied, missing directories and files
    are added, 'commit' is run with commit.desc as the log message, and
    the resulting revision is recorded for every parent.

    Raises util.Abort when no revision can be extracted from the commit
    output with self.commit_re.
    """
    for parent in parents:
        try:
            return self.revid(self.childmap[parent])
        except KeyError:
            pass

    # Names already handled; start with the files queued for deletion.
    touched = set(self.delete)
    files = util.frozenset(files)
    touched.update(self.add_dirs(files.difference(touched)))
    if self.copies:
        for src, dst in self.copies:
            self._copyfile(src, dst)
        self.copies = []
    if self.delete:
        self.xargs(self.delete, 'delete')
        self.delete = []
    touched.update(self.add_files(files.difference(touched)))
    touched.update(self.tidy_dirs(touched))
    if self.delexec:
        self.xargs(self.delexec, 'propdel', 'svn:executable')
        self.delexec = []
    if self.setexec:
        self.xargs(self.setexec, 'propset', 'svn:executable', '*')
        self.setexec = []

    # The log message is handed over through a temporary file.
    handle, messagefile = tempfile.mkstemp(prefix='hg-convert-')
    msgfp = os.fdopen(handle, 'w')
    msgfp.write(commit.desc)
    msgfp.close()
    try:
        author = util.shortuser(commit.author)
        output = self.run0('commit', username=author, file=messagefile,
                           encoding='utf-8')
        try:
            newrev = self.commit_re.search(output).group(1)
        except AttributeError:
            self.ui.warn(_('unexpected svn output:\n'))
            self.ui.warn(output)
            raise util.Abort(_('unable to cope with svn output'))
        # Store the source revision and branch as revision properties.
        if commit.rev:
            self.run('propset', 'hg:convert-rev', commit.rev,
                     revprop=True, revision=newrev)
        if commit.branch and commit.branch != 'default':
            self.run('propset', 'hg:convert-branch', commit.branch,
                     revprop=True, revision=newrev)
        for parent in parents:
            self.addchild(parent, newrev)
        return self.revid(newrev)
    finally:
        os.unlink(messagefile)
1006 1016
def puttags(self, tags):
    """Warn that tags are not implemented; tags is ignored."""
    notice = _('XXX TAGS NOT IMPLEMENTED YET\n')
    self.ui.warn(notice)
General Comments 0
You need to be logged in to leave comments. Login now