##// END OF EJS Templates
convert/svn: read trunk name once, use None for default
Patrick Mezard -
r13529:9b62cbe8 default
parent child Browse files
Show More
@@ -1,1172 +1,1172 b''
1 1 # Subversion 1.4/1.5 Python API backend
2 2 #
3 3 # Copyright(C) 2007 Daniel Holth et al
4 4
5 5 import os
6 6 import re
7 7 import sys
8 8 import cPickle as pickle
9 9 import tempfile
10 10 import urllib
11 11 import urllib2
12 12
13 13 from mercurial import strutil, util, encoding
14 14 from mercurial.i18n import _
15 15
16 16 # Subversion stuff. Works best with very recent Python SVN bindings
17 17 # e.g. SVN 1.5 or backports. Thanks to the bzr folks for enhancing
18 18 # these bindings.
19 19
20 20 from cStringIO import StringIO
21 21
22 22 from common import NoRepo, MissingTool, commit, encodeargs, decodeargs
23 23 from common import commandline, converter_source, converter_sink, mapfile
24 24
25 25 try:
26 26 from svn.core import SubversionException, Pool
27 27 import svn
28 28 import svn.client
29 29 import svn.core
30 30 import svn.ra
31 31 import svn.delta
32 32 import transport
33 33 import warnings
34 34 warnings.filterwarnings('ignore',
35 35 module='svn.core',
36 36 category=DeprecationWarning)
37 37
38 38 except ImportError:
39 39 svn = None
40 40
class SvnPathNotFound(Exception):
    """Raised when a path cannot be found in the Subversion repository."""
    pass
43 43
def geturl(path):
    """Return a Subversion URL for path.

    The svn client is first asked for the URL of the canonicalized path.
    If that raises SubversionException and path is an existing directory,
    a quoted file:// URL of its absolute path is returned. Otherwise path
    is returned unchanged.
    """
    try:
        return svn.client.url_from_path(svn.core.svn_path_canonicalize(path))
    except SubversionException:
        pass
    if os.path.isdir(path):
        path = os.path.normpath(os.path.abspath(path))
        if os.name == 'nt':
            path = '/' + util.normpath(path)
        # Module URL is later compared with the repository URL returned
        # by svn API, which is UTF-8.
        path = encoding.tolocal(path)
        return 'file://%s' % urllib.quote(path)
    return path
58 58
def optrev(number):
    """Return an svn_opt_revision_t designating revision number."""
    # The local is not called "optrev" so it does not shadow this function.
    rev = svn.core.svn_opt_revision_t()
    rev.kind = svn.core.svn_opt_revision_number
    rev.value.number = number
    return rev
64 64
class changedpath(object):
    """Plain copy of the copyfrom_path, copyfrom_rev and action of p.

    get_log_child() replaces each changed-path entry with an instance of
    this class before pickling the log entry.
    """
    def __init__(self, p):
        self.copyfrom_path = p.copyfrom_path
        self.copyfrom_rev = p.copyfrom_rev
        self.action = p.action
70 70
71 71 def get_log_child(fp, url, paths, start, end, limit=0, discover_changed_paths=True,
72 72 strict_node_history=False):
73 73 protocol = -1
74 74 def receiver(orig_paths, revnum, author, date, message, pool):
75 75 if orig_paths is not None:
76 76 for k, v in orig_paths.iteritems():
77 77 orig_paths[k] = changedpath(v)
78 78 pickle.dump((orig_paths, revnum, author, date, message),
79 79 fp, protocol)
80 80
81 81 try:
82 82 # Use an ra of our own so that our parent can consume
83 83 # our results without confusing the server.
84 84 t = transport.SvnRaTransport(url=url)
85 85 svn.ra.get_log(t.ra, paths, start, end, limit,
86 86 discover_changed_paths,
87 87 strict_node_history,
88 88 receiver)
89 89 except SubversionException, (inst, num):
90 90 pickle.dump(num, fp, protocol)
91 91 except IOError:
92 92 # Caller may interrupt the iteration
93 93 pickle.dump(None, fp, protocol)
94 94 else:
95 95 pickle.dump(None, fp, protocol)
96 96 fp.close()
97 97 # With large history, cleanup process goes crazy and suddenly
98 98 # consumes *huge* amount of memory. The output file being closed,
99 99 # there is no need for clean termination.
100 100 os._exit(0)
101 101
def debugsvnlog(ui, **opts):
    """Fetch SVN log in a subprocess and channel them back to parent to
    avoid memory collection issues.

    The arguments for get_log_child() are decoded from stdin and the
    pickled log entries are written to stdout, both in binary mode.
    """
    util.set_binary(sys.stdin)
    util.set_binary(sys.stdout)
    args = decodeargs(sys.stdin.read())
    get_log_child(sys.stdout, *args)
110 110
class logstream(object):
    """Interruptible revision log iterator.

    Iterating unpickles entries from the wrapped stream and yields each
    (orig_paths, revnum, author, date, message) tuple until a None marker
    is read. Any other value that is not such a tuple is reported as a
    SubversionException carrying that value.
    """
    def __init__(self, stdout):
        self._stdout = stdout

    def __iter__(self):
        while True:
            try:
                entry = pickle.load(self._stdout)
            except EOFError:
                raise util.Abort(_('Mercurial failed to run itself, check'
                                   ' hg executable is in PATH'))
            try:
                orig_paths, revnum, author, date, message = entry
            except (TypeError, ValueError):
                # Only unpacking failures are expected here: TypeError for
                # None or a number, ValueError for a wrong-sized sequence.
                if entry is None:
                    break
                raise SubversionException("child raised exception", entry)
            yield entry

    def close(self):
        """Close the underlying stream; further calls do nothing."""
        if self._stdout:
            self._stdout.close()
            self._stdout = None
135 135
136 136
# Check to see if the given path is a local Subversion repo. Verify this by
# looking for several svn-specific files and directories in the given
# directory.
def filecheck(ui, path, proto):
    """Return True if path contains 'locks', 'hooks', 'format' and 'db'.

    ui and proto are not used; they are part of the checker signature
    shared with httpcheck().
    """
    for x in ('locks', 'hooks', 'format', 'db'):
        if not os.path.exists(os.path.join(path, x)):
            return False
    return True
145 145
146 146 # Check to see if a given path is the root of an svn repo over http. We verify
147 147 # this by requesting a version-controlled URL we know can't exist and looking
148 148 # for the svn-specific "not found" XML.
149 149 def httpcheck(ui, path, proto):
150 150 try:
151 151 opener = urllib2.build_opener()
152 152 rsp = opener.open('%s://%s/!svn/ver/0/.svn' % (proto, path))
153 153 data = rsp.read()
154 154 except urllib2.HTTPError, inst:
155 155 if inst.code != 404:
156 156 # Except for 404 we cannot know for sure this is not an svn repo
157 157 ui.warn(_('svn: cannot probe remote repository, assume it could '
158 158 'be a subversion repository. Use --source-type if you '
159 159 'know better.\n'))
160 160 return True
161 161 data = inst.fp.read()
162 162 except:
163 163 # Could be urllib2.URLError if the URL is invalid or anything else.
164 164 return False
165 165 return '<m:human-readable errcode="160013">' in data
166 166
# Maps a URL scheme to the function probing it for a Subversion repository.
protomap = {'http': httpcheck,
            'https': httpcheck,
            'file': filecheck,
            }
def issvnurl(ui, url):
    """Return True if url or one of its parent paths looks like an svn repo.

    A url without '://' is treated as a local path. The checker registered
    in protomap for the scheme is applied to the path and then to each
    parent obtained by dropping the last '/' component. Schemes without a
    checker never match.
    """
    try:
        proto, path = url.split('://', 1)
        if proto == 'file':
            path = urllib.url2pathname(path)
    except ValueError:
        proto = 'file'
        path = os.path.abspath(url)
    if proto == 'file':
        path = path.replace(os.sep, '/')
    check = protomap.get(proto, lambda *args: False)
    while '/' in path:
        if check(ui, path, proto):
            return True
        path = path.rsplit('/', 1)[0]
    return False
187 187
188 188 # SVN conversion code stolen from bzr-svn and tailor
189 189 #
190 190 # Subversion looks like a versioned filesystem, branches structures
191 191 # are defined by conventions and not enforced by the tool. First,
192 192 # we define the potential branches (modules) as "trunk" and "branches"
193 193 # children directories. Revisions are then identified by their
194 194 # module and revision number (and a repository identifier).
195 195 #
196 196 # The revision graph is really a tree (or a forest). By default, a
197 197 # revision parent is the previous revision in the same module. If the
198 198 # module directory is copied/moved from another module then the
199 199 # revision is the module root and its parent the source revision in
200 200 # the parent module. A revision has at most one parent.
201 201 #
202 202 class svn_source(converter_source):
def __init__(self, ui, url, rev=None):
    """Open the Subversion repository at url as a conversion source.

    url may end with '@REV' to select the latest revision to convert;
    an explicit rev argument takes precedence over it.

    Raises NoRepo when url does not look like a Subversion repository,
    MissingTool when the Subversion bindings are missing or older than
    1.4, and util.Abort when a revision setting is not an integer or no
    revision is found in the module.
    """
    super(svn_source, self).__init__(ui, url, rev=rev)

    if not (url.startswith('svn://') or url.startswith('svn+ssh://') or
            (os.path.exists(url) and
             os.path.exists(os.path.join(url, '.svn'))) or
            issvnurl(ui, url)):
        raise NoRepo(_("%s does not look like a Subversion repository")
                     % url)
    if svn is None:
        raise MissingTool(_('Could not load Subversion python bindings'))

    try:
        version = svn.core.SVN_VER_MAJOR, svn.core.SVN_VER_MINOR
        if version < (1, 4):
            raise MissingTool(_('Subversion python bindings %d.%d found, '
                                '1.4 or later required') % version)
    except AttributeError:
        raise MissingTool(_('Subversion python bindings are too old, 1.4 '
                            'or later required'))

    self.lastrevs = {}

    latest = None
    try:
        # Support file://path@rev syntax. Useful e.g. to convert
        # deleted branches.
        at = url.rfind('@')
        if at >= 0:
            latest = int(url[at + 1:])
            url = url[:at]
    except ValueError:
        pass
    self.url = geturl(url)
    self.encoding = 'UTF-8' # Subversion is always nominal UTF-8
    try:
        self.transport = transport.SvnRaTransport(url=self.url)
        self.ra = self.transport.ra
        self.ctx = self.transport.client
        self.baseurl = svn.ra.get_repos_root(self.ra)
        # Module is either empty or a repository path starting with
        # a slash and not ending with a slash.
        self.module = urllib.unquote(self.url[len(self.baseurl):])
        self.prevmodule = None
        self.rootmodule = self.module
        self.commits = {}
        self.paths = {}
        self.uuid = svn.ra.get_uuid(self.ra)
    except SubversionException:
        ui.traceback()
        raise NoRepo(_("%s does not look like a Subversion repository")
                     % self.url)

    if rev:
        try:
            latest = int(rev)
        except ValueError:
            raise util.Abort(_('svn: revision %s is not an integer') % rev)

    # Read the trunk name once; _fetch_revisions() compares branch names
    # against it.
    self.trunkname = self.ui.config('convert', 'svn.trunk', 'trunk').strip('/')
    self.startrev = self.ui.config('convert', 'svn.startrev', default=0)
    try:
        self.startrev = int(self.startrev)
        if self.startrev < 0:
            self.startrev = 0
    except ValueError:
        raise util.Abort(_('svn: start revision %s is not an integer')
                         % self.startrev)

    self.head = self.latest(self.module, latest)
    if not self.head:
        raise util.Abort(_('no revision found in module %s')
                         % self.module)
    self.last_changed = self.revnum(self.head)

    self._changescache = None

    # Remember the working copy, if any, so converted() can record the
    # revision mapping inside it.
    if os.path.exists(os.path.join(url, '.svn/entries')):
        self.wc = url
    else:
        self.wc = None
    self.convertfp = None
284 285
def setrevmap(self, revmap):
    """Record in self.lastrevs the highest revision number per module.

    The keys of revmap are revision identifiers understood by revsplit().
    """
    lastrevs = {}
    for revid in revmap:
        module, revnum = self.revsplit(revid)[1:]
        if revnum > lastrevs.setdefault(module, revnum):
            lastrevs[module] = revnum
    self.lastrevs = lastrevs
293 294
def exists(self, path, optrev):
    """Return True if listing path under self.url at optrev succeeds."""
    try:
        svn.client.ls(self.url.rstrip('/') + '/' + urllib.quote(path),
                      optrev, False, self.ctx)
        return True
    except SubversionException:
        return False
301 302
def getheads(self):
    """Return the list of head revision ids, the module head first.

    The trunk, tags and branches directories are looked up from the
    convert.svn.* settings, falling back to their conventional names.
    Raises util.Abort when a configured directory is missing, when the
    module has no revision, or when a start revision is combined with
    several heads or lies after the head.
    """

    def isdir(path, revnum):
        kind = self._checkpath(path, revnum)
        return kind == svn.core.svn_node_dir

    def getcfgpath(name, rev):
        cfgpath = self.ui.config('convert', 'svn.' + name)
        # An explicitly empty setting disables the directory.
        if cfgpath is not None and cfgpath.strip() == '':
            return None
        path = (cfgpath or name).strip('/')
        if not self.exists(path, rev):
            if self.module.endswith(path) and name == 'trunk':
                # we are converting from inside this directory
                return None
            if cfgpath:
                raise util.Abort(_('expected %s to be at %r, but not found')
                                 % (name, path))
            return None
        self.ui.note(_('found %s at %r\n') % (name, path))
        return path

    rev = optrev(self.last_changed)
    oldmodule = ''
    trunk = getcfgpath('trunk', rev)
    self.tags = getcfgpath('tags', rev)
    branches = getcfgpath('branches', rev)

    # If the project has a trunk or branches, we will extract heads
    # from them. We keep the project root otherwise.
    if trunk:
        oldmodule = self.module or ''
        self.module += '/' + trunk
        self.head = self.latest(self.module, self.last_changed)
        if not self.head:
            raise util.Abort(_('no revision found in module %s')
                             % self.module)

    # First head in the list is the module's head
    self.heads = [self.head]
    if self.tags is not None:
        self.tags = '%s/%s' % (oldmodule , (self.tags or 'tags'))

    # Check if branches bring a few more heads to the list
    if branches:
        rpath = self.url.strip('/')
        branchnames = svn.client.ls(rpath + '/' + urllib.quote(branches),
                                    rev, False, self.ctx)
        for branch in branchnames.keys():
            module = '%s/%s/%s' % (oldmodule, branches, branch)
            if not isdir(module, self.last_changed):
                continue
            brevid = self.latest(module, self.last_changed)
            if not brevid:
                self.ui.note(_('ignoring empty branch %s\n') % branch)
                continue
            self.ui.note(_('found branch %s at %d\n') %
                         (branch, self.revnum(brevid)))
            self.heads.append(brevid)

    if self.startrev and self.heads:
        if len(self.heads) > 1:
            raise util.Abort(_('svn: start revision is not supported '
                               'with more than one branch'))
        revnum = self.revnum(self.heads[0])
        if revnum < self.startrev:
            raise util.Abort(
                _('svn: no revision found after start revision %d')
                % self.startrev)

    return self.heads
373 374
def getchanges(self, rev):
    """Return (files, copies) for revision rev.

    files is a sorted list of (path, rev) pairs and copies maps copied
    paths to their source. A revision without parents is listed in full.
    self.removed is updated as a side effect and the cached paths of rev
    are discarded.
    """
    if self._changescache and self._changescache[0] == rev:
        return self._changescache[1]
    self._changescache = None
    (paths, parents) = self.paths[rev]
    if parents:
        files, self.removed, copies = self.expandpaths(rev, paths, parents)
    else:
        # Perform a full checkout on roots
        uuid, module, revnum = self.revsplit(rev)
        entries = svn.client.ls(self.baseurl + urllib.quote(module),
                                optrev(revnum), True, self.ctx)
        files = [n for n, e in entries.iteritems()
                 if e.kind == svn.core.svn_node_file]
        copies = {}
        self.removed = set()

    files.sort()
    files = [(f, rev) for f in files]

    # caller caches the result, so free it here to release memory
    del self.paths[rev]
    return (files, copies)
397 398
def getchangedfiles(self, rev, i):
    """Return the names of the files changed in rev.

    The full getchanges() result is kept in self._changescache so that a
    following getchanges(rev) call can reuse it. i is not used.
    """
    changes = self.getchanges(rev)
    self._changescache = (rev, changes)
    return [f[0] for f in changes[0]]
402 403
def getcommit(self, rev):
    """Return the commit object for rev and drop it from self.commits.

    When rev is not cached yet, its module becomes the current module and
    the revision log is fetched first.
    """
    if rev not in self.commits:
        uuid, module, revnum = self.revsplit(rev)
        self.module = module
        self.reparent(module)
        # We assume that:
        # - requests for revisions after "stop" come from the
        # revision graph backward traversal. Cache all of them
        # down to stop, they will be used eventually.
        # - requests for revisions before "stop" come to get
        # isolated branches parents. Just fetch what is needed.
        stop = self.lastrevs.get(module, 0)
        if revnum < stop:
            stop = revnum + 1
        self._fetch_revisions(revnum, stop)
    # caller caches the result, so free it here to release memory
    return self.commits.pop(rev)
422 423
def gettags(self):
    """Return a dict mapping tag names to the revision ids they point to.

    The dict is empty when no tags directory was found by getheads().
    """
    tags = {}
    if self.tags is None:
        return tags

    # svn tags are just a convention, project branches left in a
    # 'tags' directory. There is no other relationship than
    # ancestry, which is expensive to discover and makes them hard
    # to update incrementally. Worse, past revisions may be
    # referenced by tags far away in the future, requiring a deep
    # history traversal on every calculation. Current code
    # performs a single backward traversal, tracking moves within
    # the tags directory (tag renaming) and recording a new tag
    # everytime a project is copied from outside the tags
    # directory. It also lists deleted tags, this behaviour may
    # change in the future.
    pendings = []
    tagspath = self.tags
    start = svn.ra.get_latest_revnum(self.ra)
    stream = self._getlog([self.tags], start, self.startrev)
    try:
        for entry in stream:
            origpaths, revnum, author, date, message = entry
            copies = [(e.copyfrom_path, e.copyfrom_rev, p) for p, e
                      in origpaths.iteritems() if e.copyfrom_path]
            # Apply moves/copies from more specific to general
            copies.sort(reverse=True)

            srctagspath = tagspath
            if copies and copies[-1][2] == tagspath:
                # Track tags directory moves
                srctagspath = copies.pop()[0]

            for source, sourcerev, dest in copies:
                if not dest.startswith(tagspath + '/'):
                    continue
                for tag in pendings:
                    if tag[0].startswith(dest):
                        tagpath = source + tag[0][len(dest):]
                        tag[:2] = [tagpath, sourcerev]
                        break
                else:
                    pendings.append([source, sourcerev, dest])

            # Filter out tags with children coming from different
            # parts of the repository like:
            # /tags/tag.1 (from /trunk:10)
            # /tags/tag.1/foo (from /branches/foo:12)
            # Here /tags/tag.1 is discarded as well as its children.
            # It happens with tools like cvs2svn. Such tags cannot
            # be represented in mercurial.
            addeds = dict((p, e.copyfrom_path) for p, e
                          in origpaths.iteritems()
                          if e.action == 'A' and e.copyfrom_path)
            badroots = set()
            for destroot in addeds:
                for source, sourcerev, dest in pendings:
                    if (not dest.startswith(destroot + '/')
                        or source.startswith(addeds[destroot] + '/')):
                        continue
                    badroots.add(destroot)
                    break

            for badroot in badroots:
                pendings = [p for p in pendings if p[2] != badroot
                            and not p[2].startswith(badroot + '/')]

            # Tell tag renamings from tag creations
            remainings = []
            for source, sourcerev, dest in pendings:
                tagname = dest.split('/')[-1]
                if source.startswith(srctagspath):
                    remainings.append([source, sourcerev, tagname])
                    continue
                if tagname in tags:
                    # Keep the latest tag value
                    continue
                # From revision may be fake, get one with changes
                try:
                    tagid = self.latest(source, sourcerev)
                    if tagid and tagname not in tags:
                        tags[tagname] = tagid
                except SvnPathNotFound:
                    # It happens when we are following directories
                    # we assumed were copied with their parents
                    # but were really created in the tag
                    # directory.
                    pass
            pendings = remainings
            tagspath = srctagspath
    finally:
        stream.close()
    return tags
516 517
def converted(self, rev, destrev):
    """Append 'destrev revnum' to .svn/hg-shamap in the working copy.

    Does nothing when the source is not a working copy. The map file is
    opened on first use and flushed after every write.
    """
    if not self.wc:
        return
    if self.convertfp is None:
        self.convertfp = open(os.path.join(self.wc, '.svn', 'hg-shamap'),
                              'a')
    self.convertfp.write('%s %d\n' % (destrev, self.revnum(rev)))
    self.convertfp.flush()
525 526
def revid(self, revnum, module=None):
    """Return the 'svn:UUID/MODULE@REVNUM' identifier of a revision.

    module defaults to the current module when it is None or empty.
    """
    return 'svn:%s%s@%s' % (self.uuid, module or self.module, revnum)
528 529
def revnum(self, rev):
    """Return the integer revision number following the last '@' in rev."""
    return int(rev.split('@')[-1])
531 532
def revsplit(self, rev):
    """Split a revision id into (uuid, module, revnum).

    module is '' when the id has no path part, otherwise it starts with
    a slash. The first four characters of the id (the 'svn:' prefix
    written by revid()) are dropped from the uuid.
    """
    url, revnum = rev.rsplit('@', 1)
    revnum = int(revnum)
    parts = url.split('/', 1)
    uuid = parts.pop(0)[4:]
    mod = ''
    if parts:
        mod = '/' + parts[0]
    return uuid, mod, revnum
541 542
def latest(self, path, stop=0):
    """Find the latest revid affecting path, up to stop. It may return
    a revision in a different module, since a branch may be moved without
    a change being reported. Return None if computed module does not
    belong to rootmodule subtree.

    A false stop means the latest revision of the repository. Raises
    SvnPathNotFound when path cannot be stat'ed at stop.
    """
    if not path.startswith(self.rootmodule):
        # Requests on foreign branches may be forbidden at server level
        self.ui.debug('ignoring foreign branch %r\n' % path)
        return None

    if not stop:
        stop = svn.ra.get_latest_revnum(self.ra)
    try:
        prevmodule = self.reparent('')
        try:
            dirent = svn.ra.stat(self.ra, path.strip('/'), stop)
        finally:
            # Restore the previous parent even when stat() fails, so the
            # transport does not stay parented at the repository root.
            self.reparent(prevmodule)
    except SubversionException:
        dirent = None
    if not dirent:
        raise SvnPathNotFound(_('%s not found up to revision %d')
                              % (path, stop))

    # stat() gives us the previous revision on this line of
    # development, but it might be in *another module*. Fetch the
    # log and detect renames down to the latest revision.
    stream = self._getlog([path], stop, dirent.created_rev)
    try:
        for entry in stream:
            paths, revnum, author, date, message = entry
            if revnum <= dirent.created_rev:
                break

            for p in paths:
                if not path.startswith(p) or not paths[p].copyfrom_path:
                    continue
                newpath = paths[p].copyfrom_path + path[len(p):]
                self.ui.debug("branch renamed from %s to %s at %d\n" %
                              (path, newpath, revnum))
                path = newpath
                break
    finally:
        stream.close()

    if not path.startswith(self.rootmodule):
        self.ui.debug('ignoring foreign branch %r\n' % path)
        return None
    return self.revid(dirent.created_rev, path)
590 591
def reparent(self, module):
    """Reparent the svn transport and return the previous parent.

    Nothing is done, and module itself is returned, when the transport
    is already parented at module. A previous parent of None is reported
    as ''.
    """
    if self.prevmodule == module:
        return module
    svnurl = self.baseurl + urllib.quote(module)
    prevmodule = self.prevmodule
    if prevmodule is None:
        prevmodule = ''
    self.ui.debug("reparent to %s\n" % svnurl)
    svn.ra.reparent(self.ra, svnurl)
    self.prevmodule = module
    return prevmodule
603 604
def expandpaths(self, rev, paths, parents):
    """Expand the changed paths of rev into individual files.

    paths is a sequence of (path, entry) pairs from the revision log and
    parents the list of parent revision ids. Return a tuple
    (changed, removed, copies): the list of changed files (removed ones
    included), the set of removed files, and a dict mapping copied files
    to their source.
    """
    changed, removed = set(), set()
    copies = {}

    new_module, revnum = self.revsplit(rev)[1:]
    if new_module != self.module:
        self.module = new_module
        self.reparent(self.module)

    for i, (path, ent) in enumerate(paths):
        self.ui.progress(_('scanning paths'), i, item=path,
                         total=len(paths))
        entrypath = self.getrelpath(path)

        kind = self._checkpath(entrypath, revnum)
        if kind == svn.core.svn_node_file:
            changed.add(self.recode(entrypath))
            if not ent.copyfrom_path or not parents:
                continue
            # Copy sources not in parent revisions cannot be
            # represented, ignore their origin for now
            pmodule, prevnum = self.revsplit(parents[0])[1:]
            if ent.copyfrom_rev < prevnum:
                continue
            copyfrom_path = self.getrelpath(ent.copyfrom_path, pmodule)
            if not copyfrom_path:
                continue
            self.ui.debug("copied to %s from %s@%s\n" %
                          (entrypath, copyfrom_path, ent.copyfrom_rev))
            copies[self.recode(entrypath)] = self.recode(copyfrom_path)
        elif kind == 0: # gone, but had better be a deleted *file*
            self.ui.debug("gone from %s\n" % ent.copyfrom_rev)
            pmodule, prevnum = self.revsplit(parents[0])[1:]
            parentpath = pmodule + "/" + entrypath
            fromkind = self._checkpath(entrypath, prevnum, pmodule)

            if fromkind == svn.core.svn_node_file:
                removed.add(self.recode(entrypath))
            elif fromkind == svn.core.svn_node_dir:
                oroot = parentpath.strip('/')
                nroot = path.strip('/')
                children = self._iterfiles(oroot, prevnum)
                for childpath in children:
                    childpath = childpath.replace(oroot, nroot)
                    childpath = self.getrelpath("/" + childpath, pmodule)
                    if childpath:
                        removed.add(self.recode(childpath))
            else:
                self.ui.debug('unknown path in revision %d: %s\n' % \
                              (revnum, path))
        elif kind == svn.core.svn_node_dir:
            if ent.action == 'M':
                # If the directory just had a prop change,
                # then we shouldn't need to look for its children.
                continue
            if ent.action == 'R' and parents:
                # If a directory is replacing a file, mark the previous
                # file as deleted
                pmodule, prevnum = self.revsplit(parents[0])[1:]
                pkind = self._checkpath(entrypath, prevnum, pmodule)
                if pkind == svn.core.svn_node_file:
                    removed.add(self.recode(entrypath))
                elif pkind == svn.core.svn_node_dir:
                    # We do not know what files were kept or removed,
                    # mark them all as changed.
                    for childpath in self._iterfiles(pmodule, prevnum):
                        childpath = self.getrelpath("/" + childpath)
                        if childpath:
                            changed.add(self.recode(childpath))

            for childpath in self._iterfiles(path, revnum):
                childpath = self.getrelpath("/" + childpath)
                if childpath:
                    changed.add(self.recode(childpath))

            # Handle directory copies
            if not ent.copyfrom_path or not parents:
                continue
            # Copy sources not in parent revisions cannot be
            # represented, ignore their origin for now
            pmodule, prevnum = self.revsplit(parents[0])[1:]
            if ent.copyfrom_rev < prevnum:
                continue
            copyfrompath = self.getrelpath(ent.copyfrom_path, pmodule)
            if not copyfrompath:
                continue
            self.ui.debug("mark %s came from %s:%d\n"
                          % (path, copyfrompath, ent.copyfrom_rev))
            children = self._iterfiles(ent.copyfrom_path, ent.copyfrom_rev)
            for childpath in children:
                childpath = self.getrelpath("/" + childpath, pmodule)
                if not childpath:
                    continue
                copytopath = path + childpath[len(copyfrompath):]
                copytopath = self.getrelpath(copytopath)
                copies[self.recode(copytopath)] = self.recode(childpath)

    self.ui.progress(_('scanning paths'), None)
    changed.update(removed)
    return (list(changed), removed, copies)
704 705
705 706 def _fetch_revisions(self, from_revnum, to_revnum):
706 707 if from_revnum < to_revnum:
707 708 from_revnum, to_revnum = to_revnum, from_revnum
708 709
709 710 self.child_cset = None
710 711
711 712 def parselogentry(orig_paths, revnum, author, date, message):
712 713 """Return the parsed commit object or None, and True if
713 714 the revision is a branch root.
714 715 """
715 716 self.ui.debug("parsing revision %d (%d changes)\n" %
716 717 (revnum, len(orig_paths)))
717 718
718 719 branched = False
719 720 rev = self.revid(revnum)
720 721 # branch log might return entries for a parent we already have
721 722
722 723 if rev in self.commits or revnum < to_revnum:
723 724 return None, branched
724 725
725 726 parents = []
726 727 # check whether this revision is the start of a branch or part
727 728 # of a branch renaming
728 729 orig_paths = sorted(orig_paths.iteritems())
729 730 root_paths = [(p, e) for p, e in orig_paths
730 731 if self.module.startswith(p)]
731 732 if root_paths:
732 733 path, ent = root_paths[-1]
733 734 if ent.copyfrom_path:
734 735 branched = True
735 736 newpath = ent.copyfrom_path + self.module[len(path):]
736 737 # ent.copyfrom_rev may not be the actual last revision
737 738 previd = self.latest(newpath, ent.copyfrom_rev)
738 739 if previd is not None:
739 740 prevmodule, prevnum = self.revsplit(previd)[1:]
740 741 if prevnum >= self.startrev:
741 742 parents = [previd]
742 743 self.ui.note(
743 744 _('found parent of branch %s at %d: %s\n') %
744 745 (self.module, prevnum, prevmodule))
745 746 else:
746 747 self.ui.debug("no copyfrom path, don't know what to do.\n")
747 748
748 749 paths = []
749 750 # filter out unrelated paths
750 751 for path, ent in orig_paths:
751 752 if self.getrelpath(path) is None:
752 753 continue
753 754 paths.append((path, ent))
754 755
755 756 # Example SVN datetime. Includes microseconds.
756 757 # ISO-8601 conformant
757 758 # '2007-01-04T17:35:00.902377Z'
758 759 date = util.parsedate(date[:19] + " UTC", ["%Y-%m-%dT%H:%M:%S"])
759 760
760 761 log = message and self.recode(message) or ''
761 762 author = author and self.recode(author) or ''
762 763 try:
763 764 branch = self.module.split("/")[-1]
764 trunkname = self.ui.config('convert', 'svn.trunk', 'trunk')
765 if branch == trunkname.strip('/'):
766 branch = ''
765 if branch == self.trunkname:
766 branch = None
767 767 except IndexError:
768 768 branch = None
769 769
770 770 cset = commit(author=author,
771 771 date=util.datestr(date),
772 772 desc=log,
773 773 parents=parents,
774 774 branch=branch,
775 775 rev=rev)
776 776
777 777 self.commits[rev] = cset
778 778 # The parents list is *shared* among self.paths and the
779 779 # commit object. Both will be updated below.
780 780 self.paths[rev] = (paths, cset.parents)
781 781 if self.child_cset and not self.child_cset.parents:
782 782 self.child_cset.parents[:] = [rev]
783 783 self.child_cset = cset
784 784 return cset, branched
785 785
786 786 self.ui.note(_('fetching revision log for "%s" from %d to %d\n') %
787 787 (self.module, from_revnum, to_revnum))
788 788
789 789 try:
790 790 firstcset = None
791 791 lastonbranch = False
792 792 stream = self._getlog([self.module], from_revnum, to_revnum)
793 793 try:
794 794 for entry in stream:
795 795 paths, revnum, author, date, message = entry
796 796 if revnum < self.startrev:
797 797 lastonbranch = True
798 798 break
799 799 if not paths:
800 800 self.ui.debug('revision %d has no entries\n' % revnum)
801 801 # If we ever leave the loop on an empty
802 802 # revision, do not try to get a parent branch
803 803 lastonbranch = lastonbranch or revnum == 0
804 804 continue
805 805 cset, lastonbranch = parselogentry(paths, revnum, author,
806 806 date, message)
807 807 if cset:
808 808 firstcset = cset
809 809 if lastonbranch:
810 810 break
811 811 finally:
812 812 stream.close()
813 813
814 814 if not lastonbranch and firstcset and not firstcset.parents:
815 815 # The first revision of the sequence (the last fetched one)
816 816 # has invalid parents if not a branch root. Find the parent
817 817 # revision now, if any.
818 818 try:
819 819 firstrevnum = self.revnum(firstcset.rev)
820 820 if firstrevnum > 1:
821 821 latest = self.latest(self.module, firstrevnum - 1)
822 822 if latest:
823 823 firstcset.parents.append(latest)
824 824 except SvnPathNotFound:
825 825 pass
826 826 except SubversionException, (inst, num):
827 827 if num == svn.core.SVN_ERR_FS_NO_SUCH_REVISION:
828 828 raise util.Abort(_('svn: branch has no revision %s') % to_revnum)
829 829 raise
830 830
831 831 def getfile(self, file, rev):
832 832 # TODO: ra.get_file transmits the whole file instead of diffs.
833 833 if file in self.removed:
834 834 raise IOError()
835 835 mode = ''
836 836 try:
837 837 new_module, revnum = self.revsplit(rev)[1:]
838 838 if self.module != new_module:
839 839 self.module = new_module
840 840 self.reparent(self.module)
841 841 io = StringIO()
842 842 info = svn.ra.get_file(self.ra, file, revnum, io)
843 843 data = io.getvalue()
844 844 # ra.get_files() seems to keep a reference on the input buffer
845 845 # preventing collection. Release it explicitely.
846 846 io.close()
847 847 if isinstance(info, list):
848 848 info = info[-1]
849 849 mode = ("svn:executable" in info) and 'x' or ''
850 850 mode = ("svn:special" in info) and 'l' or mode
851 851 except SubversionException, e:
852 852 notfound = (svn.core.SVN_ERR_FS_NOT_FOUND,
853 853 svn.core.SVN_ERR_RA_DAV_PATH_NOT_FOUND)
854 854 if e.apr_err in notfound: # File not found
855 855 raise IOError()
856 856 raise
857 857 if mode == 'l':
858 858 link_prefix = "link "
859 859 if data.startswith(link_prefix):
860 860 data = data[len(link_prefix):]
861 861 return data, mode
862 862
def _iterfiles(self, path, revnum):
    """Enumerate all files in path at revnum, recursively.

    Return a generator of paths made of path (without surrounding
    slashes), a slash and the name of each file entry listed.
    """
    path = path.strip('/')
    pool = Pool()
    rpath = '/'.join([self.baseurl, urllib.quote(path)]).strip('/')
    entries = svn.client.ls(rpath, optrev(revnum), True, self.ctx, pool)
    return ((path + '/' + p) for p, e in entries.iteritems()
            if e.kind == svn.core.svn_node_file)
871 871
872 872 def getrelpath(self, path, module=None):
873 873 if module is None:
874 874 module = self.module
875 875 # Given the repository url of this wc, say
876 876 # "http://server/plone/CMFPlone/branches/Plone-2_0-branch"
877 877 # extract the "entry" portion (a relative path) from what
878 878 # svn log --xml says, ie
879 879 # "/CMFPlone/branches/Plone-2_0-branch/tests/PloneTestCase.py"
880 880 # that is to say "tests/PloneTestCase.py"
881 881 if path.startswith(module):
882 882 relative = path.rstrip('/')[len(module):]
883 883 if relative.startswith('/'):
884 884 return relative[1:]
885 885 elif relative == '':
886 886 return relative
887 887
888 888 # The path is outside our tracked tree...
889 889 self.ui.debug('%r is not under %r, ignoring\n' % (path, module))
890 890 return None
891 891
892 892 def _checkpath(self, path, revnum, module=None):
893 893 if module is not None:
894 894 prevmodule = self.reparent('')
895 895 path = module + '/' + path
896 896 try:
897 897 # ra.check_path does not like leading slashes very much, it leads
898 898 # to PROPFIND subversion errors
899 899 return svn.ra.check_path(self.ra, path.strip('/'), revnum)
900 900 finally:
901 901 if module is not None:
902 902 self.reparent(prevmodule)
903 903
904 904 def _getlog(self, paths, start, end, limit=0, discover_changed_paths=True,
905 905 strict_node_history=False):
906 906 # Normalize path names, svn >= 1.5 only wants paths relative to
907 907 # supplied URL
908 908 relpaths = []
909 909 for p in paths:
910 910 if not p.startswith('/'):
911 911 p = self.module + '/' + p
912 912 relpaths.append(p.strip('/'))
913 913 args = [self.baseurl, relpaths, start, end, limit, discover_changed_paths,
914 914 strict_node_history]
915 915 arg = encodeargs(args)
916 916 hgexe = util.hgexecutable()
917 917 cmd = '%s debugsvnlog' % util.shellquote(hgexe)
918 918 stdin, stdout = util.popen2(util.quotecommand(cmd))
919 919 stdin.write(arg)
920 920 try:
921 921 stdin.close()
922 922 except IOError:
923 923 raise util.Abort(_('Mercurial failed to run itself, check'
924 924 ' hg executable is in PATH'))
925 925 return logstream(stdout)
926 926
# Shell script written by svn_sink.__init__ to hooks/pre-revprop-change of
# a repository it has just created. It receives the repository, revision,
# user, property name and action as positional arguments and only lets
# through the revision property changes the sink needs: modifying svn:log
# and adding hg:convert-branch or hg:convert-rev. Anything else is refused
# with a message on stderr and exit status 1.
pre_revprop_change = '''#!/bin/sh

REPOS="$1"
REV="$2"
USER="$3"
PROPNAME="$4"
ACTION="$5"

if [ "$ACTION" = "M" -a "$PROPNAME" = "svn:log" ]; then exit 0; fi
if [ "$ACTION" = "A" -a "$PROPNAME" = "hg:convert-branch" ]; then exit 0; fi
if [ "$ACTION" = "A" -a "$PROPNAME" = "hg:convert-rev" ]; then exit 0; fi

echo "Changing prohibited revision property" >&2
exit 1
'''
942 942
class svn_sink(converter_sink, commandline):
    """Conversion sink committing changesets through a Subversion checkout.

    Changes are applied to a working copy on disk and then recorded by
    driving the 'svn' command line client (add, copy, delete, propset,
    commit). Bookkeeping files (revision map, author map, child map) are
    kept inside the working copy's '.svn' directory.
    """

    # Extracts the new revision number from the output of 'svn commit'.
    commit_re = re.compile(r'Committed revision (\d+).', re.M)

    def prerun(self):
        """Change into the working copy, if one is set.

        NOTE(review): presumably invoked by commandline before each
        command is run -- confirm against common.commandline.
        """
        if self.wc:
            os.chdir(self.wc)

    def postrun(self):
        """Change back to the directory that was current at construction."""
        if self.wc:
            os.chdir(self.cwd)

    def join(self, name):
        """Return the path of name inside the working copy's '.svn' dir."""
        return os.path.join(self.wc, '.svn', name)

    def revmapfile(self):
        """Return the path of the revision map file ('hg-shamap')."""
        return self.join('hg-shamap')

    def authorfile(self):
        """Return the path of the author map file ('hg-authormap')."""
        return self.join('hg-authormap')

    def __init__(self, ui, path):
        """Prepare a working copy for the conversion target at path.

        If path already is a working copy (it contains '.svn/entries')
        it is updated and used directly. Otherwise a working copy named
        '<basename of path>-wc' is checked out in the current directory;
        when the parent directory of path exists and path is not yet a
        repository (no 'db/fs-type'), the repository is created first
        with 'svnadmin create' and gets a pre-revprop-change hook.

        Raises MissingTool when the Subversion python bindings could
        not be imported.
        """
        if svn is None:
            raise MissingTool(_('Could not load Subversion python bindings'))
        converter_sink.__init__(self, ui, path)
        commandline.__init__(self, ui, 'svn')
        # Pending operations, flushed by putcommit().
        self.delete = []
        self.setexec = []
        self.delexec = []
        self.copies = []
        self.wc = None
        self.cwd = os.getcwd()

        path = os.path.realpath(path)

        created = False
        if os.path.isfile(os.path.join(path, '.svn', 'entries')):
            self.wc = path
            self.run0('update')
        else:
            wcpath = os.path.join(os.getcwd(), os.path.basename(path) + '-wc')

            if os.path.isdir(os.path.dirname(path)):
                if not os.path.exists(os.path.join(path, 'db', 'fs-type')):
                    ui.status(_('initializing svn repository %r\n') %
                              os.path.basename(path))
                    commandline(ui, 'svnadmin').run0('create', path)
                    created = path
                # Turn the local path into a file:// URL for checkout.
                path = util.normpath(path)
                if not path.startswith('/'):
                    path = '/' + path
                path = 'file://' + path

            ui.status(_('initializing svn working copy %r\n')
                      % os.path.basename(wcpath))
            self.run0('checkout', path, wcpath)

            self.wc = wcpath
        self.opener = util.opener(self.wc)
        self.wopener = util.opener(self.wc)
        self.childmap = mapfile(ui, self.join('hg-childmap'))
        # None when the filesystem cannot store the execute bit.
        self.is_exec = util.checkexec(self.wc) and util.is_exec or None

        if created:
            # Allow the revision property changes putcommit() performs.
            hook = os.path.join(created, 'hooks', 'pre-revprop-change')
            fp = open(hook, 'w')
            try:
                fp.write(pre_revprop_change)
            finally:
                fp.close()
            util.set_flags(hook, False, True)

        xport = transport.SvnRaTransport(url=geturl(path))
        self.uuid = svn.ra.get_uuid(xport.ra)

    def wjoin(self, *names):
        """Return the path obtained by joining names onto the working copy."""
        return os.path.join(self.wc, *names)

    def putfile(self, filename, flags, data):
        """Write data to filename in the working copy and track exec changes.

        With 'l' in flags a symlink pointing at data is created;
        otherwise a regular file is written, replacing a symlink that
        may be in the way. The file is queued in self.setexec or
        self.delexec when its svn:executable property has to change.
        """
        if 'l' in flags:
            self.wopener.symlink(data, filename)
        else:
            target = self.wjoin(filename)
            try:
                # Remove the link that was tested, i.e. the one inside the
                # working copy; a bare relative name would be resolved
                # against the process' current directory instead.
                if os.path.islink(target):
                    os.unlink(target)
            except OSError:
                pass
            fp = self.wopener(filename, 'w')
            try:
                fp.write(data)
            finally:
                fp.close()

            if self.is_exec:
                was_exec = self.is_exec(self.wjoin(filename))
            else:
                # On filesystems not supporting execute-bit, there is no way
                # to know if it is set but asking subversion. Setting it
                # systematically is just as expensive and much simpler.
                was_exec = 'x' not in flags

            util.set_flags(self.wjoin(filename), False, 'x' in flags)
            if was_exec:
                if 'x' not in flags:
                    self.delexec.append(filename)
            else:
                if 'x' in flags:
                    self.setexec.append(filename)

    def _copyfile(self, source, dest):
        """Record a copy from source to dest with 'svn copy'.

        An existing dest is moved aside for the duration of the command
        and put back afterwards, so its current content is preserved.
        """
        # SVN's copy command pukes if the destination file exists, but
        # our copyfile method expects to record a copy that has
        # already occurred. Cross the semantic gap.
        wdest = self.wjoin(dest)
        exists = os.path.lexists(wdest)
        if exists:
            # mkstemp is only used to reserve a unique name next to dest.
            fd, tempname = tempfile.mkstemp(
                prefix='hg-copy-', dir=os.path.dirname(wdest))
            os.close(fd)
            os.unlink(tempname)
            os.rename(wdest, tempname)
        try:
            self.run0('copy', source, dest)
        finally:
            if exists:
                try:
                    os.unlink(wdest)
                except OSError:
                    pass
                os.rename(tempname, wdest)

    def dirs_of(self, files):
        """Return the set of directories involved in files.

        This includes every entry that is itself a directory in the
        working copy and every parent directory prefix of each entry.
        """
        dirs = set()
        for f in files:
            if os.path.isdir(self.wjoin(f)):
                dirs.add(f)
            for i in strutil.rfindall(f, '/'):
                dirs.add(f[:i])
        return dirs

    def add_dirs(self, files):
        """Run 'svn add' on directories of files lacking '.svn/entries'.

        Returns the list of directories that were added.
        """
        missing = [d for d in sorted(self.dirs_of(files))
                   if not os.path.exists(self.wjoin(d, '.svn', 'entries'))]
        if missing:
            self.xargs(missing, 'add', non_recursive=True, quiet=True)
        return missing

    def add_files(self, files):
        """Run 'svn add' on files (if any) and return them."""
        if files:
            self.xargs(files, 'add', quiet=True)
        return files

    def tidy_dirs(self, names):
        """Run 'svn delete' on directories of names that became empty.

        A directory counts as empty when the only thing left in it is
        its '.svn' administrative directory. Directories are visited
        in reverse sorted order so that children come before their
        parents. Returns the list of directories that were deleted.
        """
        deleted = []
        for d in sorted(self.dirs_of(names), reverse=True):
            wd = self.wjoin(d)
            # os.listdir() returns a list, so it has to be compared with
            # a list; comparing with the bare string never matches.
            if os.listdir(wd) == ['.svn']:
                self.run0('delete', d)
                deleted.append(d)
        return deleted

    def addchild(self, parent, child):
        """Remember child as the revision committed on top of parent."""
        self.childmap[parent] = child

    def revid(self, rev):
        """Return the identifier 'svn:<repository uuid>@<rev>' for rev."""
        return u"svn:%s@%s" % (self.uuid, rev)

    def putcommit(self, files, copies, parents, commit, source, revmap):
        """Apply a changeset to the working copy and commit it.

        files is a sequence of (name, version) pairs whose content is
        fetched with source.getfile(); names for which that raises
        IOError are deleted. copies maps destination names to their
        copy sources.

        Returns the identifier of the resulting revision. If one of the
        parents already has a recorded child, that child's identifier
        is returned without committing. If svn reports no new revision
        and files is empty, parents[0] is returned.

        Raises util.Abort when the output of 'svn commit' cannot be
        understood.
        """
        # Apply changes to working copy
        for f, v in files:
            try:
                data, mode = source.getfile(f, v)
            except IOError:
                self.delete.append(f)
            else:
                self.putfile(f, mode, data)
                if f in copies:
                    self.copies.append([copies[f], f])
        files = [f[0] for f in files]

        for parent in parents:
            try:
                return self.revid(self.childmap[parent])
            except KeyError:
                pass
        entries = set(self.delete)
        files = frozenset(files)
        entries.update(self.add_dirs(files.difference(entries)))
        if self.copies:
            for s, d in self.copies:
                self._copyfile(s, d)
            self.copies = []
        if self.delete:
            self.xargs(self.delete, 'delete')
            self.delete = []
        entries.update(self.add_files(files.difference(entries)))
        entries.update(self.tidy_dirs(entries))
        if self.delexec:
            self.xargs(self.delexec, 'propdel', 'svn:executable')
            self.delexec = []
        if self.setexec:
            self.xargs(self.setexec, 'propset', 'svn:executable', '*')
            self.setexec = []

        fd, messagefile = tempfile.mkstemp(prefix='hg-convert-')
        try:
            # Writing the message happens inside the try block so that
            # the temporary file is removed even if the write fails.
            fp = os.fdopen(fd, 'w')
            try:
                fp.write(commit.desc)
            finally:
                fp.close()
            output = self.run0('commit',
                               username=util.shortuser(commit.author),
                               file=messagefile,
                               encoding='utf-8')
            try:
                rev = self.commit_re.search(output).group(1)
            except AttributeError:
                # No "Committed revision" line in the output.
                if not files:
                    return parents[0]
                self.ui.warn(_('unexpected svn output:\n'))
                self.ui.warn(output)
                raise util.Abort(_('unable to cope with svn output'))
            if commit.rev:
                self.run('propset', 'hg:convert-rev', commit.rev,
                         revprop=True, revision=rev)
            if commit.branch and commit.branch != 'default':
                self.run('propset', 'hg:convert-branch', commit.branch,
                         revprop=True, revision=rev)
            for parent in parents:
                self.addchild(parent, rev)
            return self.revid(rev)
        finally:
            os.unlink(messagefile)

    def puttags(self, tags):
        """Warn that tags cannot be written and return (None, None)."""
        self.ui.warn(_('writing Subversion tags is not yet implemented\n'))
        return None, None
General Comments 0
You need to be logged in to leave comments. Login now