##// END OF EJS Templates
convert/svn: fix long line
Matt Mackall -
r11167:b5ef95b5 default
parent child Browse files
Show More
@@ -1,1160 +1,1160 b''
1 1 # Subversion 1.4/1.5 Python API backend
2 2 #
3 3 # Copyright(C) 2007 Daniel Holth et al
4 4
5 5 import os
6 6 import re
7 7 import sys
8 8 import cPickle as pickle
9 9 import tempfile
10 10 import urllib
11 11 import urllib2
12 12
13 13 from mercurial import strutil, util, encoding
14 14 from mercurial.i18n import _
15 15
16 16 # Subversion stuff. Works best with very recent Python SVN bindings
17 17 # e.g. SVN 1.5 or backports. Thanks to the bzr folks for enhancing
18 18 # these bindings.
19 19
20 20 from cStringIO import StringIO
21 21
22 22 from common import NoRepo, MissingTool, commit, encodeargs, decodeargs
23 23 from common import commandline, converter_source, converter_sink, mapfile
24 24
25 25 try:
26 26 from svn.core import SubversionException, Pool
27 27 import svn
28 28 import svn.client
29 29 import svn.core
30 30 import svn.ra
31 31 import svn.delta
32 32 import transport
33 33 import warnings
34 34 warnings.filterwarnings('ignore',
35 35 module='svn.core',
36 36 category=DeprecationWarning)
37 37
38 38 except ImportError:
39 39 pass
40 40
class SvnPathNotFound(Exception):
    """Signals that a path could not be found in the repository."""
43 43
def geturl(path):
    """Return a Subversion URL for ``path``.

    The client library is asked first. When it refuses (it raises
    SubversionException) and ``path`` is an existing directory, a quoted
    file:// URL is built from its absolute path. Anything else is
    returned unchanged.
    """
    try:
        canonical = svn.core.svn_path_canonicalize(path)
        return svn.client.url_from_path(canonical)
    except SubversionException:
        # Fall through to the manual handling below.
        pass
    if not os.path.isdir(path):
        return path
    localpath = os.path.normpath(os.path.abspath(path))
    if os.name == 'nt':
        localpath = '/' + util.normpath(localpath)
    # Module URL is later compared with the repository URL returned
    # by svn API, which is UTF-8.
    localpath = encoding.tolocal(localpath)
    return 'file://%s' % urllib.quote(localpath)
58 58
def optrev(number):
    """Return an svn_opt_revision_t designating revision ``number``."""
    revision = svn.core.svn_opt_revision_t()
    revision.kind = svn.core.svn_opt_revision_number
    revision.value.number = number
    return revision
64 64
class changedpath(object):
    """Plain copy of the copyfrom_path, copyfrom_rev and action fields
    of a changed-path entry.
    """

    def __init__(self, p):
        for field in ('copyfrom_path', 'copyfrom_rev', 'action'):
            setattr(self, field, getattr(p, field))
70 70
71 71 def get_log_child(fp, url, paths, start, end, limit=0, discover_changed_paths=True,
72 72 strict_node_history=False):
73 73 protocol = -1
74 74 def receiver(orig_paths, revnum, author, date, message, pool):
75 75 if orig_paths is not None:
76 76 for k, v in orig_paths.iteritems():
77 77 orig_paths[k] = changedpath(v)
78 78 pickle.dump((orig_paths, revnum, author, date, message),
79 79 fp, protocol)
80 80
81 81 try:
82 82 # Use an ra of our own so that our parent can consume
83 83 # our results without confusing the server.
84 84 t = transport.SvnRaTransport(url=url)
85 85 svn.ra.get_log(t.ra, paths, start, end, limit,
86 86 discover_changed_paths,
87 87 strict_node_history,
88 88 receiver)
89 89 except SubversionException, (inst, num):
90 90 pickle.dump(num, fp, protocol)
91 91 except IOError:
92 92 # Caller may interrupt the iteration
93 93 pickle.dump(None, fp, protocol)
94 94 else:
95 95 pickle.dump(None, fp, protocol)
96 96 fp.close()
97 97 # With large history, cleanup process goes crazy and suddenly
98 98 # consumes *huge* amount of memory. The output file being closed,
99 99 # there is no need for clean termination.
100 100 os._exit(0)
101 101
def debugsvnlog(ui, **opts):
    """Fetch SVN log in a subprocess and channel them back to parent to
    avoid memory collection issues.
    """
    # Encoded arguments come in on stdin and the log entries go out on
    # stdout; both streams are switched to binary mode first.
    for stream in (sys.stdin, sys.stdout):
        util.set_binary(stream)
    logargs = decodeargs(sys.stdin.read())
    get_log_child(sys.stdout, *logargs)
110 110
class logstream(object):
    """Interruptible revision log iterator.

    Wraps a file-like object from which pickled log entries are read.
    Each entry is a 5-tuple (orig_paths, revnum, author, date, message).
    A pickled None ends the iteration; any other value is reported as an
    error raised by the producer.
    """
    def __init__(self, stdout):
        self._stdout = stdout

    def __iter__(self):
        while True:
            try:
                entry = pickle.load(self._stdout)
            except EOFError:
                # The stream ended before the terminating None was read.
                raise util.Abort(_('Mercurial failed to run itself, check'
                                   ' hg executable is in PATH'))
            try:
                # Only the shape of the entry is checked here; the
                # unpacked names are not used.
                orig_paths, revnum, author, date, message = entry
            except (TypeError, ValueError):
                # TypeError: entry is not iterable (None or an error
                # code). ValueError: wrong number of items. Anything
                # else (e.g. KeyboardInterrupt) must propagate untouched.
                if entry is None:
                    break
                raise SubversionException("child raised exception", entry)
            yield entry

    def close(self):
        """Close the underlying stream; calling it again is harmless."""
        if self._stdout:
            self._stdout.close()
            self._stdout = None
135 135
136 136
def filecheck(ui, path, proto):
    """Tell whether ``path`` looks like a local Subversion repository.

    The check is purely structural: the directory must contain the
    svn-specific entries 'locks', 'hooks', 'format' and 'db'. ``ui`` and
    ``proto`` are not used by this check.
    """
    markers = ('locks', 'hooks', 'format', 'db')
    missing = [name for name in markers
               if not os.path.exists(os.path.join(path, name))]
    return not missing
145 145
146 146 # Check to see if a given path is the root of an svn repo over http. We verify
147 147 # this by requesting a version-controlled URL we know can't exist and looking
148 148 # for the svn-specific "not found" XML.
149 149 def httpcheck(ui, path, proto):
150 150 try:
151 151 opener = urllib2.build_opener()
152 152 rsp = opener.open('%s://%s/!svn/ver/0/.svn' % (proto, path))
153 153 data = rsp.read()
154 154 except urllib2.HTTPError, inst:
155 155 if inst.code != 404:
156 156 # Except for 404 we cannot know for sure this is not an svn repo
157 157 ui.warn(_('svn: cannot probe remote repository, assume it could '
158 158 'be a subversion repository. Use --source-type if you '
159 159 'know better.\n'))
160 160 return True
161 161 data = inst.fp.read()
162 162 except:
163 163 # Could be urllib2.URLError if the URL is invalid or anything else.
164 164 return False
165 165 return '<m:human-readable errcode="160013">' in data
166 166
# URL scheme -> function used to recognize a repository for that scheme.
protomap = {
    'http': httpcheck,
    'https': httpcheck,
    'file': filecheck,
}

def issvnurl(ui, url):
    """Tell whether ``url``, or one of its parent paths, is recognized
    as a Subversion repository.

    A value without '://' is treated as a local path. The path is
    shortened one trailing component at a time until the check for its
    scheme succeeds or no '/' is left. Schemes without an entry in
    protomap are never recognized.
    """
    if '://' in url:
        proto, path = url.split('://', 1)
        if proto == 'file':
            path = urllib.url2pathname(path)
    else:
        proto = 'file'
        path = os.path.abspath(url)
    if proto == 'file':
        path = path.replace(os.sep, '/')
    probe = protomap.get(proto, lambda *args: False)
    while '/' in path:
        if probe(ui, path, proto):
            return True
        path = path.rsplit('/', 1)[0]
    return False
187 187
188 188 # SVN conversion code stolen from bzr-svn and tailor
189 189 #
190 190 # Subversion looks like a versioned filesystem, branches structures
191 191 # are defined by conventions and not enforced by the tool. First,
192 192 # we define the potential branches (modules) as "trunk" and "branches"
193 193 # children directories. Revisions are then identified by their
194 194 # module and revision number (and a repository identifier).
195 195 #
196 196 # The revision graph is really a tree (or a forest). By default, a
197 197 # revision parent is the previous revision in the same module. If the
198 198 # module directory is copied/moved from another module then the
199 199 # revision is the module root and its parent the source revision in
200 200 # the parent module. A revision has at most one parent.
201 201 #
202 202 class svn_source(converter_source):
def __init__(self, ui, url, rev=None):
    """Open the Subversion source designated by ``url``.

    ``url`` may end with '@<number>' to cap the conversion at that
    revision; ``rev``, when given, takes precedence over it.

    Raises NoRepo when ``url`` does not look like a Subversion
    repository, MissingTool when the python bindings are missing or older
    than 1.4, and util.Abort on a non-integer revision or start revision,
    or when the module has no revision.
    """
    super(svn_source, self).__init__(ui, url, rev=rev)

    # Evaluated left to right with short-circuiting, so issvnurl() is
    # only consulted when none of the cheaper checks matched.
    recognized = (url.startswith('svn://')
                  or url.startswith('svn+ssh://')
                  or (os.path.exists(url)
                      and os.path.exists(os.path.join(url, '.svn')))
                  or issvnurl(ui, url))
    if not recognized:
        raise NoRepo(_("%s does not look like a Subversion repository")
                     % url)

    # The name only exists when the guarded import of the bindings at
    # the top of the file succeeded.
    try:
        SubversionException
    except NameError:
        raise MissingTool(
            _('Subversion python bindings could not be loaded'))

    try:
        bindings = (svn.core.SVN_VER_MAJOR, svn.core.SVN_VER_MINOR)
    except AttributeError:
        raise MissingTool(_('Subversion python bindings are too old, 1.4 '
                            'or later required'))
    if bindings < (1, 4):
        raise MissingTool(_('Subversion python bindings %d.%d found, '
                            '1.4 or later required') % bindings)

    self.lastrevs = {}

    latest = None
    # Support file://path@rev syntax. Useful e.g. to convert
    # deleted branches.
    at = url.rfind('@')
    if at >= 0:
        try:
            latest = int(url[at + 1:])
        except ValueError:
            # The '@' belongs to the path itself; leave the url alone.
            pass
        else:
            url = url[:at]
    self.url = geturl(url)
    self.encoding = 'UTF-8' # Subversion is always nominal UTF-8
    try:
        self.transport = transport.SvnRaTransport(url=self.url)
        self.ra = self.transport.ra
        self.ctx = self.transport.client
        self.baseurl = svn.ra.get_repos_root(self.ra)
        # Module is either empty or a repository path starting with
        # a slash and not ending with a slash.
        self.module = urllib.unquote(self.url[len(self.baseurl):])
        self.prevmodule = None
        self.rootmodule = self.module
        self.commits = {}
        self.paths = {}
        self.uuid = svn.ra.get_uuid(self.ra)
    except SubversionException:
        ui.traceback()
        raise NoRepo(_("%s does not look like a Subversion repository")
                     % self.url)

    if rev:
        try:
            latest = int(rev)
        except ValueError:
            raise util.Abort(_('svn: revision %s is not an integer') % rev)

    self.startrev = self.ui.config('convert', 'svn.startrev', default=0)
    try:
        # Negative start revisions are clamped to 0.
        self.startrev = max(int(self.startrev), 0)
    except ValueError:
        raise util.Abort(_('svn: start revision %s is not an integer')
                         % self.startrev)

    self.head = self.latest(self.module, latest)
    if not self.head:
        raise util.Abort(_('no revision found in module %s')
                         % self.module)
    self.last_changed = self.revnum(self.head)

    self._changescache = None

    # Remember the location only when url contains '.svn/entries'.
    self.wc = None
    if os.path.exists(os.path.join(url, '.svn/entries')):
        self.wc = url
    self.convertfp = None
287 287
def setrevmap(self, revmap):
    """Record, for every module named in ``revmap``, the highest revision
    number among its keys.

    The result replaces self.lastrevs.
    """
    newest = {}
    for revid in revmap.iterkeys():
        module, revnum = self.revsplit(revid)[1:]
        if module not in newest or revnum > newest[module]:
            newest[module] = revnum
    self.lastrevs = newest
296 296
def exists(self, path, optrev):
    """Tell whether ``path``, relative to self.url, can be listed at
    revision ``optrev``.
    """
    target = self.url.rstrip('/') + '/' + urllib.quote(path)
    try:
        svn.client.ls(target, optrev, False, self.ctx)
    except SubversionException:
        return False
    return True
304 304
def getheads(self):
    """Return the list of head revision ids to convert.

    The first head is the module's head: the trunk when one is found,
    the project root otherwise. One more head is added for every
    non-empty directory found under the branches directory. The trunk,
    tags and branches locations come from the convert.svn.* settings and
    default to the directories of the same name.

    Raises util.Abort when a configured location is missing, when the
    trunk has no revision, or when a start revision is used together with
    several heads or lies after the only head.
    """
    def isdir(path, revnum):
        return self._checkpath(path, revnum) == svn.core.svn_node_dir

    def getcfgpath(name, rev):
        configured = self.ui.config('convert', 'svn.' + name)
        # An explicitly blank setting disables the location.
        if configured is not None and configured.strip() == '':
            return None
        candidate = (configured or name).strip('/')
        if self.exists(candidate, rev):
            self.ui.note(_('found %s at %r\n') % (name, candidate))
            return candidate
        # Only an explicitly configured location has to exist.
        if configured:
            raise util.Abort(_('expected %s to be at %r, but not found')
                             % (name, candidate))
        return None

    rev = optrev(self.last_changed)
    oldmodule = ''
    trunk = getcfgpath('trunk', rev)
    self.tags = getcfgpath('tags', rev)
    branches = getcfgpath('branches', rev)

    # If the project has a trunk or branches, we will extract heads
    # from them. We keep the project root otherwise.
    if trunk:
        oldmodule = self.module or ''
        self.module += '/' + trunk
        self.head = self.latest(self.module, self.last_changed)
        if not self.head:
            raise util.Abort(_('no revision found in module %s')
                             % self.module)

    # First head in the list is the module's head
    self.heads = [self.head]
    if self.tags is not None:
        self.tags = '%s/%s' % (oldmodule, (self.tags or 'tags'))

    # Check if branches bring a few more heads to the list
    if branches:
        listing = svn.client.ls(
            self.url.strip('/') + '/' + urllib.quote(branches),
            rev, False, self.ctx)
        for branch in listing.keys():
            module = '%s/%s/%s' % (oldmodule, branches, branch)
            if not isdir(module, self.last_changed):
                continue
            brevid = self.latest(module, self.last_changed)
            if brevid:
                self.ui.note(_('found branch %s at %d\n') %
                             (branch, self.revnum(brevid)))
                self.heads.append(brevid)
            else:
                self.ui.note(_('ignoring empty branch %s\n') % branch)

    if self.startrev and self.heads:
        if len(self.heads) > 1:
            raise util.Abort(_('svn: start revision is not supported '
                               'with more than one branch'))
        if self.revnum(self.heads[0]) < self.startrev:
            raise util.Abort(
                _('svn: no revision found after start revision %d')
                % self.startrev)

    return self.heads
373 373
def getchanges(self, rev):
    """Return (files, copies) for revision ``rev``.

    ``files`` is a sorted list of (path, rev) pairs and ``copies`` maps
    copy destinations to their sources. The files removed by ``rev`` are
    stored in self.removed as a side effect, and the entry of ``rev`` in
    self.paths is dropped.
    """
    cached = self._changescache
    if cached and cached[0] == rev:
        return cached[1]
    self._changescache = None
    paths, parents = self.paths[rev]
    if not parents:
        # Perform a full checkout on roots
        module, revnum = self.revsplit(rev)[1:]
        listing = svn.client.ls(self.baseurl + urllib.quote(module),
                                optrev(revnum), True, self.ctx)
        files = [name for name, entry in listing.iteritems()
                 if entry.kind == svn.core.svn_node_file]
        copies = {}
        self.removed = set()
    else:
        files, self.removed, copies = self.expandpaths(rev, paths, parents)

    files = [(name, rev) for name in sorted(files)]

    # caller caches the result, so free it here to release memory
    del self.paths[rev]
    return (files, copies)
397 397
def getchangedfiles(self, rev, i):
    """Return the names of the files changed by ``rev``.

    The full result of getchanges() is remembered in self._changescache.
    ``i`` is not used.
    """
    result = self.getchanges(rev)
    self._changescache = (rev, result)
    files = result[0]
    return [entry[0] for entry in files]
402 402
def getcommit(self, rev):
    """Return the commit object of ``rev``, fetching the log if needed.

    The entry is removed from self.commits before it is returned.
    """
    if rev not in self.commits:
        module, revnum = self.revsplit(rev)[1:]
        self.module = module
        self.reparent(module)
        # We assume that:
        # - requests for revisions after "stop" come from the
        # revision graph backward traversal. Cache all of them
        # down to stop, they will be used eventually.
        # - requests for revisions before "stop" come to get
        # isolated branches parents. Just fetch what is needed.
        stop = self.lastrevs.get(module, 0)
        if revnum < stop:
            stop = revnum + 1
        self._fetch_revisions(revnum, stop)
    # caller caches the result, so free it here to release memory
    return self.commits.pop(rev)
422 422
def gettags(self):
    """Return a dict mapping tag names to the revision ids they refer to.

    The dict is empty when no tags directory is known (self.tags is
    None). A SubversionException raised while reading the log is
    reported with ui.note and ends the scan; tags found so far are
    still returned.
    """
    tags = {}
    if self.tags is None:
        return tags

    # svn tags are just a convention, project branches left in a
    # 'tags' directory. There is no other relationship than
    # ancestry, which is expensive to discover and makes them hard
    # to update incrementally. Worse, past revisions may be
    # referenced by tags far away in the future, requiring a deep
    # history traversal on every calculation. Current code
    # performs a single backward traversal, tracking moves within
    # the tags directory (tag renaming) and recording a new tag
    # everytime a project is copied from outside the tags
    # directory. It also lists deleted tags, this behaviour may
    # change in the future.
    pendings = []
    tagspath = self.tags
    start = svn.ra.get_latest_revnum(self.ra)
    try:
        stream = self._getlog([self.tags], start, self.startrev)
        try:
            for entry in stream:
                origpaths, revnum, author, date, message = entry
                # The changed-path set of an entry may be None; treat
                # it as "nothing changed" instead of crashing.
                origpaths = origpaths or {}
                copies = [(e.copyfrom_path, e.copyfrom_rev, p) for p, e
                          in origpaths.iteritems() if e.copyfrom_path]
                # Apply moves/copies from more specific to general
                copies.sort(reverse=True)

                srctagspath = tagspath
                if copies and copies[-1][2] == tagspath:
                    # Track tags directory moves
                    srctagspath = copies.pop()[0]

                for source, sourcerev, dest in copies:
                    if not dest.startswith(tagspath + '/'):
                        continue
                    for tag in pendings:
                        if tag[0].startswith(dest):
                            tagpath = source + tag[0][len(dest):]
                            tag[:2] = [tagpath, sourcerev]
                            break
                    else:
                        pendings.append([source, sourcerev, dest])

                # Filter out tags with children coming from different
                # parts of the repository like:
                # /tags/tag.1 (from /trunk:10)
                # /tags/tag.1/foo (from /branches/foo:12)
                # Here /tags/tag.1 is discarded as well as its children.
                # It happens with tools like cvs2svn. Such tags cannot
                # be represented in mercurial.
                addeds = dict((p, e.copyfrom_path) for p, e
                              in origpaths.iteritems()
                              if e.action == 'A' and e.copyfrom_path)
                badroots = set()
                for destroot in addeds:
                    for source, sourcerev, dest in pendings:
                        if (not dest.startswith(destroot + '/')
                            or source.startswith(addeds[destroot] + '/')):
                            continue
                        badroots.add(destroot)
                        break

                for badroot in badroots:
                    pendings = [p for p in pendings if p[2] != badroot
                                and not p[2].startswith(badroot + '/')]

                # Tell tag renamings from tag creations
                remainings = []
                for source, sourcerev, dest in pendings:
                    tagname = dest.split('/')[-1]
                    if source.startswith(srctagspath):
                        remainings.append([source, sourcerev, tagname])
                        continue
                    if tagname in tags:
                        # Keep the latest tag value
                        continue
                    # From revision may be fake, get one with changes
                    try:
                        tagid = self.latest(source, sourcerev)
                        if tagid and tagname not in tags:
                            tags[tagname] = tagid
                    except SvnPathNotFound:
                        # It happens when we are following directories
                        # we assumed were copied with their parents
                        # but were really created in the tag
                        # directory.
                        pass
                pendings = remainings
                tagspath = srctagspath
        finally:
            # Always release the log stream, whether the scan completed
            # or was aborted by an exception.
            stream.close()

    except SubversionException:
        self.ui.note(_('no tags found at revision %d\n') % start)
    return tags
516 516
def converted(self, rev, destrev):
    """Append a "<destrev> <svn revision number>" line to the
    '.svn/hg-shamap' file under self.wc.

    Nothing is done when self.wc is not set. The file is opened in
    append mode on first use, kept open in self.convertfp, and flushed
    after every line.
    """
    if not self.wc:
        return
    if self.convertfp is None:
        shamap = os.path.join(self.wc, '.svn', 'hg-shamap')
        self.convertfp = open(shamap, 'a')
    out = self.convertfp
    out.write('%s %d\n' % (destrev, self.revnum(rev)))
    out.flush()
525 525
def revid(self, revnum, module=None):
    """Build the 'svn:<uuid><module>@<revnum>' identifier of a revision.

    self.module is used when ``module`` is None or empty.
    """
    if not module:
        module = self.module
    return 'svn:%s%s@%s' % (self.uuid, module, revnum)
528 528
def revnum(self, rev):
    """Return the integer following the last '@' of ``rev``."""
    number = rev.rsplit('@', 1)[-1]
    return int(number)
531 531
def revsplit(self, rev):
    """Split a revision id into (uuid, module, revnum).

    ``rev`` looks like 'svn:<uuid>[/<module path>]@<revnum>'. The module
    is returned with a leading slash, or as '' when the id has no path
    part; the revision number is returned as an int.
    """
    head, number = rev.rsplit('@', 1)
    number = int(number)
    pieces = head.split('/', 1)
    # Drop the 4-character 'svn:' prefix in front of the uuid.
    uuid = pieces[0][4:]
    if len(pieces) > 1:
        module = '/' + pieces[1]
    else:
        module = ''
    return uuid, module, number
541 541
def latest(self, path, stop=0):
    """Find the latest revid affecting path, up to stop. It may return
    a revision in a different module, since a branch may be moved without
    a change being reported. Return None if computed module does not
    belong to rootmodule subtree.

    A false ``stop`` means the latest revision of the repository.
    Raises SvnPathNotFound when ``path`` cannot be found at ``stop``.
    """
    if not path.startswith(self.rootmodule):
        # Requests on foreign branches may be forbidden at server level
        self.ui.debug('ignoring foreign branch %r\n' % path)
        return None

    if not stop:
        stop = svn.ra.get_latest_revnum(self.ra)
    try:
        prevmodule = self.reparent('')
        try:
            dirent = svn.ra.stat(self.ra, path.strip('/'), stop)
        finally:
            # Put the transport back where it was even when stat()
            # fails: other methods expect it to be parented at
            # self.module.
            self.reparent(prevmodule)
    except SubversionException:
        dirent = None
    if not dirent:
        raise SvnPathNotFound(_('%s not found up to revision %d')
                              % (path, stop))

    # stat() gives us the previous revision on this line of
    # development, but it might be in *another module*. Fetch the
    # log and detect renames down to the latest revision.
    stream = self._getlog([path], stop, dirent.created_rev)
    try:
        for entry in stream:
            paths, revnum, author, date, message = entry
            if revnum <= dirent.created_rev:
                break

            # The changed-path set of an entry may be None.
            for p in paths or ():
                if not path.startswith(p) or not paths[p].copyfrom_path:
                    continue
                newpath = paths[p].copyfrom_path + path[len(p):]
                self.ui.debug("branch renamed from %s to %s at %d\n" %
                              (path, newpath, revnum))
                path = newpath
                break
    finally:
        stream.close()

    if not path.startswith(self.rootmodule):
        self.ui.debug('ignoring foreign branch %r\n' % path)
        return None
    return self.revid(dirent.created_rev, path)
590 590
def reparent(self, module):
    """Reparent the svn transport and return the previous parent.

    Nothing is done, and ``module`` itself is returned, when the
    transport is already parented there. '' is returned when no parent
    had been recorded yet.
    """
    previous = self.prevmodule
    if previous == module:
        return module
    svnurl = self.baseurl + urllib.quote(module)
    self.ui.debug("reparent to %s\n" % svnurl)
    svn.ra.reparent(self.ra, svnurl)
    self.prevmodule = module
    if previous is None:
        return ''
    return previous
603 603
def expandpaths(self, rev, paths, parents):
    """Expand the changed paths of ``rev`` into file-level changes.

    ``paths`` is a sequence of (path, entry) pairs and ``parents`` the
    parent revision ids of ``rev``. Directories are expanded into the
    files they contain.

    Returns (files, removed, copies): the list of changed files
    (removed ones included), the set of removed files, and a dict
    mapping copy destinations to their sources.
    """
    changed, removed = set(), set()
    copies = {}

    new_module, revnum = self.revsplit(rev)[1:]
    if new_module != self.module:
        self.module = new_module
        self.reparent(self.module)

    for i, (path, ent) in enumerate(paths):
        self.ui.progress(_('scanning paths'), i, item=path,
                         total=len(paths))
        entrypath = self.getrelpath(path)

        kind = self._checkpath(entrypath, revnum)
        if kind == svn.core.svn_node_file:
            changed.add(self.recode(entrypath))
            if not ent.copyfrom_path or not parents:
                continue
            # Copy sources not in parent revisions cannot be
            # represented, ignore their origin for now
            pmodule, prevnum = self.revsplit(parents[0])[1:]
            if ent.copyfrom_rev < prevnum:
                continue
            copyfrom_path = self.getrelpath(ent.copyfrom_path, pmodule)
            if not copyfrom_path:
                continue
            self.ui.debug("copied to %s from %s@%s\n" %
                          (entrypath, copyfrom_path, ent.copyfrom_rev))
            copies[self.recode(entrypath)] = self.recode(copyfrom_path)
        elif kind == 0: # gone, but had better be a deleted *file*
            self.ui.debug("gone from %s\n" % ent.copyfrom_rev)
            pmodule, prevnum = self.revsplit(parents[0])[1:]
            parentpath = pmodule + "/" + entrypath
            fromkind = self._checkpath(entrypath, prevnum, pmodule)

            if fromkind == svn.core.svn_node_file:
                removed.add(self.recode(entrypath))
            elif fromkind == svn.core.svn_node_dir:
                oroot = parentpath.strip('/')
                nroot = path.strip('/')
                children = self._iterfiles(oroot, prevnum)
                for childpath in children:
                    # Children are reported below oroot: swap only that
                    # leading prefix. A plain str.replace() would also
                    # rewrite any later occurrence of oroot in the path.
                    childpath = nroot + childpath[len(oroot):]
                    childpath = self.getrelpath("/" + childpath, pmodule)
                    if childpath:
                        removed.add(self.recode(childpath))
            else:
                self.ui.debug('unknown path in revision %d: %s\n' %
                              (revnum, path))
        elif kind == svn.core.svn_node_dir:
            if ent.action == 'M':
                # If the directory just had a prop change,
                # then we shouldn't need to look for its children.
                continue
            elif ent.action == 'R' and parents:
                # If a directory is replacing a file, mark the previous
                # file as deleted
                pmodule, prevnum = self.revsplit(parents[0])[1:]
                pkind = self._checkpath(entrypath, prevnum, pmodule)
                if pkind == svn.core.svn_node_file:
                    removed.add(self.recode(entrypath))

            for childpath in self._iterfiles(path, revnum):
                childpath = self.getrelpath("/" + childpath)
                if childpath:
                    changed.add(self.recode(childpath))

            # Handle directory copies
            if not ent.copyfrom_path or not parents:
                continue
            # Copy sources not in parent revisions cannot be
            # represented, ignore their origin for now
            pmodule, prevnum = self.revsplit(parents[0])[1:]
            if ent.copyfrom_rev < prevnum:
                continue
            copyfrompath = self.getrelpath(ent.copyfrom_path, pmodule)
            if not copyfrompath:
                continue
            self.ui.debug("mark %s came from %s:%d\n"
                          % (path, copyfrompath, ent.copyfrom_rev))
            children = self._iterfiles(ent.copyfrom_path, ent.copyfrom_rev)
            for childpath in children:
                childpath = self.getrelpath("/" + childpath, pmodule)
                if not childpath:
                    continue
                copytopath = path + childpath[len(copyfrompath):]
                copytopath = self.getrelpath(copytopath)
                copies[self.recode(copytopath)] = self.recode(childpath)

    self.ui.progress(_('scanning paths'), None)
    # Removed files are reported among the changed ones as well.
    changed.update(removed)
    return (list(changed), removed, copies)
697 697
698 698 def _fetch_revisions(self, from_revnum, to_revnum):
699 699 if from_revnum < to_revnum:
700 700 from_revnum, to_revnum = to_revnum, from_revnum
701 701
702 702 self.child_cset = None
703 703
704 704 def parselogentry(orig_paths, revnum, author, date, message):
705 705 """Return the parsed commit object or None, and True if
706 706 the revision is a branch root.
707 707 """
708 708 self.ui.debug("parsing revision %d (%d changes)\n" %
709 709 (revnum, len(orig_paths)))
710 710
711 711 branched = False
712 712 rev = self.revid(revnum)
713 713 # branch log might return entries for a parent we already have
714 714
715 715 if rev in self.commits or revnum < to_revnum:
716 716 return None, branched
717 717
718 718 parents = []
719 719 # check whether this revision is the start of a branch or part
720 720 # of a branch renaming
721 721 orig_paths = sorted(orig_paths.iteritems())
722 722 root_paths = [(p, e) for p, e in orig_paths
723 723 if self.module.startswith(p)]
724 724 if root_paths:
725 725 path, ent = root_paths[-1]
726 726 if ent.copyfrom_path:
727 727 branched = True
728 728 newpath = ent.copyfrom_path + self.module[len(path):]
729 729 # ent.copyfrom_rev may not be the actual last revision
730 730 previd = self.latest(newpath, ent.copyfrom_rev)
731 731 if previd is not None:
732 732 prevmodule, prevnum = self.revsplit(previd)[1:]
733 733 if prevnum >= self.startrev:
734 734 parents = [previd]
735 735 self.ui.note(
736 736 _('found parent of branch %s at %d: %s\n') %
737 737 (self.module, prevnum, prevmodule))
738 738 else:
739 739 self.ui.debug("no copyfrom path, don't know what to do.\n")
740 740
741 741 paths = []
742 742 # filter out unrelated paths
743 743 for path, ent in orig_paths:
744 744 if self.getrelpath(path) is None:
745 745 continue
746 746 paths.append((path, ent))
747 747
748 748 # Example SVN datetime. Includes microseconds.
749 749 # ISO-8601 conformant
750 750 # '2007-01-04T17:35:00.902377Z'
751 751 date = util.parsedate(date[:19] + " UTC", ["%Y-%m-%dT%H:%M:%S"])
752 752
753 753 log = message and self.recode(message) or ''
754 754 author = author and self.recode(author) or ''
755 755 try:
756 756 branch = self.module.split("/")[-1]
757 757 if branch == 'trunk':
758 758 branch = ''
759 759 except IndexError:
760 760 branch = None
761 761
762 762 cset = commit(author=author,
763 763 date=util.datestr(date),
764 764 desc=log,
765 765 parents=parents,
766 766 branch=branch,
767 767 rev=rev)
768 768
769 769 self.commits[rev] = cset
770 770 # The parents list is *shared* among self.paths and the
771 771 # commit object. Both will be updated below.
772 772 self.paths[rev] = (paths, cset.parents)
773 773 if self.child_cset and not self.child_cset.parents:
774 774 self.child_cset.parents[:] = [rev]
775 775 self.child_cset = cset
776 776 return cset, branched
777 777
778 778 self.ui.note(_('fetching revision log for "%s" from %d to %d\n') %
779 779 (self.module, from_revnum, to_revnum))
780 780
781 781 try:
782 782 firstcset = None
783 783 lastonbranch = False
784 784 stream = self._getlog([self.module], from_revnum, to_revnum)
785 785 try:
786 786 for entry in stream:
787 787 paths, revnum, author, date, message = entry
788 788 if revnum < self.startrev:
789 789 lastonbranch = True
790 790 break
791 791 if not paths:
792 792 self.ui.debug('revision %d has no entries\n' % revnum)
793 793 # If we ever leave the loop on an empty
794 794 # revision, do not try to get a parent branch
795 795 lastonbranch = lastonbranch or revnum == 0
796 796 continue
797 797 cset, lastonbranch = parselogentry(paths, revnum, author,
798 798 date, message)
799 799 if cset:
800 800 firstcset = cset
801 801 if lastonbranch:
802 802 break
803 803 finally:
804 804 stream.close()
805 805
806 806 if not lastonbranch and firstcset and not firstcset.parents:
807 807 # The first revision of the sequence (the last fetched one)
808 808 # has invalid parents if not a branch root. Find the parent
809 809 # revision now, if any.
810 810 try:
811 811 firstrevnum = self.revnum(firstcset.rev)
812 812 if firstrevnum > 1:
813 813 latest = self.latest(self.module, firstrevnum - 1)
814 814 if latest:
815 815 firstcset.parents.append(latest)
816 816 except SvnPathNotFound:
817 817 pass
818 818 except SubversionException, (inst, num):
819 819 if num == svn.core.SVN_ERR_FS_NO_SUCH_REVISION:
820 820 raise util.Abort(_('svn: branch has no revision %s') % to_revnum)
821 821 raise
822 822
823 823 def getfile(self, file, rev):
824 824 # TODO: ra.get_file transmits the whole file instead of diffs.
825 825 if file in self.removed:
826 826 raise IOError()
827 827 mode = ''
828 828 try:
829 829 new_module, revnum = self.revsplit(rev)[1:]
830 830 if self.module != new_module:
831 831 self.module = new_module
832 832 self.reparent(self.module)
833 833 io = StringIO()
834 834 info = svn.ra.get_file(self.ra, file, revnum, io)
835 835 data = io.getvalue()
836 836 # ra.get_files() seems to keep a reference on the input buffer
837 837 # preventing collection. Release it explicitely.
838 838 io.close()
839 839 if isinstance(info, list):
840 840 info = info[-1]
841 841 mode = ("svn:executable" in info) and 'x' or ''
842 842 mode = ("svn:special" in info) and 'l' or mode
843 843 except SubversionException, e:
844 844 notfound = (svn.core.SVN_ERR_FS_NOT_FOUND,
845 845 svn.core.SVN_ERR_RA_DAV_PATH_NOT_FOUND)
846 846 if e.apr_err in notfound: # File not found
847 847 raise IOError()
848 848 raise
849 849 if mode == 'l':
850 850 link_prefix = "link "
851 851 if data.startswith(link_prefix):
852 852 data = data[len(link_prefix):]
853 853 return data, mode
854 854
def _iterfiles(self, path, revnum):
    """Enumerate all files in path at revnum, recursively.

    Returns a generator of paths, each prefixed with ``path`` stripped
    of its leading and trailing slashes.
    """
    path = path.strip('/')
    pool = Pool()
    target = '/'.join([self.baseurl, urllib.quote(path)]).strip('/')
    listing = svn.client.ls(target, optrev(revnum), True, self.ctx,
                            pool)
    return ((path + '/' + name) for name, entry in listing.iteritems()
            if entry.kind == svn.core.svn_node_file)
863 863
def getrelpath(self, path, module=None):
    """Return ``path`` relative to ``module`` (self.module by default).

    For instance, with module "/CMFPlone/branches/Plone-2_0-branch", the
    path "/CMFPlone/branches/Plone-2_0-branch/tests/PloneTestCase.py"
    gives "tests/PloneTestCase.py". The module itself gives ''. None is
    returned, after a debug message, for a path outside the module.
    """
    if module is None:
        module = self.module
    if path.startswith(module):
        tail = path.rstrip('/')[len(module):]
        if tail[:1] == '/':
            return tail[1:]
        if not tail:
            return tail
        # Otherwise module is only a textual prefix of a sibling path.

    # The path is outside our tracked tree...
    self.ui.debug('%r is not under %r, ignoring\n' % (path, module))
    return None
883 883
def _checkpath(self, path, revnum, module=None):
    """Return ``svn.ra.check_path`` for ``path`` at ``revnum``.

    When ``module`` is given, the session is temporarily reparented to
    '' and ``path`` is looked up as ``module/path``; the previous parent
    returned by ``self.reparent`` is restored afterwards, even on error.
    """
    # ra.check_path does not like leading slashes very much, it leads
    # to PROPFIND subversion errors
    if module is None:
        return svn.ra.check_path(self.ra, path.strip('/'), revnum)

    prevmodule = self.reparent('')
    fullpath = module + '/' + path
    try:
        return svn.ra.check_path(self.ra, fullpath.strip('/'), revnum)
    finally:
        self.reparent(prevmodule)
895 895
def _getlog(self, paths, start, end, limit=0, discover_changed_paths=True,
            strict_node_history=False):
    """Spawn ``hg debugsvnlog`` and return a logstream over its output.

    The encoded request (base URL, normalized paths and the remaining
    arguments) is written to the child's stdin.  util.Abort is raised
    when closing that pipe fails with IOError.
    """
    def normalize(p):
        # Normalize path names, svn >= 1.5 only wants paths relative to
        # supplied URL
        if p.startswith('/'):
            return p.strip('/')
        return (self.module + '/' + p).strip('/')

    relpaths = [normalize(p) for p in paths]
    payload = encodeargs([self.baseurl, relpaths, start, end, limit,
                          discover_changed_paths, strict_node_history])
    cmd = '%s debugsvnlog' % util.shellquote(util.hgexecutable())
    stdin, stdout = util.popen2(cmd)
    stdin.write(payload)
    try:
        stdin.close()
    except IOError:
        raise util.Abort(_('Mercurial failed to run itself, check'
                           ' hg executable is in PATH'))
    return logstream(stdout)
918 918
# Shell script installed by svn_sink.__init__ as the 'pre-revprop-change'
# hook of repositories it creates. It permits modifying svn:log and adding
# the hg:convert-branch / hg:convert-rev revision properties (the two that
# svn_sink.putcommit sets), and rejects every other revision property
# change with exit status 1.
pre_revprop_change = '''#!/bin/sh

REPOS="$1"
REV="$2"
USER="$3"
PROPNAME="$4"
ACTION="$5"

if [ "$ACTION" = "M" -a "$PROPNAME" = "svn:log" ]; then exit 0; fi
if [ "$ACTION" = "A" -a "$PROPNAME" = "hg:convert-branch" ]; then exit 0; fi
if [ "$ACTION" = "A" -a "$PROPNAME" = "hg:convert-rev" ]; then exit 0; fi

echo "Changing prohibited revision property" >&2
exit 1
'''
934 934
class svn_sink(converter_sink, commandline):
    """Conversion sink that replays changesets into a Subversion working copy.

    Changes are applied to the files of a working copy (``self.wc``) and
    committed by running the ``svn`` command line client.  Pending
    operations are accumulated in ``self.delete``, ``self.copies``,
    ``self.setexec`` and ``self.delexec`` and flushed by putcommit().
    """
    # Extracts the revision number from the output of 'svn commit'.
    commit_re = re.compile(r'Committed revision (\d+).', re.M)

    def prerun(self):
        """Change into the working copy, if one is set."""
        if self.wc:
            os.chdir(self.wc)

    def postrun(self):
        """Change back to the directory recorded at construction time."""
        if self.wc:
            os.chdir(self.cwd)

    def join(self, name):
        """Return the path of ``name`` inside the working copy's .svn dir."""
        return os.path.join(self.wc, '.svn', name)

    def revmapfile(self):
        """Return the path of the revision map file."""
        return self.join('hg-shamap')

    def authorfile(self):
        """Return the path of the author map file."""
        return self.join('hg-authormap')

    def __init__(self, ui, path):
        """Prepare a working copy for ``path``.

        If ``path`` already is a working copy (it has '.svn/entries') it
        is updated and used directly.  Otherwise a working copy named
        '<basename>-wc' is checked out in the current directory; when the
        parent directory of ``path`` exists, ``path`` is treated as a
        local repository, created with 'svnadmin create' if it has no
        'db/fs-type' file, and accessed through a file:// URL.
        """
        converter_sink.__init__(self, ui, path)
        commandline.__init__(self, ui, 'svn')
        self.delete = []
        self.setexec = []
        self.delexec = []
        self.copies = []
        self.wc = None
        self.cwd = os.getcwd()

        path = os.path.realpath(path)

        # Holds the repository path when we created it, False otherwise.
        created = False
        if os.path.isfile(os.path.join(path, '.svn', 'entries')):
            self.wc = path
            self.run0('update')
        else:
            wcpath = os.path.join(os.getcwd(), os.path.basename(path) + '-wc')

            if os.path.isdir(os.path.dirname(path)):
                if not os.path.exists(os.path.join(path, 'db', 'fs-type')):
                    ui.status(_('initializing svn repository %r\n') %
                              os.path.basename(path))
                    commandline(ui, 'svnadmin').run0('create', path)
                    created = path
                path = util.normpath(path)
                if not path.startswith('/'):
                    path = '/' + path
                path = 'file://' + path

            ui.status(_('initializing svn working copy %r\n')
                      % os.path.basename(wcpath))
            self.run0('checkout', path, wcpath)

            self.wc = wcpath
        self.opener = util.opener(self.wc)
        self.wopener = util.opener(self.wc)
        self.childmap = mapfile(ui, self.join('hg-childmap'))
        # None when the working copy's filesystem has no usable exec bit.
        self.is_exec = util.checkexec(self.wc) and util.is_exec or None

        if created:
            # Install the hook allowing the revprop changes putcommit makes.
            hook = os.path.join(created, 'hooks', 'pre-revprop-change')
            fp = open(hook, 'w')
            try:
                fp.write(pre_revprop_change)
            finally:
                fp.close()
            util.set_flags(hook, False, True)

        xport = transport.SvnRaTransport(url=geturl(path))
        self.uuid = svn.ra.get_uuid(xport.ra)

    def wjoin(self, *names):
        """Join ``names`` onto the working copy directory."""
        return os.path.join(self.wc, *names)

    def putfile(self, filename, flags, data):
        """Write ``data`` to ``filename`` in the working copy.

        With 'l' in ``flags`` a symlink pointing at ``data`` is created.
        Otherwise a regular file is written and the change of its
        executable status is queued in ``self.setexec``/``self.delexec``.
        """
        if 'l' in flags:
            self.wopener.symlink(data, filename)
        else:
            wpath = self.wjoin(filename)
            try:
                # Replace a former symlink instead of writing through it.
                # The path must be the working copy one: the current
                # directory is not necessarily the working copy here.
                if os.path.islink(wpath):
                    os.unlink(wpath)
            except OSError:
                pass
            fp = self.wopener(filename, 'w')
            try:
                fp.write(data)
            finally:
                fp.close()

            if self.is_exec:
                was_exec = self.is_exec(wpath)
            else:
                # On filesystems not supporting execute-bit, there is no way
                # to know if it is set but asking subversion. Setting it
                # systematically is just as expensive and much simpler.
                was_exec = 'x' not in flags

            util.set_flags(wpath, False, 'x' in flags)
            if was_exec:
                if 'x' not in flags:
                    self.delexec.append(filename)
            else:
                if 'x' in flags:
                    self.setexec.append(filename)

    def _copyfile(self, source, dest):
        """Record a copy from ``source`` to ``dest`` with 'svn copy'."""
        # SVN's copy command pukes if the destination file exists, but
        # our copyfile method expects to record a copy that has
        # already occurred.  Cross the semantic gap.
        wdest = self.wjoin(dest)
        exists = os.path.exists(wdest)
        if exists:
            # Move the existing destination aside under a unique name.
            fd, tempname = tempfile.mkstemp(
                prefix='hg-copy-', dir=os.path.dirname(wdest))
            os.close(fd)
            os.unlink(tempname)
            os.rename(wdest, tempname)
        try:
            self.run0('copy', source, dest)
        finally:
            if exists:
                # Put the already-converted content back in place.
                try:
                    os.unlink(wdest)
                except OSError:
                    pass
                os.rename(tempname, wdest)

    def dirs_of(self, files):
        """Return the set of directories named by or containing ``files``."""
        dirs = set()
        for f in files:
            if os.path.isdir(self.wjoin(f)):
                dirs.add(f)
            # Every prefix ending before a '/' is an ancestor directory.
            for i in strutil.rfindall(f, '/'):
                dirs.add(f[:i])
        return dirs

    def add_dirs(self, files):
        """'svn add' the directories of ``files`` lacking '.svn/entries'.

        Returns the list of directories that were added.
        """
        add_dirs = [d for d in sorted(self.dirs_of(files))
                    if not os.path.exists(self.wjoin(d, '.svn', 'entries'))]
        if add_dirs:
            self.xargs(add_dirs, 'add', non_recursive=True, quiet=True)
        return add_dirs

    def add_files(self, files):
        """'svn add' ``files`` (if any) and return them."""
        if files:
            self.xargs(files, 'add', quiet=True)
        return files

    def tidy_dirs(self, names):
        """'svn delete' directories of ``names`` that only contain '.svn'.

        Directories are visited in reverse sorted order, so children come
        before their parents.  Returns the list of deleted directories.
        """
        deleted = []
        for d in sorted(self.dirs_of(names), reverse=True):
            wd = self.wjoin(d)
            # os.listdir returns a list: compare against a list, not a
            # string, otherwise the test can never succeed.
            if os.listdir(wd) == ['.svn']:
                self.run0('delete', d)
                deleted.append(d)
        return deleted

    def addchild(self, parent, child):
        """Record ``child`` as the converted child of ``parent``."""
        self.childmap[parent] = child

    def revid(self, rev):
        """Return the converter revision identifier for svn revision ``rev``."""
        return u"svn:%s@%s" % (self.uuid, rev)

    def putcommit(self, files, copies, parents, commit, source, revmap):
        """Apply one changeset to the working copy and commit it.

        ``files`` is a sequence of ``(name, version)`` pairs fetched with
        ``source.getfile``; an IOError from it marks the file as deleted.
        Returns the identifier of the resulting revision.  Raises
        util.Abort when the output of 'svn commit' cannot be understood.
        """
        # Apply changes to working copy
        for f, v in files:
            try:
                data, mode = source.getfile(f, v)
            except IOError:
                self.delete.append(f)
            else:
                self.putfile(f, mode, data)
                if f in copies:
                    self.copies.append([copies[f], f])
        files = [f[0] for f in files]

        # A parent with a recorded child short-circuits the commit and
        # returns that child's identifier.
        for parent in parents:
            try:
                return self.revid(self.childmap[parent])
            except KeyError:
                pass
        entries = set(self.delete)
        files = frozenset(files)
        entries.update(self.add_dirs(files.difference(entries)))
        if self.copies:
            for s, d in self.copies:
                self._copyfile(s, d)
            self.copies = []
        if self.delete:
            self.xargs(self.delete, 'delete')
            self.delete = []
        entries.update(self.add_files(files.difference(entries)))
        entries.update(self.tidy_dirs(entries))
        if self.delexec:
            self.xargs(self.delexec, 'propdel', 'svn:executable')
            self.delexec = []
        if self.setexec:
            self.xargs(self.setexec, 'propset', 'svn:executable', '*')
            self.setexec = []

        fd, messagefile = tempfile.mkstemp(prefix='hg-convert-')
        try:
            # Written inside the try block so the temporary file is
            # removed even when writing the message fails.
            fp = os.fdopen(fd, 'w')
            try:
                fp.write(commit.desc)
            finally:
                fp.close()
            output = self.run0('commit',
                               username=util.shortuser(commit.author),
                               file=messagefile,
                               encoding='utf-8')
            try:
                rev = self.commit_re.search(output).group(1)
            except AttributeError:
                # No "Committed revision" line in the output.
                if not files:
                    return parents[0]
                self.ui.warn(_('unexpected svn output:\n'))
                self.ui.warn(output)
                raise util.Abort(_('unable to cope with svn output'))
            if commit.rev:
                self.run('propset', 'hg:convert-rev', commit.rev,
                         revprop=True, revision=rev)
            if commit.branch and commit.branch != 'default':
                self.run('propset', 'hg:convert-branch', commit.branch,
                         revprop=True, revision=rev)
            for parent in parents:
                self.addchild(parent, rev)
            return self.revid(rev)
        finally:
            os.unlink(messagefile)

    def puttags(self, tags):
        """Tags are not supported by this sink; only emit a warning."""
        self.ui.warn(_('XXX TAGS NOT IMPLEMENTED YET\n'))
General Comments 0
You need to be logged in to leave comments. Login now