##// END OF EJS Templates
convert/svn: update svn working copy only when necessary...
Patrick Mezard -
r15605:2ad5b893 default
parent child Browse files
Show More
@@ -1,1186 +1,1187 b''
1 1 # Subversion 1.4/1.5 Python API backend
2 2 #
3 3 # Copyright(C) 2007 Daniel Holth et al
4 4
5 5 import os
6 6 import re
7 7 import sys
8 8 import cPickle as pickle
9 9 import tempfile
10 10 import urllib
11 11 import urllib2
12 12
13 13 from mercurial import strutil, scmutil, util, encoding
14 14 from mercurial.i18n import _
15 15
16 16 # Subversion stuff. Works best with very recent Python SVN bindings
17 17 # e.g. SVN 1.5 or backports. Thanks to the bzr folks for enhancing
18 18 # these bindings.
19 19
20 20 from cStringIO import StringIO
21 21
22 22 from common import NoRepo, MissingTool, commit, encodeargs, decodeargs
23 23 from common import commandline, converter_source, converter_sink, mapfile
24 24
25 25 try:
26 26 from svn.core import SubversionException, Pool
27 27 import svn
28 28 import svn.client
29 29 import svn.core
30 30 import svn.ra
31 31 import svn.delta
32 32 import transport
33 33 import warnings
34 34 warnings.filterwarnings('ignore',
35 35 module='svn.core',
36 36 category=DeprecationWarning)
37 37
38 38 except ImportError:
39 39 svn = None
40 40
class SvnPathNotFound(Exception):
    """Raised when a path cannot be found in the Subversion repository."""
43 43
def revsplit(rev):
    """Split a revision identifier into (uuid, path, revnum).

    The identifier looks like 'svn:<uuid>[/<module path>]@<revnum>'. The
    returned path is either empty or starts with a slash, and revnum is
    an integer.
    """
    url, revnum = rev.rsplit('@', 1)
    # The first four characters are the 'svn:' prefix.
    slash = url.find('/')
    if slash < 0:
        return url[4:], '', int(revnum)
    return url[4:slash], url[slash:], int(revnum)
52 52
def quote(s):
    """URL-quote s the way the svn URL encoding function does.

    As of svn 1.7, many svn calls expect "canonical" paths. In theory,
    svn.core.*canonicalize() should be called on all paths before they
    reach the API. Instead, the base url is assumed to be canonical and
    new components are quoted like svn would, so the url can be extended
    safely.
    """
    # Characters left unquoted, taken from the "svn_uri__char_validity"
    # table in libsvn_subr/path.c.
    safe = "!$&'()*+,-./:=@_~"
    return urllib.quote(s, safe)
62 62
def geturl(path):
    """Return a canonical svn URL for path.

    The svn client is asked first. When it fails and path is an existing
    directory, a file:// URL is built from its absolute path instead.
    """
    try:
        return svn.client.url_from_path(svn.core.svn_path_canonicalize(path))
    except SubversionException:
        # svn.client.url_from_path() fails with local repositories
        pass
    if os.path.isdir(path):
        localpath = os.path.normpath(os.path.abspath(path))
        if os.name == 'nt':
            localpath = '/' + util.normpath(localpath)
        # Module URL is later compared with the repository URL returned
        # by svn API, which is UTF-8.
        path = 'file://%s' % quote(encoding.tolocal(localpath))
    return svn.core.svn_path_canonicalize(path)
78 78
def optrev(number):
    """Wrap a revision number in an svn_opt_revision_t of kind 'number'."""
    revision = svn.core.svn_opt_revision_t()
    revision.kind = svn.core.svn_opt_revision_number
    revision.value.number = number
    return revision
84 84
class changedpath(object):
    """Plain copy of the copyfrom_path, copyfrom_rev and action fields
    of a changed path log entry."""
    def __init__(self, p):
        for field in ('copyfrom_path', 'copyfrom_rev', 'action'):
            setattr(self, field, getattr(p, field))
90 90
91 91 def get_log_child(fp, url, paths, start, end, limit=0, discover_changed_paths=True,
92 92 strict_node_history=False):
93 93 protocol = -1
94 94 def receiver(orig_paths, revnum, author, date, message, pool):
95 95 if orig_paths is not None:
96 96 for k, v in orig_paths.iteritems():
97 97 orig_paths[k] = changedpath(v)
98 98 pickle.dump((orig_paths, revnum, author, date, message),
99 99 fp, protocol)
100 100
101 101 try:
102 102 # Use an ra of our own so that our parent can consume
103 103 # our results without confusing the server.
104 104 t = transport.SvnRaTransport(url=url)
105 105 svn.ra.get_log(t.ra, paths, start, end, limit,
106 106 discover_changed_paths,
107 107 strict_node_history,
108 108 receiver)
109 109 except SubversionException, (inst, num):
110 110 pickle.dump(num, fp, protocol)
111 111 except IOError:
112 112 # Caller may interrupt the iteration
113 113 pickle.dump(None, fp, protocol)
114 114 else:
115 115 pickle.dump(None, fp, protocol)
116 116 fp.close()
117 117 # With large history, cleanup process goes crazy and suddenly
118 118 # consumes *huge* amount of memory. The output file being closed,
119 119 # there is no need for clean termination.
120 120 os._exit(0)
121 121
def debugsvnlog(ui, **opts):
    """Fetch SVN log in a subprocess and channel them back to parent to
    avoid memory collection issues.
    """
    # Both standard streams are switched to binary mode before use.
    for stream in (sys.stdin, sys.stdout):
        util.setbinary(stream)
    logargs = decodeargs(sys.stdin.read())
    get_log_child(sys.stdout, *logargs)
130 130
class logstream(object):
    """Interruptible revision log iterator.

    Wraps a file-like object holding the pickled records written by
    get_log_child(). Iterating yields (orig_paths, revnum, author, date,
    message) tuples until the terminating None record is read.
    """
    def __init__(self, stdout):
        self._stdout = stdout

    def __iter__(self):
        # NOTE(review): pickle.load() must only be fed trusted data; the
        # stream is presumably the output of our own child process.
        while True:
            try:
                entry = pickle.load(self._stdout)
            except EOFError:
                # The stream ended without the terminating record.
                raise util.Abort(_('Mercurial failed to run itself, check'
                                   ' hg executable is in PATH'))
            try:
                orig_paths, revnum, author, date, message = entry
            except (TypeError, ValueError):
                # Not a log record: None marks a clean end of stream, any
                # other value was sent by the child to report an error.
                # Only unpacking failures are caught here so that
                # interrupts are not swallowed.
                if entry is None:
                    break
                raise SubversionException("child raised exception", entry)
            yield entry

    def close(self):
        """Close the underlying stream; safe to call more than once."""
        if self._stdout:
            self._stdout.close()
            self._stdout = None
155 155
156 156
157 157 # Check to see if the given path is a local Subversion repo. Verify this by
158 158 # looking for several svn-specific files and directories in the given
159 159 # directory.
def filecheck(ui, path, proto):
    """Return True if path holds all the entries expected at the root of
    a local Subversion repository ('locks', 'hooks', 'format', 'db')."""
    markers = ('locks', 'hooks', 'format', 'db')
    missing = [name for name in markers
               if not os.path.exists(os.path.join(path, name))]
    return not missing
165 165
166 166 # Check to see if a given path is the root of an svn repo over http. We verify
167 167 # this by requesting a version-controlled URL we know can't exist and looking
168 168 # for the svn-specific "not found" XML.
169 169 def httpcheck(ui, path, proto):
170 170 try:
171 171 opener = urllib2.build_opener()
172 172 rsp = opener.open('%s://%s/!svn/ver/0/.svn' % (proto, path))
173 173 data = rsp.read()
174 174 except urllib2.HTTPError, inst:
175 175 if inst.code != 404:
176 176 # Except for 404 we cannot know for sure this is not an svn repo
177 177 ui.warn(_('svn: cannot probe remote repository, assume it could '
178 178 'be a subversion repository. Use --source-type if you '
179 179 'know better.\n'))
180 180 return True
181 181 data = inst.fp.read()
182 182 except:
183 183 # Could be urllib2.URLError if the URL is invalid or anything else.
184 184 return False
185 185 return '<m:human-readable errcode="160013">' in data
186 186
# Probe function used by issvnurl() for each supported URL scheme. Every
# probe is called as check(ui, path, proto).
protomap = {
    'file': filecheck,
    'http': httpcheck,
    'https': httpcheck,
}
def issvnurl(ui, url):
    """Return True if url, or one of its parent paths, is recognized as
    a Subversion repository by the probe registered for its scheme.

    A url without '://' is treated as a local path. Schemes without a
    registered probe are never recognized.
    """
    try:
        proto, path = url.split('://', 1)
        if proto == 'file':
            path = urllib.url2pathname(path)
    except ValueError:
        proto = 'file'
        path = os.path.abspath(url)
    if proto == 'file':
        path = path.replace(os.sep, '/')

    def never(*args):
        return False

    check = protomap.get(proto, never)
    # Probe the path, then each of its parents in turn.
    slash = path.rfind('/')
    while slash >= 0:
        if check(ui, path, proto):
            return True
        path = path[:slash]
        slash = path.rfind('/')
    return False
207 207
208 208 # SVN conversion code stolen from bzr-svn and tailor
209 209 #
210 210 # Subversion looks like a versioned filesystem, branches structures
211 211 # are defined by conventions and not enforced by the tool. First,
212 212 # we define the potential branches (modules) as "trunk" and "branches"
213 213 # children directories. Revisions are then identified by their
214 214 # module and revision number (and a repository identifier).
215 215 #
216 216 # The revision graph is really a tree (or a forest). By default, a
217 217 # revision parent is the previous revision in the same module. If the
218 218 # module directory is copied/moved from another module then the
219 219 # revision is the module root and its parent the source revision in
220 220 # the parent module. A revision has at most one parent.
221 221 #
222 222 class svn_source(converter_source):
# Set up a Subversion conversion source for url.
#
# Raises NoRepo when url does not look like a Subversion repository or
# cannot be opened, MissingTool when the svn bindings are missing or
# older than 1.4, and util.Abort on invalid revision settings or when
# no head revision can be found in the module.
223 223 def __init__(self, ui, url, rev=None):
224 224 super(svn_source, self).__init__(ui, url, rev=rev)
225 225
# Accept svn:// and svn+ssh:// URLs, local paths containing a .svn
# entry, and anything recognized by issvnurl().
226 226 if not (url.startswith('svn://') or url.startswith('svn+ssh://') or
227 227 (os.path.exists(url) and
228 228 os.path.exists(os.path.join(url, '.svn'))) or
229 229 issvnurl(ui, url)):
230 230 raise NoRepo(_("%s does not look like a Subversion repository")
231 231 % url)
# svn is None when the bindings could not be imported at module load.
232 232 if svn is None:
233 233 raise MissingTool(_('Could not load Subversion python bindings'))
234 234
235 235 try:
236 236 version = svn.core.SVN_VER_MAJOR, svn.core.SVN_VER_MINOR
237 237 if version < (1, 4):
238 238 raise MissingTool(_('Subversion python bindings %d.%d found, '
239 239 '1.4 or later required') % version)
240 240 except AttributeError:
241 241 raise MissingTool(_('Subversion python bindings are too old, 1.4 '
242 242 'or later required'))
243 243
# Highest converted revision number per module, see setrevmap().
244 244 self.lastrevs = {}
245 245
246 246 latest = None
247 247 try:
248 248 # Support file://path@rev syntax. Useful e.g. to convert
249 249 # deleted branches.
250 250 at = url.rfind('@')
251 251 if at >= 0:
252 252 latest = int(url[at + 1:])
253 253 url = url[:at]
254 254 except ValueError:
# The text after the last '@' is not a revision number: keep url as is.
255 255 pass
256 256 self.url = geturl(url)
257 257 self.encoding = 'UTF-8' # Subversion is always nominal UTF-8
258 258 try:
259 259 self.transport = transport.SvnRaTransport(url=self.url)
260 260 self.ra = self.transport.ra
261 261 self.ctx = self.transport.client
262 262 self.baseurl = svn.ra.get_repos_root(self.ra)
263 263 # Module is either empty or a repository path starting with
264 264 # a slash and not ending with a slash.
265 265 self.module = urllib.unquote(self.url[len(self.baseurl):])
266 266 self.prevmodule = None
267 267 self.rootmodule = self.module
# Caches filled by _fetch_revisions() and emptied by getcommit() and
# getchanges().
268 268 self.commits = {}
269 269 self.paths = {}
270 270 self.uuid = svn.ra.get_uuid(self.ra)
271 271 except SubversionException:
272 272 ui.traceback()
273 273 raise NoRepo(_("%s does not look like a Subversion repository")
274 274 % self.url)
275 275
# An explicit rev argument overrides a revision parsed from the URL.
276 276 if rev:
277 277 try:
278 278 latest = int(rev)
279 279 except ValueError:
280 280 raise util.Abort(_('svn: revision %s is not an integer') % rev)
281 281
# Read the convert.svn.trunk and convert.svn.startrev settings; a
# negative start revision is clamped to 0.
282 282 self.trunkname = self.ui.config('convert', 'svn.trunk', 'trunk').strip('/')
283 283 self.startrev = self.ui.config('convert', 'svn.startrev', default=0)
284 284 try:
285 285 self.startrev = int(self.startrev)
286 286 if self.startrev < 0:
287 287 self.startrev = 0
288 288 except ValueError:
289 289 raise util.Abort(_('svn: start revision %s is not an integer')
290 290 % self.startrev)
291 291
# Locate the head revision of the module, up to the requested revision.
292 292 try:
293 293 self.head = self.latest(self.module, latest)
294 294 except SvnPathNotFound:
295 295 self.head = None
296 296 if not self.head:
297 297 raise util.Abort(_('no revision found in module %s')
298 298 % self.module)
299 299 self.last_changed = self.revnum(self.head)
300 300
# One-entry (rev, changes) cache shared by getchangedfiles() and
# getchanges().
301 301 self._changescache = None
302 302
# Remember url as a working copy only when it holds a .svn/entries file;
# converted() writes its map file under self.wc.
303 303 if os.path.exists(os.path.join(url, '.svn/entries')):
304 304 self.wc = url
305 305 else:
306 306 self.wc = None
307 307 self.convertfp = None
308 308
def setrevmap(self, revmap):
    """Record, for every module found in revmap keys, the highest
    revision number seen, and store the result in self.lastrevs."""
    newest = {}
    for revid in revmap.iterkeys():
        module, revnum = revsplit(revid)[1:]
        if module not in newest or revnum > newest[module]:
            newest[module] = revnum
    self.lastrevs = newest
317 317
def exists(self, path, optrev):
    """Return True if path, relative to the source url, can be listed
    at revision optrev."""
    target = self.url.rstrip('/') + '/' + quote(path)
    try:
        svn.client.ls(target, optrev, False, self.ctx)
    except SubversionException:
        return False
    return True
325 325
# Return the list of head revision ids to convert and store it in
# self.heads. The first entry is the head of the trunk, or of the module
# itself when no trunk is found, followed by one head per non-empty
# directory found in the branches directory. As side effects, self.module
# and self.head are moved to the trunk when there is one, and self.tags
# is set to the tags directory path or None.
#
# Raises util.Abort when a configured directory is missing, when the
# trunk has no revision, or when a start revision is combined with
# several heads or lies after the head.
326 326 def getheads(self):
327 327
# True if path is a directory at revnum.
328 328 def isdir(path, revnum):
329 329 kind = self._checkpath(path, revnum)
330 330 return kind == svn.core.svn_node_dir
331 331
# Resolve the convert.svn.<name> setting to an existing path relative to
# the source url, falling back to name itself. Return None when the
# setting is blank or the path does not exist; a missing path is an
# error only when it was explicitly configured.
332 332 def getcfgpath(name, rev):
333 333 cfgpath = self.ui.config('convert', 'svn.' + name)
334 334 if cfgpath is not None and cfgpath.strip() == '':
335 335 return None
336 336 path = (cfgpath or name).strip('/')
337 337 if not self.exists(path, rev):
338 338 if self.module.endswith(path) and name == 'trunk':
339 339 # we are converting from inside this directory
340 340 return None
341 341 if cfgpath:
342 342 raise util.Abort(_('expected %s to be at %r, but not found')
343 343 % (name, path))
344 344 return None
345 345 self.ui.note(_('found %s at %r\n') % (name, path))
346 346 return path
347 347
348 348 rev = optrev(self.last_changed)
349 349 oldmodule = ''
350 350 trunk = getcfgpath('trunk', rev)
351 351 self.tags = getcfgpath('tags', rev)
352 352 branches = getcfgpath('branches', rev)
353 353
354 354 # If the project has a trunk or branches, we will extract heads
355 355 # from them. We keep the project root otherwise.
356 356 if trunk:
# Keep the project root in oldmodule: the tags and branches paths built
# below are relative to it, not to the trunk.
357 357 oldmodule = self.module or ''
358 358 self.module += '/' + trunk
359 359 self.head = self.latest(self.module, self.last_changed)
360 360 if not self.head:
361 361 raise util.Abort(_('no revision found in module %s')
362 362 % self.module)
363 363
364 364 # First head in the list is the module's head
365 365 self.heads = [self.head]
366 366 if self.tags is not None:
367 367 self.tags = '%s/%s' % (oldmodule , (self.tags or 'tags'))
368 368
369 369 # Check if branches bring a few more heads to the list
370 370 if branches:
371 371 rpath = self.url.strip('/')
372 372 branchnames = svn.client.ls(rpath + '/' + quote(branches),
373 373 rev, False, self.ctx)
374 374 for branch in branchnames.keys():
375 375 module = '%s/%s/%s' % (oldmodule, branches, branch)
# Entries of the branches directory which are not directories are not
# branches.
376 376 if not isdir(module, self.last_changed):
377 377 continue
378 378 brevid = self.latest(module, self.last_changed)
379 379 if not brevid:
380 380 self.ui.note(_('ignoring empty branch %s\n') % branch)
381 381 continue
382 382 self.ui.note(_('found branch %s at %d\n') %
383 383 (branch, self.revnum(brevid)))
384 384 self.heads.append(brevid)
385 385
# A start revision is only supported with a single head, which must not
# be older than the start revision.
386 386 if self.startrev and self.heads:
387 387 if len(self.heads) > 1:
388 388 raise util.Abort(_('svn: start revision is not supported '
389 389 'with more than one branch'))
390 390 revnum = self.revnum(self.heads[0])
391 391 if revnum < self.startrev:
392 392 raise util.Abort(
393 393 _('svn: no revision found after start revision %d')
394 394 % self.startrev)
395 395
396 396 return self.heads
397 397
def getchanges(self, rev):
    """Return (files, copies) for revision rev.

    files is a sorted list of (filename, rev) pairs and copies maps
    copied files to their source. The set of removed files is stored in
    self.removed. A result cached by getchangedfiles() for the same rev
    is returned as is.
    """
    cached = self._changescache
    if cached and cached[0] == rev:
        return cached[1]
    self._changescache = None
    paths, parents = self.paths[rev]
    if not parents:
        # Perform a full checkout on roots
        uuid, module, revnum = revsplit(rev)
        entries = svn.client.ls(self.baseurl + quote(module),
                                optrev(revnum), True, self.ctx)
        files = [name for name, entry in entries.iteritems()
                 if entry.kind == svn.core.svn_node_file]
        copies = {}
        self.removed = set()
    else:
        files, self.removed, copies = self.expandpaths(rev, paths, parents)

    files.sort()
    files = [(name, rev) for name in files]

    # caller caches the result, so free it here to release memory
    del self.paths[rev]
    return (files, copies)
421 421
def getchangedfiles(self, rev, i):
    """Return the names of the files changed in rev.

    The full getchanges() result is kept in self._changescache so that a
    following getchanges(rev) call can reuse it.
    """
    changes = self.getchanges(rev)
    self._changescache = (rev, changes)
    names = []
    for entry in changes[0]:
        names.append(entry[0])
    return names
426 426
def getcommit(self, rev):
    """Return the commit object of rev, fetching the revision log when
    it is not cached yet. The entry is removed from self.commits."""
    if rev not in self.commits:
        uuid, module, revnum = revsplit(rev)
        self.module = module
        self.reparent(module)
        # We assume that:
        # - requests for revisions after "stop" come from the
        # revision graph backward traversal. Cache all of them
        # down to stop, they will be used eventually.
        # - requests for revisions before "stop" come to get
        # isolated branches parents. Just fetch what is needed.
        stop = self.lastrevs.get(module, 0)
        if revnum < stop:
            stop = revnum + 1
        self._fetch_revisions(revnum, stop)
    # caller caches the result, so free it here to release memory
    return self.commits.pop(rev)
446 446
def gettags(self):
    """Return a dict mapping tag names to the revision ids they tag.

    The dict is empty when no tags directory is known (self.tags is
    None).
    """
    tags = {}
    if self.tags is None:
        return tags

    # svn tags are just a convention, project branches left in a
    # 'tags' directory. There is no other relationship than
    # ancestry, which is expensive to discover and makes them hard
    # to update incrementally. Worse, past revisions may be
    # referenced by tags far away in the future, requiring a deep
    # history traversal on every calculation. Current code
    # performs a single backward traversal, tracking moves within
    # the tags directory (tag renaming) and recording a new tag
    # everytime a project is copied from outside the tags
    # directory. It also lists deleted tags, this behaviour may
    # change in the future.
    pendings = []
    tagspath = self.tags
    start = svn.ra.get_latest_revnum(self.ra)
    stream = self._getlog([self.tags], start, self.startrev)
    try:
        for entry in stream:
            origpaths, revnum, author, date, message = entry
            if origpaths is None:
                # The log may report a revision without changed paths.
                # Treat it as an empty change instead of failing on
                # None, so that pending tags are still resolved below.
                origpaths = {}
            copies = [(e.copyfrom_path, e.copyfrom_rev, p) for p, e
                      in origpaths.iteritems() if e.copyfrom_path]
            # Apply moves/copies from more specific to general
            copies.sort(reverse=True)

            srctagspath = tagspath
            if copies and copies[-1][2] == tagspath:
                # Track tags directory moves
                srctagspath = copies.pop()[0]

            for source, sourcerev, dest in copies:
                if not dest.startswith(tagspath + '/'):
                    continue
                for tag in pendings:
                    if tag[0].startswith(dest):
                        # A pending tag came from this copy destination:
                        # follow it to the copy source.
                        tagpath = source + tag[0][len(dest):]
                        tag[:2] = [tagpath, sourcerev]
                        break
                else:
                    pendings.append([source, sourcerev, dest])

            # Filter out tags with children coming from different
            # parts of the repository like:
            # /tags/tag.1 (from /trunk:10)
            # /tags/tag.1/foo (from /branches/foo:12)
            # Here /tags/tag.1 is discarded as well as its children.
            # It happens with tools like cvs2svn. Such tags cannot
            # be represented in mercurial.
            addeds = dict((p, e.copyfrom_path) for p, e
                          in origpaths.iteritems()
                          if e.action == 'A' and e.copyfrom_path)
            badroots = set()
            for destroot in addeds:
                for source, sourcerev, dest in pendings:
                    if (not dest.startswith(destroot + '/')
                        or source.startswith(addeds[destroot] + '/')):
                        continue
                    badroots.add(destroot)
                    break

            for badroot in badroots:
                pendings = [p for p in pendings if p[2] != badroot
                            and not p[2].startswith(badroot + '/')]

            # Tell tag renamings from tag creations
            renamings = []
            for source, sourcerev, dest in pendings:
                tagname = dest.split('/')[-1]
                if source.startswith(srctagspath):
                    renamings.append([source, sourcerev, tagname])
                    continue
                if tagname in tags:
                    # Keep the latest tag value
                    continue
                # From revision may be fake, get one with changes
                try:
                    tagid = self.latest(source, sourcerev)
                    if tagid and tagname not in tags:
                        tags[tagname] = tagid
                except SvnPathNotFound:
                    # It happens when we are following directories
                    # we assumed were copied with their parents
                    # but were really created in the tag
                    # directory.
                    pass
            pendings = renamings
            tagspath = srctagspath
    finally:
        stream.close()
    return tags
540 540
def converted(self, rev, destrev):
    """Append a '<destrev> <svn revision number>' line to the
    .svn/hg-shamap file of the working copy, if there is one."""
    if not self.wc:
        return
    fp = self.convertfp
    if fp is None:
        # Opened on first use and kept open for later calls.
        mappath = os.path.join(self.wc, '.svn', 'hg-shamap')
        fp = self.convertfp = open(mappath, 'a')
    fp.write('%s %d\n' % (destrev, self.revnum(rev)))
    fp.flush()
549 549
def revid(self, revnum, module=None):
    """Build the revision identifier of revnum in module, which
    defaults to the current module when empty or None."""
    if not module:
        module = self.module
    return 'svn:%s%s@%s' % (self.uuid, module, revnum)
552 552
def revnum(self, rev):
    """Return the integer revision number ending the identifier rev."""
    # Everything after the last '@', or the whole string without one.
    return int(rev[rev.rfind('@') + 1:])
555 555
def latest(self, path, stop=0):
    """Find the latest revid affecting path, up to stop. It may return
    a revision in a different module, since a branch may be moved without
    a change being reported. Return None if computed module does not
    belong to rootmodule subtree.

    A false stop means the latest revision of the repository. Raises
    SvnPathNotFound if path cannot be found at revision stop.
    """
    if not path.startswith(self.rootmodule):
        # Requests on foreign branches may be forbidden at server level
        self.ui.debug('ignoring foreign branch %r\n' % path)
        return None

    if not stop:
        stop = svn.ra.get_latest_revnum(self.ra)
    try:
        # path is given from the repository root, so stat it from there.
        prevmodule = self.reparent('')
        dirent = svn.ra.stat(self.ra, path.strip('/'), stop)
        self.reparent(prevmodule)
    except SubversionException:
        dirent = None
    if not dirent:
        raise SvnPathNotFound(_('%s not found up to revision %d')
                              % (path, stop))

    # stat() gives us the previous revision on this line of
    # development, but it might be in *another module*. Fetch the
    # log and detect renames down to the latest revision.
    stream = self._getlog([path], stop, dirent.created_rev)
    try:
        for entry in stream:
            paths, revnum, author, date, message = entry
            if revnum <= dirent.created_rev:
                break

            # The log may report a revision without changed paths
            # (None): there is nothing to follow in that case.
            for p in paths or ():
                if not path.startswith(p) or not paths[p].copyfrom_path:
                    continue
                newpath = paths[p].copyfrom_path + path[len(p):]
                self.ui.debug("branch renamed from %s to %s at %d\n" %
                              (path, newpath, revnum))
                path = newpath
                break
    finally:
        stream.close()

    if not path.startswith(self.rootmodule):
        self.ui.debug('ignoring foreign branch %r\n' % path)
        return None
    return self.revid(dirent.created_rev, path)
604 604
def reparent(self, module):
    """Reparent the svn transport and return the previous parent.

    Nothing is done, and module itself is returned, when the transport
    is already parented on module.
    """
    previous = self.prevmodule
    if previous == module:
        return module
    if previous is None:
        previous = ''
    svnurl = self.baseurl + quote(module)
    self.ui.debug("reparent to %s\n" % svnurl)
    svn.ra.reparent(self.ra, svnurl)
    self.prevmodule = module
    return previous
617 617
# Expand the changed paths of revision rev into file-level changes.
#
# paths is a sequence of (path, entry) pairs and parents the list of
# parent revision ids. Returns (changed, removed, copies): the list of
# changed files (removed ones included), the set of removed files and a
# dict mapping copied files to their source. All file names are relative
# to the module and passed through self.recode().
618 618 def expandpaths(self, rev, paths, parents):
619 619 changed, removed = set(), set()
620 620 copies = {}
621 621
# Make sure the transport is parented on the module of rev.
622 622 new_module, revnum = revsplit(rev)[1:]
623 623 if new_module != self.module:
624 624 self.module = new_module
625 625 self.reparent(self.module)
626 626
627 627 for i, (path, ent) in enumerate(paths):
628 628 self.ui.progress(_('scanning paths'), i, item=path,
629 629 total=len(paths))
630 630 entrypath = self.getrelpath(path)
631 631
632 632 kind = self._checkpath(entrypath, revnum)
633 633 if kind == svn.core.svn_node_file:
634 634 changed.add(self.recode(entrypath))
635 635 if not ent.copyfrom_path or not parents:
636 636 continue
637 637 # Copy sources not in parent revisions cannot be
638 638 # represented, ignore their origin for now
639 639 pmodule, prevnum = revsplit(parents[0])[1:]
640 640 if ent.copyfrom_rev < prevnum:
641 641 continue
642 642 copyfrom_path = self.getrelpath(ent.copyfrom_path, pmodule)
643 643 if not copyfrom_path:
644 644 continue
645 645 self.ui.debug("copied to %s from %s@%s\n" %
646 646 (entrypath, copyfrom_path, ent.copyfrom_rev))
647 647 copies[self.recode(entrypath)] = self.recode(copyfrom_path)
# NOTE(review): 0 is presumably svn.core.svn_node_none - confirm.
648 648 elif kind == 0: # gone, but had better be a deleted *file*
649 649 self.ui.debug("gone from %s\n" % ent.copyfrom_rev)
# Look at what the path was in the first parent to tell a removed file
# from a removed directory.
650 650 pmodule, prevnum = revsplit(parents[0])[1:]
651 651 parentpath = pmodule + "/" + entrypath
652 652 fromkind = self._checkpath(entrypath, prevnum, pmodule)
653 653
654 654 if fromkind == svn.core.svn_node_file:
655 655 removed.add(self.recode(entrypath))
656 656 elif fromkind == svn.core.svn_node_dir:
# A removed directory: every file it held in the parent is removed.
657 657 oroot = parentpath.strip('/')
658 658 nroot = path.strip('/')
659 659 children = self._iterfiles(oroot, prevnum)
660 660 for childpath in children:
661 661 childpath = childpath.replace(oroot, nroot)
662 662 childpath = self.getrelpath("/" + childpath, pmodule)
663 663 if childpath:
664 664 removed.add(self.recode(childpath))
665 665 else:
666 666 self.ui.debug('unknown path in revision %d: %s\n' % \
667 667 (revnum, path))
668 668 elif kind == svn.core.svn_node_dir:
669 669 if ent.action == 'M':
670 670 # If the directory just had a prop change,
671 671 # then we shouldn't need to look for its children.
672 672 continue
673 673 if ent.action == 'R' and parents:
674 674 # If a directory is replacing a file, mark the previous
675 675 # file as deleted
676 676 pmodule, prevnum = revsplit(parents[0])[1:]
677 677 pkind = self._checkpath(entrypath, prevnum, pmodule)
678 678 if pkind == svn.core.svn_node_file:
679 679 removed.add(self.recode(entrypath))
680 680 elif pkind == svn.core.svn_node_dir:
681 681 # We do not know what files were kept or removed,
682 682 # mark them all as changed.
683 683 for childpath in self._iterfiles(pmodule, prevnum):
684 684 childpath = self.getrelpath("/" + childpath)
685 685 if childpath:
686 686 changed.add(self.recode(childpath))
687 687
# Every file found below the directory at revnum is marked as changed.
688 688 for childpath in self._iterfiles(path, revnum):
689 689 childpath = self.getrelpath("/" + childpath)
690 690 if childpath:
691 691 changed.add(self.recode(childpath))
692 692
693 693 # Handle directory copies
694 694 if not ent.copyfrom_path or not parents:
695 695 continue
696 696 # Copy sources not in parent revisions cannot be
697 697 # represented, ignore their origin for now
698 698 pmodule, prevnum = revsplit(parents[0])[1:]
699 699 if ent.copyfrom_rev < prevnum:
700 700 continue
701 701 copyfrompath = self.getrelpath(ent.copyfrom_path, pmodule)
702 702 if not copyfrompath:
703 703 continue
704 704 self.ui.debug("mark %s came from %s:%d\n"
705 705 % (path, copyfrompath, ent.copyfrom_rev))
# Record one copy per file found in the copy source.
706 706 children = self._iterfiles(ent.copyfrom_path, ent.copyfrom_rev)
707 707 for childpath in children:
708 708 childpath = self.getrelpath("/" + childpath, pmodule)
709 709 if not childpath:
710 710 continue
711 711 copytopath = path + childpath[len(copyfrompath):]
712 712 copytopath = self.getrelpath(copytopath)
713 713 copies[self.recode(copytopath)] = self.recode(childpath)
714 714
# Passing None as position ends the progress topic.
715 715 self.ui.progress(_('scanning paths'), None)
# Removed files are reported among the changed ones as well.
716 716 changed.update(removed)
717 717 return (list(changed), removed, copies)
718 718
# Fetch the log of self.module between the two revision numbers, going
# from the higher one to the lower one, and fill self.commits and
# self.paths with one entry per parsed revision. The arguments are
# swapped when given in increasing order.
#
# Raises util.Abort when svn reports that the revision does not exist.
719 719 def _fetch_revisions(self, from_revnum, to_revnum):
720 720 if from_revnum < to_revnum:
721 721 from_revnum, to_revnum = to_revnum, from_revnum
722 722
# Commit parsed just before the current one. Its parents are filled in
# once the next log entry is parsed, see the end of parselogentry().
723 723 self.child_cset = None
724 724
725 725 def parselogentry(orig_paths, revnum, author, date, message):
726 726 """Return the parsed commit object or None, and True if
727 727 the revision is a branch root.
728 728 """
729 729 self.ui.debug("parsing revision %d (%d changes)\n" %
730 730 (revnum, len(orig_paths)))
731 731
732 732 branched = False
733 733 rev = self.revid(revnum)
734 734 # branch log might return entries for a parent we already have
735 735
736 736 if rev in self.commits or revnum < to_revnum:
737 737 return None, branched
738 738
739 739 parents = []
740 740 # check whether this revision is the start of a branch or part
741 741 # of a branch renaming
742 742 orig_paths = sorted(orig_paths.iteritems())
743 743 root_paths = [(p, e) for p, e in orig_paths
744 744 if self.module.startswith(p)]
745 745 if root_paths:
# The paths are sorted, so the last matching one is the longest prefix
# of the module.
746 746 path, ent = root_paths[-1]
747 747 if ent.copyfrom_path:
748 748 branched = True
749 749 newpath = ent.copyfrom_path + self.module[len(path):]
750 750 # ent.copyfrom_rev may not be the actual last revision
751 751 previd = self.latest(newpath, ent.copyfrom_rev)
752 752 if previd is not None:
753 753 prevmodule, prevnum = revsplit(previd)[1:]
754 754 if prevnum >= self.startrev:
755 755 parents = [previd]
756 756 self.ui.note(
757 757 _('found parent of branch %s at %d: %s\n') %
758 758 (self.module, prevnum, prevmodule))
759 759 else:
760 760 self.ui.debug("no copyfrom path, don't know what to do.\n")
761 761
762 762 paths = []
763 763 # filter out unrelated paths
764 764 for path, ent in orig_paths:
765 765 if self.getrelpath(path) is None:
766 766 continue
767 767 paths.append((path, ent))
768 768
769 769 # Example SVN datetime. Includes microseconds.
770 770 # ISO-8601 conformant
771 771 # '2007-01-04T17:35:00.902377Z'
772 772 date = util.parsedate(date[:19] + " UTC", ["%Y-%m-%dT%H:%M:%S"])
773 773
774 774 log = message and self.recode(message) or ''
775 775 author = author and self.recode(author) or ''
# The branch name is the last component of the module; the trunk maps
# to no branch name.
776 776 try:
777 777 branch = self.module.split("/")[-1]
778 778 if branch == self.trunkname:
779 779 branch = None
780 780 except IndexError:
781 781 branch = None
782 782
783 783 cset = commit(author=author,
784 784 date=util.datestr(date),
785 785 desc=log,
786 786 parents=parents,
787 787 branch=branch,
788 788 rev=rev)
789 789
790 790 self.commits[rev] = cset
791 791 # The parents list is *shared* among self.paths and the
792 792 # commit object. Both will be updated below.
793 793 self.paths[rev] = (paths, cset.parents)
# The previously parsed commit has no parent yet: this revision is its
# parent.
794 794 if self.child_cset and not self.child_cset.parents:
795 795 self.child_cset.parents[:] = [rev]
796 796 self.child_cset = cset
797 797 return cset, branched
798 798
799 799 self.ui.note(_('fetching revision log for "%s" from %d to %d\n') %
800 800 (self.module, from_revnum, to_revnum))
801 801
802 802 try:
# firstcset ends up being the last commit parsed in the loop below.
803 803 firstcset = None
804 804 lastonbranch = False
805 805 stream = self._getlog([self.module], from_revnum, to_revnum)
806 806 try:
807 807 for entry in stream:
808 808 paths, revnum, author, date, message = entry
# Revisions before the start revision are not converted.
809 809 if revnum < self.startrev:
810 810 lastonbranch = True
811 811 break
812 812 if not paths:
813 813 self.ui.debug('revision %d has no entries\n' % revnum)
814 814 # If we ever leave the loop on an empty
815 815 # revision, do not try to get a parent branch
816 816 lastonbranch = lastonbranch or revnum == 0
817 817 continue
818 818 cset, lastonbranch = parselogentry(paths, revnum, author,
819 819 date, message)
820 820 if cset:
821 821 firstcset = cset
822 822 if lastonbranch:
823 823 break
824 824 finally:
825 825 stream.close()
826 826
827 827 if not lastonbranch and firstcset and not firstcset.parents:
828 828 # The first revision of the sequence (the last fetched one)
829 829 # has invalid parents if not a branch root. Find the parent
830 830 # revision now, if any.
831 831 try:
832 832 firstrevnum = self.revnum(firstcset.rev)
833 833 if firstrevnum > 1:
834 834 latest = self.latest(self.module, firstrevnum - 1)
835 835 if latest:
836 836 firstcset.parents.append(latest)
837 837 except SvnPathNotFound:
838 838 pass
839 839 except SubversionException, (inst, num):
840 840 if num == svn.core.SVN_ERR_FS_NO_SUCH_REVISION:
841 841 raise util.Abort(_('svn: branch has no revision %s') % to_revnum)
842 842 raise
843 843
def getfile(self, file, rev):
    """Return (data, mode) for file at the revision identified by rev.

    mode is 'l' when the svn:special property is present, 'x' when
    svn:executable is present (svn:special takes precedence) and ''
    otherwise. For 'l' entries a leading "link " marker is removed
    from the returned data.

    Raise IOError when file is listed in self.removed or when
    Subversion reports that the path was not found.
    """
    # TODO: ra.get_file transmits the whole file instead of diffs.
    if file in self.removed:
        raise IOError()
    mode = ''
    try:
        module, revnum = revsplit(rev)[1:]
        if module != self.module:
            # The revision lives in another module, move the session there
            self.module = module
            self.reparent(module)
        buf = StringIO()
        props = svn.ra.get_file(self.ra, file, revnum, buf)
        data = buf.getvalue()
        # ra.get_files() seems to keep a reference on the input buffer
        # preventing collection. Release it explicitly.
        buf.close()
        if isinstance(props, list):
            props = props[-1]
        if "svn:special" in props:
            mode = 'l'
        elif "svn:executable" in props:
            mode = 'x'
        else:
            mode = ''
    except SubversionException:
        err = sys.exc_info()[1]
        missing = (svn.core.SVN_ERR_FS_NOT_FOUND,
                   svn.core.SVN_ERR_RA_DAV_PATH_NOT_FOUND)
        if err.apr_err in missing: # File not found
            raise IOError()
        raise
    if mode == 'l':
        marker = "link "
        if data.startswith(marker):
            data = data[len(marker):]
    return data, mode
875 875
def _iterfiles(self, path, revnum):
    """Enumerate all files in path at revnum, recursively."""
    reldir = path.strip('/')
    pool = Pool()
    url = '/'.join([self.baseurl, quote(reldir)]).strip('/')
    listing = svn.client.ls(url, optrev(revnum), True, self.ctx, pool)
    # Listed names are prefixed with the directory they were listed
    # from, except when that directory is the root (empty path).
    prefix = ''
    if reldir:
        prefix = reldir + '/'
    return ((prefix + name) for name, entry in listing.iteritems()
            if entry.kind == svn.core.svn_node_file)
886 886
def getrelpath(self, path, module=None):
    """Return path relative to module, or None when it is outside it.

    module defaults to self.module. Given a module such as
    "/CMFPlone/branches/Plone-2_0-branch" and a path reported by
    svn log such as
    "/CMFPlone/branches/Plone-2_0-branch/tests/PloneTestCase.py",
    the result is "tests/PloneTestCase.py". The module itself maps to
    the empty string.
    """
    if module is None:
        module = self.module
    if path.startswith(module):
        tail = path.rstrip('/')[len(module):]
        if not tail:
            return tail
        if tail[0] == '/':
            return tail[1:]
        # Otherwise module is only a textual prefix of another name,
        # not a parent directory: fall through.

    # The path is outside our tracked tree...
    self.ui.debug('%r is not under %r, ignoring\n' % (path, module))
    return None
906 906
def _checkpath(self, path, revnum, module=None):
    """Return the svn.ra.check_path() result for path at revnum.

    When module is given, the session is reparented to '' for the
    duration of the call, path is looked up as module/path, and the
    value previously returned by reparent() is restored afterwards.
    """
    reparented = module is not None
    if reparented:
        prevmodule = self.reparent('')
        path = module + '/' + path
    try:
        # ra.check_path does not like leading slashes very much, it leads
        # to PROPFIND subversion errors
        return svn.ra.check_path(self.ra, path.strip('/'), revnum)
    finally:
        if reparented:
            self.reparent(prevmodule)
918 918
def _getlog(self, paths, start, end, limit=0, discover_changed_paths=True,
            strict_node_history=False):
    """Return a logstream reading the output of 'hg debugsvnlog'.

    The request (base url, normalized paths, revision range and
    options) is encoded with encodeargs() and written to the child
    process standard input. Raise util.Abort if closing that pipe
    fails with IOError.
    """
    def normalize(p):
        # svn >= 1.5 only wants paths relative to supplied URL:
        # anchor relative names on the current module and drop the
        # surrounding slashes.
        if not p.startswith('/'):
            p = self.module + '/' + p
        return p.strip('/')

    relpaths = [normalize(p) for p in paths]
    payload = encodeargs([self.baseurl, relpaths, start, end, limit,
                          discover_changed_paths, strict_node_history])
    cmd = '%s debugsvnlog' % util.shellquote(util.hgexecutable())
    stdin, stdout = util.popen2(util.quotecommand(cmd))
    stdin.write(payload)
    try:
        stdin.close()
    except IOError:
        raise util.Abort(_('Mercurial failed to run itself, check'
                           ' hg executable is in PATH'))
    return logstream(stdout)
941 941
# Shell script installed as hooks/pre-revprop-change by svn_sink when it
# creates a new repository. It allows (exit 0) modifying svn:log and
# adding the hg:convert-branch and hg:convert-rev revision properties;
# any other revision property change prints an error to stderr and is
# rejected (exit 1).
pre_revprop_change = '''#!/bin/sh

REPOS="$1"
REV="$2"
USER="$3"
PROPNAME="$4"
ACTION="$5"

if [ "$ACTION" = "M" -a "$PROPNAME" = "svn:log" ]; then exit 0; fi
if [ "$ACTION" = "A" -a "$PROPNAME" = "hg:convert-branch" ]; then exit 0; fi
if [ "$ACTION" = "A" -a "$PROPNAME" = "hg:convert-rev" ]; then exit 0; fi

echo "Changing prohibited revision property" >&2
exit 1
'''
957 957
class svn_sink(converter_sink, commandline):
    """Conversion sink committing changesets through an svn working copy.

    Changes are applied to the files of the working copy (self.wc) and
    recorded by running the 'svn' command line tool. Pending svn
    operations are queued in self.delete, self.copies, self.setexec
    and self.delexec and flushed by putcommit().
    """
    commit_re = re.compile(r'Committed revision (\d+).', re.M)
    uuid_re = re.compile(r'Repository UUID:\s*(\S+)', re.M)

    def prerun(self):
        """Change to the working copy directory, if there is one."""
        if self.wc:
            os.chdir(self.wc)

    def postrun(self):
        """Change back to the directory recorded at construction time."""
        if self.wc:
            os.chdir(self.cwd)

    def join(self, name):
        """Return the path of name inside the working copy .svn directory."""
        return os.path.join(self.wc, '.svn', name)

    def revmapfile(self):
        return self.join('hg-shamap')

    def authorfile(self):
        return self.join('hg-authormap')

    def __init__(self, ui, path):
        """Open or create the working copy used for the conversion.

        If path is already a working copy (it has a .svn/entries file)
        it is updated and used directly. Otherwise a working copy named
        after path with a '-wc' suffix is checked out in the current
        directory, creating a repository at path first when its parent
        directory exists and path is not a repository yet.
        """
        converter_sink.__init__(self, ui, path)
        commandline.__init__(self, ui, 'svn')
        self.delete = []
        self.setexec = []
        self.delexec = []
        self.copies = []
        self.wc = None
        self.cwd = os.getcwd()

        path = os.path.realpath(path)

        # Path of the repository created here, if any
        created = False
        if os.path.isfile(os.path.join(path, '.svn', 'entries')):
            self.wc = path
            self.run0('update')
        else:
            wcpath = os.path.join(os.getcwd(), os.path.basename(path) + '-wc')

            if os.path.isdir(os.path.dirname(path)):
                if not os.path.exists(os.path.join(path, 'db', 'fs-type')):
                    ui.status(_('initializing svn repository %r\n') %
                              os.path.basename(path))
                    commandline(ui, 'svnadmin').run0('create', path)
                    created = path
                # Turn the local path into a file:// URL
                path = util.normpath(path)
                if not path.startswith('/'):
                    path = '/' + path
                path = 'file://' + path

            ui.status(_('initializing svn working copy %r\n')
                      % os.path.basename(wcpath))
            self.run0('checkout', path, wcpath)

            self.wc = wcpath
        self.opener = scmutil.opener(self.wc)
        self.wopener = scmutil.opener(self.wc)
        self.childmap = mapfile(ui, self.join('hg-childmap'))
        self.is_exec = util.checkexec(self.wc) and util.isexec or None

        if created:
            # Install the hook allowing the revision property changes
            # performed by putcommit().
            hook = os.path.join(created, 'hooks', 'pre-revprop-change')
            fp = open(hook, 'w')
            try:
                fp.write(pre_revprop_change)
            finally:
                fp.close()
            util.setflags(hook, False, True)

        output = self.run0('info')
        self.uuid = self.uuid_re.search(output).group(1).strip()

    def wjoin(self, *names):
        """Return the path made of names joined to the working copy root."""
        return os.path.join(self.wc, *names)

    def putfile(self, filename, flags, data):
        """Write filename into the working copy with the given flags.

        Symlinks ('l' in flags) are created through self.wopener.
        Regular files are written and, when their executable state has
        to change, queued in self.setexec or self.delexec so the
        svn:executable property is adjusted by the next putcommit().
        """
        if 'l' in flags:
            self.wopener.symlink(data, filename)
        else:
            wpath = self.wjoin(filename)
            try:
                # Replace an existing symlink instead of writing through
                # it. The working copy path must be used here: filename
                # alone is relative to the process current directory.
                if os.path.islink(wpath):
                    os.unlink(wpath)
            except OSError:
                pass
            self.wopener.write(filename, data)

            if self.is_exec:
                was_exec = self.is_exec(wpath)
            else:
                # On filesystems not supporting execute-bit, there is no way
                # to know if it is set but asking subversion. Setting it
                # systematically is just as expensive and much simpler.
                was_exec = 'x' not in flags

            util.setflags(wpath, False, 'x' in flags)
            if was_exec:
                if 'x' not in flags:
                    self.delexec.append(filename)
            else:
                if 'x' in flags:
                    self.setexec.append(filename)

    def _copyfile(self, source, dest):
        """Record with 'svn copy' a copy whose destination may exist."""
        # SVN's copy command pukes if the destination file exists, but
        # our copyfile method expects to record a copy that has
        # already occurred. Cross the semantic gap.
        wdest = self.wjoin(dest)
        exists = os.path.lexists(wdest)
        if exists:
            # Move the existing destination aside under a unique name
            fd, tempname = tempfile.mkstemp(
                prefix='hg-copy-', dir=os.path.dirname(wdest))
            os.close(fd)
            os.unlink(tempname)
            os.rename(wdest, tempname)
        try:
            self.run0('copy', source, dest)
        finally:
            if exists:
                # Put the saved content back over whatever svn created
                try:
                    os.unlink(wdest)
                except OSError:
                    pass
                os.rename(tempname, wdest)

    def dirs_of(self, files):
        """Return the set of directories among files and their parents."""
        dirs = set()
        for f in files:
            if os.path.isdir(self.wjoin(f)):
                dirs.add(f)
            for i in strutil.rfindall(f, '/'):
                dirs.add(f[:i])
        return dirs

    def add_dirs(self, files):
        """Run 'svn add' on directories of files lacking .svn/entries.

        Return the sorted list of directories passed to svn.
        """
        add_dirs = [d for d in sorted(self.dirs_of(files))
                    if not os.path.exists(self.wjoin(d, '.svn', 'entries'))]
        if add_dirs:
            self.xargs(add_dirs, 'add', non_recursive=True, quiet=True)
        return add_dirs

    def add_files(self, files):
        """Run 'svn add' on files, if any, and return files."""
        if files:
            self.xargs(files, 'add', quiet=True)
        return files

    def tidy_dirs(self, names):
        """Run 'svn delete' on directories of names left empty.

        A directory is considered empty when '.svn' is its only entry.
        Return the list of deleted directories.
        """
        deleted = []
        for d in sorted(self.dirs_of(names), reverse=True):
            wd = self.wjoin(d)
            # os.listdir() returns a list, compare with a list
            if os.listdir(wd) == ['.svn']:
                self.run0('delete', d)
                deleted.append(d)
        return deleted

    def addchild(self, parent, child):
        self.childmap[parent] = child

    def revid(self, rev):
        """Return the revision identifier for svn revision number rev."""
        return u"svn:%s@%s" % (self.uuid, rev)

    def putcommit(self, files, copies, parents, commit, source, revmap):
        """Commit a changeset to svn and return its revision identifier.

        If one of parents already has a child recorded in self.childmap,
        the identifier of that child is returned and the working copy is
        left untouched. If svn reports no committed revision and there
        were no files to change, the first parent is returned instead.

        Raise util.Abort when the output of 'svn commit' cannot be
        understood.
        """
        for parent in parents:
            try:
                return self.revid(self.childmap[parent])
            except KeyError:
                pass

        # Apply changes to working copy
        for f, v in files:
            try:
                data, mode = source.getfile(f, v)
            except IOError:
                self.delete.append(f)
            else:
                self.putfile(f, mode, data)
                if f in copies:
                    self.copies.append([copies[f], f])
        files = [f[0] for f in files]

        # Flush queued operations, keeping track of what svn knows about
        entries = set(self.delete)
        files = frozenset(files)
        entries.update(self.add_dirs(files.difference(entries)))
        if self.copies:
            for s, d in self.copies:
                self._copyfile(s, d)
            self.copies = []
        if self.delete:
            self.xargs(self.delete, 'delete')
            self.delete = []
        entries.update(self.add_files(files.difference(entries)))
        entries.update(self.tidy_dirs(entries))
        if self.delexec:
            self.xargs(self.delexec, 'propdel', 'svn:executable')
            self.delexec = []
        if self.setexec:
            self.xargs(self.setexec, 'propset', 'svn:executable', '*')
            self.setexec = []

        fd, messagefile = tempfile.mkstemp(prefix='hg-convert-')
        try:
            # Written inside the try block so the temporary file is
            # removed even if writing the description fails.
            fp = os.fdopen(fd, 'w')
            try:
                fp.write(commit.desc)
            finally:
                fp.close()
            output = self.run0('commit',
                               username=util.shortuser(commit.author),
                               file=messagefile,
                               encoding='utf-8')
            try:
                rev = self.commit_re.search(output).group(1)
            except AttributeError:
                # No "Committed revision" line: acceptable only when
                # there was nothing to commit and a parent to fall back on
                if parents and not files:
                    return parents[0]
                self.ui.warn(_('unexpected svn output:\n'))
                self.ui.warn(output)
                raise util.Abort(_('unable to cope with svn output'))
            if commit.rev:
                self.run('propset', 'hg:convert-rev', commit.rev,
                         revprop=True, revision=rev)
            if commit.branch and commit.branch != 'default':
                self.run('propset', 'hg:convert-branch', commit.branch,
                         revprop=True, revision=rev)
            for parent in parents:
                self.addchild(parent, rev)
            return self.revid(rev)
        finally:
            os.unlink(messagefile)

    def puttags(self, tags):
        """Warn that tags cannot be written and return (None, None)."""
        self.ui.warn(_('writing Subversion tags is not yet implemented\n'))
        return None, None
General Comments 0
You need to be logged in to leave comments. Login now