##// END OF EJS Templates
i18n: use util.pconvert() instead of 'str.replace()' for problematic encoding...
FUJIWARA Katsunori -
r16067:467a85ce stable
parent child Browse files
Show More
@@ -1,1189 +1,1189
1 1 # Subversion 1.4/1.5 Python API backend
2 2 #
3 3 # Copyright(C) 2007 Daniel Holth et al
4 4
5 5 import os
6 6 import re
7 7 import sys
8 8 import cPickle as pickle
9 9 import tempfile
10 10 import urllib
11 11 import urllib2
12 12
13 13 from mercurial import strutil, scmutil, util, encoding
14 14 from mercurial.i18n import _
15 15
16 16 # Subversion stuff. Works best with very recent Python SVN bindings
17 17 # e.g. SVN 1.5 or backports. Thanks to the bzr folks for enhancing
18 18 # these bindings.
19 19
20 20 from cStringIO import StringIO
21 21
22 22 from common import NoRepo, MissingTool, commit, encodeargs, decodeargs
23 23 from common import commandline, converter_source, converter_sink, mapfile
24 24
25 25 try:
26 26 from svn.core import SubversionException, Pool
27 27 import svn
28 28 import svn.client
29 29 import svn.core
30 30 import svn.ra
31 31 import svn.delta
32 32 import transport
33 33 import warnings
34 34 warnings.filterwarnings('ignore',
35 35 module='svn.core',
36 36 category=DeprecationWarning)
37 37
38 38 except ImportError:
39 39 svn = None
40 40
class SvnPathNotFound(Exception):
    """Raised when a path cannot be found in the Subversion repository."""
43 43
def revsplit(rev):
    """Split a revision identifier into ``(uuid, path, revnum)``.

    ``rev`` looks like ``svn:<uuid>[/<module path>]@<revnum>``.  The
    returned path is empty when no module is present, otherwise it starts
    with a slash.  ``revnum`` is returned as an integer.
    """
    # Only the last '@' separates the revision number; earlier ones
    # belong to the path.
    location, number = rev.rsplit('@', 1)
    pieces = location.split('/', 1)
    # Drop the leading 4-character scheme prefix ('svn:') from the uuid.
    uuid = pieces[0][4:]
    if len(pieces) > 1:
        module = '/' + pieces[1]
    else:
        module = ''
    return uuid, module, int(number)
52 52
def quote(s):
    """URL-quote ``s`` the way the Subversion URL encoder does."""
    # As of svn 1.7, many svn calls expect "canonical" paths. In
    # theory, we should call svn.core.*canonicalize() on all paths
    # before passing them to the API. Instead, we assume the base url
    # is canonical and copy the behaviour of svn URL encoding function
    # so we can extend it safely with new components. The "safe"
    # characters were taken from the "svn_uri__char_validity" table in
    # libsvn_subr/path.c.
    safe = "!$&'()*+,-./:=@_~"
    return urllib.quote(s, safe)
62 62
def geturl(path):
    """Return a canonical Subversion URL for ``path``.

    ``svn.client.url_from_path()`` is tried first.  When it raises
    SubversionException, an existing local directory is turned into a
    quoted ``file://`` URL; anything else is canonicalized as given.
    """
    try:
        canonical = svn.core.svn_path_canonicalize(path)
        return svn.client.url_from_path(canonical)
    except SubversionException:
        # svn.client.url_from_path() fails with local repositories
        pass
    if os.path.isdir(path):
        path = os.path.normpath(os.path.abspath(path))
        if os.name == 'nt':
            path = '/' + util.normpath(path)
        # Module URL is later compared with the repository URL returned
        # by svn API, which is UTF-8.
        # NOTE(review): tolocal() is named as a conversion *to* the local
        # encoding, which looks at odds with the comment above - confirm.
        path = encoding.tolocal(path)
        path = 'file://%s' % quote(path)
    return svn.core.svn_path_canonicalize(path)
78 78
def optrev(number):
    """Return an ``svn_opt_revision_t`` designating revision ``number``."""
    rev = svn.core.svn_opt_revision_t()
    rev.kind = svn.core.svn_opt_revision_number
    rev.value.number = number
    return rev
84 84
class changedpath(object):
    """Plain copy of the fields of a changed-path log entry.

    Only ``copyfrom_path``, ``copyfrom_rev`` and ``action`` are copied from
    the object passed in.
    """
    def __init__(self, p):
        for field in ('copyfrom_path', 'copyfrom_rev', 'action'):
            setattr(self, field, getattr(p, field))
90 90
def get_log_child(fp, url, paths, start, end, limit=0,
                  discover_changed_paths=True, strict_node_history=False):
    """Stream the Subversion log for ``paths`` to ``fp`` as pickles.

    Each log entry is written as a pickled
    ``(orig_paths, revnum, author, date, message)`` tuple.  The stream is
    terminated by a pickled ``None`` on success or IOError, or by the
    stringified exception on any other failure.  ``fp`` is then closed
    and the process exits through ``os._exit(0)``; this function never
    returns.
    """
    protocol = -1

    def receiver(orig_paths, revnum, author, date, message, pool):
        # Replace the binding objects by plain changedpath instances
        # before pickling them.
        if orig_paths is not None:
            for key, value in orig_paths.iteritems():
                orig_paths[key] = changedpath(value)
        entry = (orig_paths, revnum, author, date, message)
        pickle.dump(entry, fp, protocol)

    try:
        # Use an ra of our own so that our parent can consume
        # our results without confusing the server.
        t = transport.SvnRaTransport(url=url)
        svn.ra.get_log(t.ra, paths, start, end, limit,
                       discover_changed_paths,
                       strict_node_history,
                       receiver)
    except IOError:
        # Caller may interrupt the iteration
        pickle.dump(None, fp, protocol)
    except Exception as inst:
        pickle.dump(str(inst), fp, protocol)
    else:
        pickle.dump(None, fp, protocol)
    fp.close()
    # With large history, cleanup process goes crazy and suddenly
    # consumes *huge* amount of memory. The output file being closed,
    # there is no need for clean termination.
    os._exit(0)
121 121
def debugsvnlog(ui, **opts):
    """Fetch SVN log in a subprocess and channel them back to parent to
    avoid memory collection issues.
    """
    # Both ends of the pipe carry binary data (encoded args in, pickles out).
    for stream in (sys.stdin, sys.stdout):
        util.setbinary(stream)
    encoded = sys.stdin.read()
    get_log_child(sys.stdout, *decodeargs(encoded))
130 130
class logstream(object):
    """Interruptible revision log iterator.

    Reads pickled objects from ``stdout``.  Each log entry is a 5-tuple
    ``(orig_paths, revnum, author, date, message)``; a ``None`` object
    marks the end of the stream and anything else is reported as an error.
    """
    def __init__(self, stdout):
        self._stdout = stdout

    def __iter__(self):
        """Yield log entries until the ``None`` terminator is read.

        Raises util.Abort when the stream ends without a terminator or
        when it contains something that is not a log entry.
        """
        while True:
            try:
                entry = pickle.load(self._stdout)
            except EOFError:
                raise util.Abort(_('Mercurial failed to run itself, check'
                                   ' hg executable is in PATH'))
            if entry is None:
                break
            # Check the shape explicitly instead of relying on a failed
            # unpacking: an error string that happens to be 5 characters
            # long would otherwise be mistaken for a log entry, and a bare
            # except would hide unrelated exceptions.
            if not isinstance(entry, tuple) or len(entry) != 5:
                raise util.Abort(_("log stream exception '%s'") % entry)
            yield entry

    def close(self):
        """Close the underlying stream; calling it again is a no-op."""
        if self._stdout:
            self._stdout.close()
            self._stdout = None
155 155
156 156
157 157 # Check to see if the given path is a local Subversion repo. Verify this by
158 158 # looking for several svn-specific files and directories in the given
159 159 # directory.
def filecheck(ui, path, proto):
    """Return True if ``path`` contains every entry expected at the root
    of a local Subversion repository ('locks', 'hooks', 'format', 'db').

    ``ui`` and ``proto`` are unused here; they are part of the common
    signature of the functions stored in ``protomap``.
    """
    expected = ('locks', 'hooks', 'format', 'db')
    missing = [name for name in expected
               if not os.path.exists(os.path.join(path, name))]
    return not missing
165 165
166 166 # Check to see if a given path is the root of an svn repo over http. We verify
167 167 # this by requesting a version-controlled URL we know can't exist and looking
168 168 # for the svn-specific "not found" XML.
def httpcheck(ui, path, proto):
    """Probe ``proto://path`` and tell whether it looks like an svn root.

    A URL that cannot exist in a repository is requested and the body of
    the answer (or of the 404 error) is searched for the svn-specific
    "not found" XML.  Any HTTP error other than 404 is inconclusive: a
    warning is emitted and True is returned.  Any other failure returns
    False.
    """
    try:
        opener = urllib2.build_opener()
        rsp = opener.open('%s://%s/!svn/ver/0/.svn' % (proto, path))
        data = rsp.read()
    except urllib2.HTTPError as inst:
        if inst.code != 404:
            # Except for 404 we cannot know for sure this is not an svn repo
            ui.warn(_('svn: cannot probe remote repository, assume it could '
                      'be a subversion repository. Use --source-type if you '
                      'know better.\n'))
            return True
        data = inst.fp.read()
    except Exception:
        # Could be urllib2.URLError if the URL is invalid or anything else.
        # KeyboardInterrupt and SystemExit are deliberately not caught so
        # that the user can still interrupt the probe.
        return False
    return '<m:human-readable errcode="160013">' in data
186 186
# Maps a URL scheme to the function used to probe it.
protomap = {'http': httpcheck,
            'https': httpcheck,
            'file': filecheck,
            }
def issvnurl(ui, url):
    """Return True if ``url`` or one of its parents is a Subversion root.

    ``url`` is either ``<proto>://<path>`` or a plain local path, which is
    treated as a ``file`` URL.  The check registered for the scheme in
    ``protomap`` is applied to the path and then to each parent obtained
    by stripping the last '/'-separated component.  Schemes without a
    registered check always give False.
    """
    try:
        proto, path = url.split('://', 1)
        if proto == 'file':
            path = urllib.url2pathname(path)
    except ValueError:
        # No '://' in url: treat it as a local path.
        proto = 'file'
        path = os.path.abspath(url)
    if proto == 'file':
        # util.pconvert() rather than str.replace(os.sep, '/'): a byte-wise
        # replace corrupts paths in multi-byte encodings where the
        # separator byte can also be the trailing byte of a character.
        path = util.pconvert(path)
    check = protomap.get(proto, lambda *args: False)
    while '/' in path:
        if check(ui, path, proto):
            return True
        path = path.rsplit('/', 1)[0]
    return False
207 207
208 208 # SVN conversion code stolen from bzr-svn and tailor
209 209 #
210 210 # Subversion looks like a versioned filesystem, branches structures
211 211 # are defined by conventions and not enforced by the tool. First,
212 212 # we define the potential branches (modules) as "trunk" and "branches"
213 213 # children directories. Revisions are then identified by their
214 214 # module and revision number (and a repository identifier).
215 215 #
216 216 # The revision graph is really a tree (or a forest). By default, a
217 217 # revision parent is the previous revision in the same module. If the
218 218 # module directory is copied/moved from another module then the
219 219 # revision is the module root and its parent the source revision in
220 220 # the parent module. A revision has at most one parent.
221 221 #
222 222 class svn_source(converter_source):
def __init__(self, ui, url, rev=None):
    """Open the Subversion source at ``url``.

    Raises NoRepo when ``url`` does not look like a Subversion repository,
    MissingTool when the bindings are missing or older than 1.4, and
    util.Abort when ``rev`` or the configured start revision is not an
    integer or when no revision is found in the module.
    """
    super(svn_source, self).__init__(ui, url, rev=rev)

    # The cheap checks come first; issvnurl() is only evaluated when all
    # of them fail.
    if not (url.startswith('svn://') or url.startswith('svn+ssh://') or
            (os.path.exists(url) and
             os.path.exists(os.path.join(url, '.svn'))) or
            issvnurl(ui, url)):
        raise NoRepo(_("%s does not look like a Subversion repository")
                     % url)
    if svn is None:
        raise MissingTool(_('Could not load Subversion python bindings'))

    try:
        version = svn.core.SVN_VER_MAJOR, svn.core.SVN_VER_MINOR
        if version < (1, 4):
            raise MissingTool(_('Subversion python bindings %d.%d found, '
                                '1.4 or later required') % version)
    except AttributeError:
        raise MissingTool(_('Subversion python bindings are too old, 1.4 '
                            'or later required'))

    self.lastrevs = {}

    latest = None
    try:
        # Support file://path@rev syntax. Useful e.g. to convert
        # deleted branches.
        at = url.rfind('@')
        if at >= 0:
            latest = int(url[at + 1:])
            url = url[:at]
    except ValueError:
        # What follows the '@' is not a number: keep the url untouched.
        pass
    self.url = geturl(url)
    self.encoding = 'UTF-8' # Subversion is always nominal UTF-8
    try:
        self.transport = transport.SvnRaTransport(url=self.url)
        self.ra = self.transport.ra
        self.ctx = self.transport.client
        self.baseurl = svn.ra.get_repos_root(self.ra)
        # Module is either empty or a repository path starting with
        # a slash and not ending with a slash.
        self.module = urllib.unquote(self.url[len(self.baseurl):])
        self.prevmodule = None
        self.rootmodule = self.module
        self.commits = {}
        self.paths = {}
        self.uuid = svn.ra.get_uuid(self.ra)
    except SubversionException:
        ui.traceback()
        raise NoRepo(_("%s does not look like a Subversion repository")
                     % self.url)

    # An explicit rev argument overrides a revision given as url@rev.
    if rev:
        try:
            latest = int(rev)
        except ValueError:
            raise util.Abort(_('svn: revision %s is not an integer') % rev)

    self.trunkname = self.ui.config('convert', 'svn.trunk', 'trunk').strip('/')
    self.startrev = self.ui.config('convert', 'svn.startrev', default=0)
    try:
        self.startrev = int(self.startrev)
        # Negative start revisions are clamped to 0.
        if self.startrev < 0:
            self.startrev = 0
    except ValueError:
        raise util.Abort(_('svn: start revision %s is not an integer')
                         % self.startrev)

    try:
        self.head = self.latest(self.module, latest)
    except SvnPathNotFound:
        self.head = None
    if not self.head:
        raise util.Abort(_('no revision found in module %s')
                         % self.module)
    self.last_changed = self.revnum(self.head)

    self._changescache = None

    # Remember the working copy, if url is one; converted() uses it.
    if os.path.exists(os.path.join(url, '.svn/entries')):
        self.wc = url
    else:
        self.wc = None
    self.convertfp = None
308 308
def setrevmap(self, revmap):
    """Record, for each module in ``revmap``, its highest revision number.

    The result replaces ``self.lastrevs``.
    """
    newest = {}
    for revid in revmap.iterkeys():
        module, revnum = revsplit(revid)[1:]
        if module not in newest or revnum > newest[module]:
            newest[module] = revnum
    self.lastrevs = newest
317 317
def exists(self, path, optrev):
    """Return True if ``path`` (relative to ``self.url``) can be listed at
    revision ``optrev``, False if the listing raises SubversionException.
    """
    target = self.url.rstrip('/') + '/' + quote(path)
    try:
        svn.client.ls(target, optrev, False, self.ctx)
    except SubversionException:
        return False
    return True
325 325
def getheads(self):
    """Compute and return the list of head revision ids.

    The first head is the one of the module itself (moved into trunk when
    a trunk directory is found); one more head is added per non-empty
    directory found under the branches directory.  ``self.tags`` is set to
    the tags directory path, or None.  Raises util.Abort when a configured
    location is missing, when the module has no revision, or when a start
    revision is incompatible with the heads found.
    """

    def isdir(path, revnum):
        return self._checkpath(path, revnum) == svn.core.svn_node_dir

    def getcfgpath(name, rev):
        # Return the repository-relative location of 'trunk', 'tags' or
        # 'branches', or None when it is disabled or absent.
        cfgpath = self.ui.config('convert', 'svn.' + name)
        if cfgpath is not None and cfgpath.strip() == '':
            # Explicitly configured as empty: feature disabled.
            return None
        path = (cfgpath or name).strip('/')
        if not self.exists(path, rev):
            if self.module.endswith(path) and name == 'trunk':
                # we are converting from inside this directory
                return None
            if cfgpath:
                raise util.Abort(_('expected %s to be at %r, but not found')
                                 % (name, path))
            return None
        self.ui.note(_('found %s at %r\n') % (name, path))
        return path

    rev = optrev(self.last_changed)
    oldmodule = ''
    trunk = getcfgpath('trunk', rev)
    self.tags = getcfgpath('tags', rev)
    branches = getcfgpath('branches', rev)

    # If the project has a trunk or branches, we will extract heads
    # from them. We keep the project root otherwise.
    if trunk:
        oldmodule = self.module or ''
        self.module += '/' + trunk
        self.head = self.latest(self.module, self.last_changed)
        if not self.head:
            raise util.Abort(_('no revision found in module %s')
                             % self.module)

    # First head in the list is the module's head
    self.heads = [self.head]
    if self.tags is not None:
        self.tags = '%s/%s' % (oldmodule, (self.tags or 'tags'))

    # Check if branches bring a few more heads to the list
    if branches:
        rpath = self.url.strip('/')
        branchnames = svn.client.ls(rpath + '/' + quote(branches),
                                    rev, False, self.ctx)
        for branch in branchnames.keys():
            module = '%s/%s/%s' % (oldmodule, branches, branch)
            if not isdir(module, self.last_changed):
                continue
            brevid = self.latest(module, self.last_changed)
            if not brevid:
                self.ui.note(_('ignoring empty branch %s\n') % branch)
                continue
            self.ui.note(_('found branch %s at %d\n') %
                         (branch, self.revnum(brevid)))
            self.heads.append(brevid)

    if self.startrev and self.heads:
        if len(self.heads) > 1:
            raise util.Abort(_('svn: start revision is not supported '
                               'with more than one branch'))
        revnum = self.revnum(self.heads[0])
        if revnum < self.startrev:
            raise util.Abort(
                _('svn: no revision found after start revision %d')
                % self.startrev)

    return self.heads
397 397
def getchanges(self, rev):
    """Return ``(files, copies)`` for revision ``rev``.

    ``files`` is a sorted list of ``(path, rev)`` pairs and ``copies`` a
    dictionary.  ``self.removed`` is updated as a side effect and the
    entry for ``rev`` is dropped from ``self.paths``.  A result stored by
    getchangedfiles() for the same ``rev`` is returned as is.
    """
    cached = self._changescache
    if cached and cached[0] == rev:
        return cached[1]
    self._changescache = None
    paths, parents = self.paths[rev]
    if parents:
        files, self.removed, copies = self.expandpaths(rev, paths, parents)
    else:
        # Perform a full checkout on roots
        uuid, module, revnum = revsplit(rev)
        entries = svn.client.ls(self.baseurl + quote(module),
                                optrev(revnum), True, self.ctx)
        files = [name for name, entry in entries.iteritems()
                 if entry.kind == svn.core.svn_node_file]
        copies = {}
        self.removed = set()

    files.sort()
    files = [(name, rev) for name in files]

    # caller caches the result, so free it here to release memory
    del self.paths[rev]
    return (files, copies)
421 421
def getchangedfiles(self, rev, i):
    """Return the names of the files changed in ``rev``.

    The full getchanges() result is kept in ``self._changescache`` so that
    a following getchanges() call for the same ``rev`` can reuse it.
    ``i`` is not used.
    """
    changes = self.getchanges(rev)
    self._changescache = (rev, changes)
    files = changes[0]
    return [entry[0] for entry in files]
426 426
def getcommit(self, rev):
    """Return the commit object for ``rev``, fetching it if necessary.

    The commit is removed from ``self.commits`` before being returned.
    Raises util.Abort if the revision cannot be found after fetching.
    """
    if rev not in self.commits:
        uuid, module, revnum = revsplit(rev)
        self.module = module
        self.reparent(module)
        # We assume that:
        # - requests for revisions after "stop" come from the
        # revision graph backward traversal. Cache all of them
        # down to stop, they will be used eventually.
        # - requests for revisions before "stop" come to get
        # isolated branches parents. Just fetch what is needed.
        stop = self.lastrevs.get(module, 0)
        if revnum < stop:
            stop = revnum + 1
        self._fetch_revisions(revnum, stop)
        if rev not in self.commits:
            raise util.Abort(_('svn: revision %s not found') % revnum)
    # caller caches the result, so free it here to release memory
    return self.commits.pop(rev)
448 448
def gettags(self):
    """Return a dictionary mapping tag names to revision ids.

    The dictionary is empty when no tags directory is known.
    """
    tags = {}
    if self.tags is None:
        return tags

    # svn tags are just a convention, project branches left in a
    # 'tags' directory. There is no other relationship than
    # ancestry, which is expensive to discover and makes them hard
    # to update incrementally. Worse, past revisions may be
    # referenced by tags far away in the future, requiring a deep
    # history traversal on every calculation. Current code
    # performs a single backward traversal, tracking moves within
    # the tags directory (tag renaming) and recording a new tag
    # every time a project is copied from outside the tags
    # directory. It also lists deleted tags, this behaviour may
    # change in the future.
    pendings = []
    tagspath = self.tags
    start = svn.ra.get_latest_revnum(self.ra)
    stream = self._getlog([self.tags], start, self.startrev)
    try:
        for entry in stream:
            origpaths, revnum, author, date, message = entry
            copies = [(e.copyfrom_path, e.copyfrom_rev, p) for p, e
                      in origpaths.iteritems() if e.copyfrom_path]
            # Apply moves/copies from more specific to general
            copies.sort(reverse=True)

            srctagspath = tagspath
            if copies and copies[-1][2] == tagspath:
                # Track tags directory moves
                srctagspath = copies.pop()[0]

            for source, sourcerev, dest in copies:
                if not dest.startswith(tagspath + '/'):
                    continue
                for tag in pendings:
                    if tag[0].startswith(dest):
                        # A pending tag came from this copy: follow it
                        # back to the copy source.
                        tagpath = source + tag[0][len(dest):]
                        tag[:2] = [tagpath, sourcerev]
                        break
                else:
                    # No pending tag matched: this is a new candidate.
                    pendings.append([source, sourcerev, dest])

            # Filter out tags with children coming from different
            # parts of the repository like:
            # /tags/tag.1 (from /trunk:10)
            # /tags/tag.1/foo (from /branches/foo:12)
            # Here /tags/tag.1 is discarded as well as its children.
            # It happens with tools like cvs2svn. Such tags cannot
            # be represented in mercurial.
            addeds = dict((p, e.copyfrom_path) for p, e
                          in origpaths.iteritems()
                          if e.action == 'A' and e.copyfrom_path)
            badroots = set()
            for destroot in addeds:
                for source, sourcerev, dest in pendings:
                    if (not dest.startswith(destroot + '/')
                        or source.startswith(addeds[destroot] + '/')):
                        continue
                    badroots.add(destroot)
                    break

            for badroot in badroots:
                pendings = [p for p in pendings if p[2] != badroot
                            and not p[2].startswith(badroot + '/')]

            # Tell tag renamings from tag creations
            renamings = []
            for source, sourcerev, dest in pendings:
                tagname = dest.split('/')[-1]
                if source.startswith(srctagspath):
                    renamings.append([source, sourcerev, tagname])
                    continue
                if tagname in tags:
                    # Keep the latest tag value
                    continue
                # From revision may be fake, get one with changes
                try:
                    tagid = self.latest(source, sourcerev)
                    if tagid and tagname not in tags:
                        tags[tagname] = tagid
                except SvnPathNotFound:
                    # It happens when we are following directories
                    # we assumed were copied with their parents
                    # but were really created in the tag
                    # directory.
                    pass
            pendings = renamings
            tagspath = srctagspath
    finally:
        stream.close()
    return tags
542 542
def converted(self, rev, destrev):
    """Append "<destrev> <svn revnum>" to ``.svn/hg-shamap`` in the
    working copy.  Does nothing when the source is not a working copy.
    """
    if not self.wc:
        return
    if self.convertfp is None:
        # Opened lazily on first use and kept open for later calls.
        mapname = os.path.join(self.wc, '.svn', 'hg-shamap')
        self.convertfp = open(mapname, 'a')
    line = '%s %d\n' % (destrev, self.revnum(rev))
    self.convertfp.write(line)
    self.convertfp.flush()
551 551
def revid(self, revnum, module=None):
    """Build the revision id ``svn:<uuid><module>@<revnum>``.

    ``self.module`` is used when ``module`` is None or empty.
    """
    target = module or self.module
    return 'svn:%s%s@%s' % (self.uuid, target, revnum)
554 554
def revnum(self, rev):
    """Return the integer revision number following the last '@' of
    ``rev`` (the whole string when it contains no '@').
    """
    number = rev.rsplit('@', 1)[-1]
    return int(number)
557 557
def latest(self, path, stop=0):
    """Find the latest revid affecting path, up to stop. It may return
    a revision in a different module, since a branch may be moved without
    a change being reported. Return None if computed module does not
    belong to rootmodule subtree.

    Raises SvnPathNotFound when ``path`` cannot be stat'ed at ``stop``.
    """
    if not path.startswith(self.rootmodule):
        # Requests on foreign branches may be forbidden at server level
        self.ui.debug('ignoring foreign branch %r\n' % path)
        return None

    if not stop:
        stop = svn.ra.get_latest_revnum(self.ra)
    try:
        # stat() is issued against the repository root, then the previous
        # parent is restored.
        prevmodule = self.reparent('')
        dirent = svn.ra.stat(self.ra, path.strip('/'), stop)
        self.reparent(prevmodule)
    except SubversionException:
        dirent = None
    if not dirent:
        raise SvnPathNotFound(_('%s not found up to revision %d')
                              % (path, stop))

    # stat() gives us the previous revision on this line of
    # development, but it might be in *another module*. Fetch the
    # log and detect renames down to the latest revision.
    stream = self._getlog([path], stop, dirent.created_rev)
    try:
        for entry in stream:
            paths, revnum, author, date, message = entry
            if revnum <= dirent.created_rev:
                break

            for p in paths:
                if not path.startswith(p) or not paths[p].copyfrom_path:
                    continue
                newpath = paths[p].copyfrom_path + path[len(p):]
                self.ui.debug("branch renamed from %s to %s at %d\n" %
                              (path, newpath, revnum))
                path = newpath
                break
    finally:
        stream.close()

    if not path.startswith(self.rootmodule):
        self.ui.debug('ignoring foreign branch %r\n' % path)
        return None
    return self.revid(dirent.created_rev, path)
606 606
def reparent(self, module):
    """Reparent the svn transport and return the previous parent.

    Nothing is done when the transport is already parented on ``module``;
    ``module`` itself is returned in that case.  A previous parent of None
    is reported as ''.
    """
    if self.prevmodule == module:
        return module
    svnurl = self.baseurl + quote(module)
    previous = self.prevmodule
    if previous is None:
        previous = ''
    self.ui.debug("reparent to %s\n" % svnurl)
    svn.ra.reparent(self.ra, svnurl)
    self.prevmodule = module
    return previous
619 619
def expandpaths(self, rev, paths, parents):
    """Expand the changed ``paths`` of ``rev`` into individual files.

    Returns ``(changed, removed, copies)``: the list of changed files
    (removed ones included), the set of removed files and a dictionary
    mapping copy destinations to their sources.  Directory entries are
    expanded to the files they contain.
    """
    changed, removed = set(), set()
    copies = {}

    new_module, revnum = revsplit(rev)[1:]
    if new_module != self.module:
        self.module = new_module
        self.reparent(self.module)

    for i, (path, ent) in enumerate(paths):
        self.ui.progress(_('scanning paths'), i, item=path,
                         total=len(paths))
        entrypath = self.getrelpath(path)

        kind = self._checkpath(entrypath, revnum)
        if kind == svn.core.svn_node_file:
            changed.add(self.recode(entrypath))
            if not ent.copyfrom_path or not parents:
                continue
            # Copy sources not in parent revisions cannot be
            # represented, ignore their origin for now
            pmodule, prevnum = revsplit(parents[0])[1:]
            if ent.copyfrom_rev < prevnum:
                continue
            copyfrom_path = self.getrelpath(ent.copyfrom_path, pmodule)
            if not copyfrom_path:
                continue
            self.ui.debug("copied to %s from %s@%s\n" %
                          (entrypath, copyfrom_path, ent.copyfrom_rev))
            copies[self.recode(entrypath)] = self.recode(copyfrom_path)
        elif kind == 0: # gone, but had better be a deleted *file*
            self.ui.debug("gone from %s\n" % ent.copyfrom_rev)
            pmodule, prevnum = revsplit(parents[0])[1:]
            parentpath = pmodule + "/" + entrypath
            fromkind = self._checkpath(entrypath, prevnum, pmodule)

            if fromkind == svn.core.svn_node_file:
                removed.add(self.recode(entrypath))
            elif fromkind == svn.core.svn_node_dir:
                # A directory was deleted: every file it contained in the
                # parent revision is removed.
                oroot = parentpath.strip('/')
                nroot = path.strip('/')
                children = self._iterfiles(oroot, prevnum)
                for childpath in children:
                    childpath = childpath.replace(oroot, nroot)
                    childpath = self.getrelpath("/" + childpath, pmodule)
                    if childpath:
                        removed.add(self.recode(childpath))
            else:
                self.ui.debug('unknown path in revision %d: %s\n' %
                              (revnum, path))
        elif kind == svn.core.svn_node_dir:
            if ent.action == 'M':
                # If the directory just had a prop change,
                # then we shouldn't need to look for its children.
                continue
            if ent.action == 'R' and parents:
                # If a directory is replacing a file, mark the previous
                # file as deleted
                pmodule, prevnum = revsplit(parents[0])[1:]
                pkind = self._checkpath(entrypath, prevnum, pmodule)
                if pkind == svn.core.svn_node_file:
                    removed.add(self.recode(entrypath))
                elif pkind == svn.core.svn_node_dir:
                    # We do not know what files were kept or removed,
                    # mark them all as changed.
                    for childpath in self._iterfiles(pmodule, prevnum):
                        childpath = self.getrelpath("/" + childpath)
                        if childpath:
                            changed.add(self.recode(childpath))

            for childpath in self._iterfiles(path, revnum):
                childpath = self.getrelpath("/" + childpath)
                if childpath:
                    changed.add(self.recode(childpath))

            # Handle directory copies
            if not ent.copyfrom_path or not parents:
                continue
            # Copy sources not in parent revisions cannot be
            # represented, ignore their origin for now
            pmodule, prevnum = revsplit(parents[0])[1:]
            if ent.copyfrom_rev < prevnum:
                continue
            copyfrompath = self.getrelpath(ent.copyfrom_path, pmodule)
            if not copyfrompath:
                continue
            self.ui.debug("mark %s came from %s:%d\n"
                          % (path, copyfrompath, ent.copyfrom_rev))
            children = self._iterfiles(ent.copyfrom_path, ent.copyfrom_rev)
            for childpath in children:
                childpath = self.getrelpath("/" + childpath, pmodule)
                if not childpath:
                    continue
                copytopath = path + childpath[len(copyfrompath):]
                copytopath = self.getrelpath(copytopath)
                copies[self.recode(copytopath)] = self.recode(childpath)

    self.ui.progress(_('scanning paths'), None)
    changed.update(removed)
    return (list(changed), removed, copies)
720 720
def _fetch_revisions(self, from_revnum, to_revnum):
    """Read the log of ``self.module`` between the two revisions (newest
    first) and store the parsed results in ``self.commits`` and
    ``self.paths``.

    Raises util.Abort when Subversion reports that the revision does not
    exist.
    """
    # The log is walked backward: make from_revnum the highest revision.
    if from_revnum < to_revnum:
        from_revnum, to_revnum = to_revnum, from_revnum

    self.child_cset = None

    def parselogentry(orig_paths, revnum, author, date, message):
        """Return the parsed commit object or None, and True if
        the revision is a branch root.
        """
        self.ui.debug("parsing revision %d (%d changes)\n" %
                      (revnum, len(orig_paths)))

        branched = False
        rev = self.revid(revnum)
        # branch log might return entries for a parent we already have

        if rev in self.commits or revnum < to_revnum:
            return None, branched

        parents = []
        # check whether this revision is the start of a branch or part
        # of a branch renaming
        orig_paths = sorted(orig_paths.iteritems())
        root_paths = [(p, e) for p, e in orig_paths
                      if self.module.startswith(p)]
        if root_paths:
            path, ent = root_paths[-1]
            if ent.copyfrom_path:
                branched = True
                newpath = ent.copyfrom_path + self.module[len(path):]
                # ent.copyfrom_rev may not be the actual last revision
                previd = self.latest(newpath, ent.copyfrom_rev)
                if previd is not None:
                    prevmodule, prevnum = revsplit(previd)[1:]
                    if prevnum >= self.startrev:
                        parents = [previd]
                        self.ui.note(
                            _('found parent of branch %s at %d: %s\n') %
                            (self.module, prevnum, prevmodule))
            else:
                self.ui.debug("no copyfrom path, don't know what to do.\n")

        paths = []
        # filter out unrelated paths
        for path, ent in orig_paths:
            if self.getrelpath(path) is None:
                continue
            paths.append((path, ent))

        # Example SVN datetime. Includes microseconds.
        # ISO-8601 conformant
        # '2007-01-04T17:35:00.902377Z'
        date = util.parsedate(date[:19] + " UTC", ["%Y-%m-%dT%H:%M:%S"])

        log = message and self.recode(message) or ''
        author = author and self.recode(author) or ''
        try:
            # The branch is named after the last module component; the
            # trunk maps to no branch name.
            branch = self.module.split("/")[-1]
            if branch == self.trunkname:
                branch = None
        except IndexError:
            branch = None

        cset = commit(author=author,
                      date=util.datestr(date),
                      desc=log,
                      parents=parents,
                      branch=branch,
                      rev=rev)

        self.commits[rev] = cset
        # The parents list is *shared* among self.paths and the
        # commit object. Both will be updated below.
        self.paths[rev] = (paths, cset.parents)
        if self.child_cset and not self.child_cset.parents:
            self.child_cset.parents[:] = [rev]
        self.child_cset = cset
        return cset, branched

    self.ui.note(_('fetching revision log for "%s" from %d to %d\n') %
                 (self.module, from_revnum, to_revnum))

    try:
        firstcset = None
        lastonbranch = False
        stream = self._getlog([self.module], from_revnum, to_revnum)
        try:
            for entry in stream:
                paths, revnum, author, date, message = entry
                if revnum < self.startrev:
                    lastonbranch = True
                    break
                if not paths:
                    self.ui.debug('revision %d has no entries\n' % revnum)
                    # If we ever leave the loop on an empty
                    # revision, do not try to get a parent branch
                    lastonbranch = lastonbranch or revnum == 0
                    continue
                cset, lastonbranch = parselogentry(paths, revnum, author,
                                                   date, message)
                if cset:
                    firstcset = cset
                if lastonbranch:
                    break
        finally:
            stream.close()

        if not lastonbranch and firstcset and not firstcset.parents:
            # The first revision of the sequence (the last fetched one)
            # has invalid parents if not a branch root. Find the parent
            # revision now, if any.
            try:
                firstrevnum = self.revnum(firstcset.rev)
                if firstrevnum > 1:
                    latest = self.latest(self.module, firstrevnum - 1)
                    if latest:
                        firstcset.parents.append(latest)
            except SvnPathNotFound:
                pass
    except SubversionException as xc:
        # The exception arguments are (message, error number).
        inst, num = xc.args
        if num == svn.core.SVN_ERR_FS_NO_SUCH_REVISION:
            raise util.Abort(_('svn: branch has no revision %s') % to_revnum)
        raise
845 845
def getfile(self, file, rev):
    """Return ``(data, mode)`` for ``file`` at revision ``rev``.

    ``mode`` is 'l' when the file has the svn:special property, 'x' when
    it has svn:executable, '' otherwise.  For links the leading "link "
    marker is stripped from the data.  Raises IOError when the file was
    removed in the current revision or is reported as not found.
    """
    # TODO: ra.get_file transmits the whole file instead of diffs.
    if file in self.removed:
        raise IOError()
    mode = ''
    try:
        new_module, revnum = revsplit(rev)[1:]
        if self.module != new_module:
            self.module = new_module
            self.reparent(self.module)
        buf = StringIO()
        info = svn.ra.get_file(self.ra, file, revnum, buf)
        data = buf.getvalue()
        # ra.get_files() seems to keep a reference on the input buffer
        # preventing collection. Release it explicitly.
        buf.close()
        if isinstance(info, list):
            info = info[-1]
        # svn:special takes precedence over svn:executable.
        if "svn:special" in info:
            mode = 'l'
        elif "svn:executable" in info:
            mode = 'x'
        else:
            mode = ''
    except SubversionException as e:
        notfound = (svn.core.SVN_ERR_FS_NOT_FOUND,
                    svn.core.SVN_ERR_RA_DAV_PATH_NOT_FOUND)
        if e.apr_err in notfound: # File not found
            raise IOError()
        raise
    if mode == 'l':
        link_prefix = "link "
        if data.startswith(link_prefix):
            data = data[len(link_prefix):]
    return data, mode
877 877
def _iterfiles(self, path, revnum):
    """Enumerate all files in path at revnum, recursively.

    Returns a lazy generator of paths, each prefixed with ``path`` (with
    surrounding slashes stripped) when ``path`` is not empty. Only entries
    whose kind is svn_node_file are produced.
    """
    path = path.strip('/')
    pool = Pool()
    url = (self.baseurl + '/' + quote(path)).strip('/')
    listing = svn.client.ls(url, optrev(revnum), True, self.ctx, pool)
    prefix = path
    if prefix:
        prefix = prefix + '/'
    return ((prefix + name) for name, entry in listing.iteritems()
            if entry.kind == svn.core.svn_node_file)
888 888
def getrelpath(self, path, module=None):
    """Return ``path`` relative to ``module``, or None if it is outside.

    ``module`` defaults to self.module. The result is '' when ``path``
    designates the module itself (trailing slashes are ignored). A debug
    message is emitted through self.ui when the path is not under the
    module.
    """
    if module is None:
        module = self.module
    # Given the repository url of this wc, say
    #   "http://server/plone/CMFPlone/branches/Plone-2_0-branch"
    # extract the "entry" portion (a relative path) from what
    # svn log --xml says, ie
    #   "/CMFPlone/branches/Plone-2_0-branch/tests/PloneTestCase.py"
    # that is to say "tests/PloneTestCase.py"
    if path.startswith(module):
        tail = path.rstrip('/')[len(module):]
        if not tail:
            # The path is the module itself.
            return tail
        if tail[0] == '/':
            return tail[1:]
        # Otherwise the module name is only a textual prefix of a sibling
        # path (e.g. "/trunk" vs "/trunk2"): fall through.

    # The path is outside our tracked tree...
    self.ui.debug('%r is not under %r, ignoring\n' % (path, module))
    return None
908 908
def _checkpath(self, path, revnum, module=None):
    """Return svn.ra.check_path()'s result for ``path`` at ``revnum``.

    When ``module`` is given, the RA session is temporarily reparented
    with '' and ``path`` is looked up under ``module``; the value returned
    by that reparent() call is used to restore the session afterwards,
    even if the check raises.
    """
    reparented = module is not None
    if reparented:
        prevmodule = self.reparent('')
        path = module + '/' + path
    try:
        # ra.check_path does not like leading slashes very much, it leads
        # to PROPFIND subversion errors
        stripped = path.strip('/')
        return svn.ra.check_path(self.ra, stripped, revnum)
    finally:
        if reparented:
            self.reparent(prevmodule)
920 920
def _getlog(self, paths, start, end, limit=0, discover_changed_paths=True,
            strict_node_history=False):
    """Start an ``hg debugsvnlog`` child process and return its log stream.

    The log request (base URL, normalized paths, revision range and
    options) is serialized with encodeargs() and written to the child's
    stdin; the child's stdout is wrapped in a logstream and returned.

    Raises util.Abort if closing the child's stdin fails with IOError.
    """
    # Normalize path names, svn >= 1.5 only wants paths relative to
    # supplied URL
    relpaths = []
    for p in paths:
        if p.startswith('/'):
            absolute = p
        else:
            absolute = self.module + '/' + p
        relpaths.append(absolute.strip('/'))
    payload = encodeargs([self.baseurl, relpaths, start, end, limit,
                          discover_changed_paths, strict_node_history])
    command = '%s debugsvnlog' % util.shellquote(util.hgexecutable())
    childin, childout = util.popen2(util.quotecommand(command))
    childin.write(payload)
    try:
        childin.close()
    except IOError:
        raise util.Abort(_('Mercurial failed to run itself, check'
                           ' hg executable is in PATH'))
    return logstream(childout)
943 943
# Shell script installed as the 'pre-revprop-change' hook of repositories
# created by svn_sink.__init__. It allows (exit 0) modifying svn:log and
# adding the hg:convert-branch and hg:convert-rev revision properties;
# any other revision property change is rejected with exit status 1.
pre_revprop_change = '''#!/bin/sh

REPOS="$1"
REV="$2"
USER="$3"
PROPNAME="$4"
ACTION="$5"

if [ "$ACTION" = "M" -a "$PROPNAME" = "svn:log" ]; then exit 0; fi
if [ "$ACTION" = "A" -a "$PROPNAME" = "hg:convert-branch" ]; then exit 0; fi
if [ "$ACTION" = "A" -a "$PROPNAME" = "hg:convert-rev" ]; then exit 0; fi

echo "Changing prohibited revision property" >&2
exit 1
'''
959 959
class svn_sink(converter_sink, commandline):
    """Conversion sink that commits changesets through an svn working copy.

    All Subversion operations are performed by running the ``svn`` (and,
    for repository creation, ``svnadmin``) command-line tools. Pending
    operations are accumulated in self.delete, self.setexec, self.delexec
    and self.copies and flushed by putcommit().
    """
    # Extracts the new revision number from "svn commit" output.
    commit_re = re.compile(r'Committed revision (\d+).', re.M)
    # Extracts the repository UUID from "svn info" output.
    uuid_re = re.compile(r'Repository UUID:\s*(\S+)', re.M)

    def prerun(self):
        """Change to the working copy directory, if one is set.

        Presumably invoked by the commandline base class before each
        command -- confirm against common.commandline.
        """
        if self.wc:
            os.chdir(self.wc)

    def postrun(self):
        """Change back to the directory recorded at construction time."""
        if self.wc:
            os.chdir(self.cwd)

    def join(self, name):
        """Return the path of ``name`` inside the working copy's .svn dir."""
        return os.path.join(self.wc, '.svn', name)

    def revmapfile(self):
        """Return the path of the revision map file ('hg-shamap')."""
        return self.join('hg-shamap')

    def authorfile(self):
        """Return the path of the author map file ('hg-authormap')."""
        return self.join('hg-authormap')

    def __init__(self, ui, path):
        """Prepare the working copy used to write converted revisions.

        If ``path`` already is a working copy (it has a .svn/entries
        file) it is updated and used as is. Otherwise a working copy named
        ``<basename(path)>-wc`` is checked out in the current directory;
        when the parent directory of ``path`` exists, ``path`` is treated
        as a local repository (created with "svnadmin create" if it has no
        db/fs-type file) and accessed through a file:// URL. Newly created
        repositories get a pre-revprop-change hook installed.
        """

        converter_sink.__init__(self, ui, path)
        commandline.__init__(self, ui, 'svn')
        self.delete = []
        self.setexec = []
        self.delexec = []
        self.copies = []
        self.wc = None
        self.cwd = os.getcwd()

        path = os.path.realpath(path)

        # Path of the repository created here, or False if none was.
        created = False
        if os.path.isfile(os.path.join(path, '.svn', 'entries')):
            self.wc = path
            self.run0('update')
        else:
            wcpath = os.path.join(os.getcwd(), os.path.basename(path) + '-wc')

            if os.path.isdir(os.path.dirname(path)):
                if not os.path.exists(os.path.join(path, 'db', 'fs-type')):
                    ui.status(_('initializing svn repository %r\n') %
                              os.path.basename(path))
                    commandline(ui, 'svnadmin').run0('create', path)
                    created = path
                # Turn the local path into a file:// URL.
                path = util.normpath(path)
                if not path.startswith('/'):
                    path = '/' + path
                path = 'file://' + path

            ui.status(_('initializing svn working copy %r\n')
                      % os.path.basename(wcpath))
            self.run0('checkout', path, wcpath)

            self.wc = wcpath
        self.opener = scmutil.opener(self.wc)
        self.wopener = scmutil.opener(self.wc)
        self.childmap = mapfile(ui, self.join('hg-childmap'))
        # Function testing the exec bit, or None when the working copy's
        # filesystem does not support it.
        self.is_exec = util.checkexec(self.wc) and util.isexec or None

        if created:
            hook = os.path.join(created, 'hooks', 'pre-revprop-change')
            fp = open(hook, 'w')
            fp.write(pre_revprop_change)
            fp.close()
            util.setflags(hook, False, True)

        output = self.run0('info')
        self.uuid = self.uuid_re.search(output).group(1).strip()

    def wjoin(self, *names):
        """Join ``names`` onto the working copy directory."""
        return os.path.join(self.wc, *names)

    def putfile(self, filename, flags, data):
        """Write ``data`` to ``filename`` in the working copy.

        ``filename`` is relative to the working copy. With 'l' in
        ``flags`` a symlink pointing at ``data`` is created. Otherwise a
        regular file is written and, depending on its previous and
        requested exec state, the file is queued in self.setexec or
        self.delexec for a later svn:executable property change.
        """
        if 'l' in flags:
            self.wopener.symlink(data, filename)
        else:
            target = self.wjoin(filename)
            try:
                if os.path.islink(target):
                    # Remove the link through its working-copy path:
                    # filename is relative to self.wc, not to the
                    # process's current directory.
                    os.unlink(target)
            except OSError:
                pass
            self.wopener.write(filename, data)

            if self.is_exec:
                was_exec = self.is_exec(target)
            else:
                # On filesystems not supporting execute-bit, there is no way
                # to know if it is set but asking subversion. Setting it
                # systematically is just as expensive and much simpler.
                was_exec = 'x' not in flags

            util.setflags(target, False, 'x' in flags)
            if was_exec:
                if 'x' not in flags:
                    self.delexec.append(filename)
            else:
                if 'x' in flags:
                    self.setexec.append(filename)

    def _copyfile(self, source, dest):
        """Record an svn copy from ``source`` to an already written ``dest``.

        An existing destination is moved aside to a temporary name while
        "svn copy" runs and moved back afterwards, whether or not the
        command succeeded.
        """
        # SVN's copy command pukes if the destination file exists, but
        # our copyfile method expects to record a copy that has
        # already occurred.  Cross the semantic gap.
        wdest = self.wjoin(dest)
        exists = os.path.lexists(wdest)
        if exists:
            # mkstemp is only used to obtain an unused name in the same
            # directory; the file it creates is removed right away.
            fd, tempname = tempfile.mkstemp(
                prefix='hg-copy-', dir=os.path.dirname(wdest))
            os.close(fd)
            os.unlink(tempname)
            os.rename(wdest, tempname)
        try:
            self.run0('copy', source, dest)
        finally:
            if exists:
                try:
                    os.unlink(wdest)
                except OSError:
                    pass
                os.rename(tempname, wdest)

    def dirs_of(self, files):
        """Return the set of directories involved in ``files``.

        This contains every entry of ``files`` that is itself a directory
        in the working copy, plus each prefix of an entry that ends right
        before one of the '/' positions reported by strutil.rfindall().
        """
        dirs = set()
        for f in files:
            if os.path.isdir(self.wjoin(f)):
                dirs.add(f)
            for i in strutil.rfindall(f, '/'):
                dirs.add(f[:i])
        return dirs

    def add_dirs(self, files):
        """Run "svn add" on the directories of ``files`` lacking .svn/entries.

        Returns the sorted list of directories that were added.
        """
        new_dirs = [d for d in sorted(self.dirs_of(files))
                    if not os.path.exists(self.wjoin(d, '.svn', 'entries'))]
        if new_dirs:
            self.xargs(new_dirs, 'add', non_recursive=True, quiet=True)
        return new_dirs

    def add_files(self, files):
        """Run "svn add" on ``files`` (if any) and return ``files``."""
        if files:
            self.xargs(files, 'add', quiet=True)
        return files

    def tidy_dirs(self, names):
        """Run "svn delete" on directories of ``names`` left with only .svn.

        Directories are visited in reverse sorted order, so children come
        before their parents. Returns the list of deleted directories.
        """
        deleted = []
        for d in sorted(self.dirs_of(names), reverse=True):
            wd = self.wjoin(d)
            # os.listdir() returns a list: a directory is considered empty
            # when the svn administrative directory is its sole entry.
            if os.listdir(wd) == ['.svn']:
                self.run0('delete', d)
                deleted.append(d)
        return deleted

    def addchild(self, parent, child):
        """Record ``child`` as the revision committed on top of ``parent``."""
        self.childmap[parent] = child

    def revid(self, rev):
        """Return the identifier "svn:<uuid>@<rev>" for revision ``rev``."""
        return u"svn:%s@%s" % (self.uuid, rev)

    def putcommit(self, files, copies, parents, commit, source, revmap):
        """Commit one changeset to Subversion and return its identifier.

        ``files`` is a sequence of (name, version) pairs fetched from
        ``source``; ``copies`` maps copy destinations to their sources.
        If any parent already has a child recorded in the child map, that
        child's identifier is returned without committing anything.

        When "svn commit" does not report a new revision, parents[0] is
        returned if there were no files, otherwise util.Abort is raised.
        """
        for parent in parents:
            try:
                return self.revid(self.childmap[parent])
            except KeyError:
                pass

        # Apply changes to working copy
        for f, v in files:
            try:
                data, mode = source.getfile(f, v)
            except IOError:
                # The source reports the file as missing: delete it.
                self.delete.append(f)
            else:
                self.putfile(f, mode, data)
                if f in copies:
                    self.copies.append([copies[f], f])
        files = [f[0] for f in files]

        # Names already handled by an svn operation in this commit.
        entries = set(self.delete)
        files = frozenset(files)
        entries.update(self.add_dirs(files.difference(entries)))
        if self.copies:
            for s, d in self.copies:
                self._copyfile(s, d)
            self.copies = []
        if self.delete:
            self.xargs(self.delete, 'delete')
            self.delete = []
        entries.update(self.add_files(files.difference(entries)))
        entries.update(self.tidy_dirs(entries))
        if self.delexec:
            self.xargs(self.delexec, 'propdel', 'svn:executable')
            self.delexec = []
        if self.setexec:
            self.xargs(self.setexec, 'propset', 'svn:executable', '*')
            self.setexec = []

        # The commit message is handed to svn through a temporary file.
        fd, messagefile = tempfile.mkstemp(prefix='hg-convert-')
        fp = os.fdopen(fd, 'w')
        fp.write(commit.desc)
        fp.close()
        try:
            output = self.run0('commit',
                               username=util.shortuser(commit.author),
                               file=messagefile,
                               encoding='utf-8')
            try:
                rev = self.commit_re.search(output).group(1)
            except AttributeError:
                # No "Committed revision N." line in the output.
                if not files:
                    return parents[0]
                self.ui.warn(_('unexpected svn output:\n'))
                self.ui.warn(output)
                raise util.Abort(_('unable to cope with svn output'))
            if commit.rev:
                self.run('propset', 'hg:convert-rev', commit.rev,
                         revprop=True, revision=rev)
            if commit.branch and commit.branch != 'default':
                self.run('propset', 'hg:convert-branch', commit.branch,
                         revprop=True, revision=rev)
            for parent in parents:
                self.addchild(parent, rev)
            return self.revid(rev)
        finally:
            os.unlink(messagefile)

    def puttags(self, tags):
        """Warn that tags cannot be written and return (None, None)."""
        self.ui.warn(_('writing Subversion tags is not yet implemented\n'))
        return None, None
General Comments 0
You need to be logged in to leave comments. Login now