##// END OF EJS Templates
convert: subversion convert abort on revision not found (issue 3205)
Jesus Espino Garcia -
r15970:9f2ed48f stable
parent child Browse files
Show More
@@ -1,1187 +1,1189 b''
1 1 # Subversion 1.4/1.5 Python API backend
2 2 #
3 3 # Copyright(C) 2007 Daniel Holth et al
4 4
5 5 import os
6 6 import re
7 7 import sys
8 8 import cPickle as pickle
9 9 import tempfile
10 10 import urllib
11 11 import urllib2
12 12
13 13 from mercurial import strutil, scmutil, util, encoding
14 14 from mercurial.i18n import _
15 15
16 16 # Subversion stuff. Works best with very recent Python SVN bindings
17 17 # e.g. SVN 1.5 or backports. Thanks to the bzr folks for enhancing
18 18 # these bindings.
19 19
20 20 from cStringIO import StringIO
21 21
22 22 from common import NoRepo, MissingTool, commit, encodeargs, decodeargs
23 23 from common import commandline, converter_source, converter_sink, mapfile
24 24
25 25 try:
26 26 from svn.core import SubversionException, Pool
27 27 import svn
28 28 import svn.client
29 29 import svn.core
30 30 import svn.ra
31 31 import svn.delta
32 32 import transport
33 33 import warnings
34 34 warnings.filterwarnings('ignore',
35 35 module='svn.core',
36 36 category=DeprecationWarning)
37 37
38 38 except ImportError:
39 39 svn = None
40 40
class SvnPathNotFound(Exception):
    """Raised when a path cannot be found in the Subversion repository."""
43 43
def revsplit(rev):
    """Split a revision id into its (uuid, module path, revnum) parts.

    The id is cut at its last '@'; what follows is converted to an int.
    The part before is cut at its first '/': the head loses its first
    four characters (the 'svn:' prefix written by revid()) and the rest,
    slash included, is the module path ('' when there is no slash).
    """
    location, number = rev.rsplit('@', 1)
    slash = location.find('/')
    if slash < 0:
        prefix, module = location, ''
    else:
        prefix, module = location[:slash], location[slash:]
    return prefix[4:], module, int(number)
52 52
def quote(s):
    """Percent-encode s, leaving the characters svn treats as safe intact."""
    # As of svn 1.7, many svn calls expect "canonical" paths. In
    # theory, we should call svn.core.*canonicalize() on all paths
    # before passing them to the API. Instead, we assume the base url
    # is canonical and copy the behaviour of svn URL encoding function
    # so we can extend it safely with new components. The "safe"
    # characters were taken from the "svn_uri__char_validity" table in
    # libsvn_subr/path.c.
    return urllib.quote(s, "!$&'()*+,-./:=@_~")
62 62
def geturl(path):
    """Return a canonical Subversion URL for path.

    The svn client is asked first; when it raises SubversionException
    an existing local directory is turned into a file:// URL by hand.
    Anything else is canonicalized as given.
    """
    try:
        return svn.client.url_from_path(svn.core.svn_path_canonicalize(path))
    except SubversionException:
        # svn.client.url_from_path() fails with local repositories
        pass
    if not os.path.isdir(path):
        return svn.core.svn_path_canonicalize(path)
    localpath = os.path.normpath(os.path.abspath(path))
    if os.name == 'nt':
        localpath = '/' + util.normpath(localpath)
    # Module URL is later compared with the repository URL returned
    # by svn API, which is UTF-8.
    localpath = encoding.tolocal(localpath)
    fileurl = 'file://%s' % quote(localpath)
    return svn.core.svn_path_canonicalize(fileurl)
78 78
def optrev(number):
    """Wrap a revision number in an svn_opt_revision_t of kind 'number'."""
    optrev = svn.core.svn_opt_revision_t()
    optrev.kind = svn.core.svn_opt_revision_number
    optrev.value.number = number
    return optrev
84 84
class changedpath(object):
    """Plain copy of the copy-source and action fields of a changed path.

    get_log_child() substitutes these for the entries handed to its log
    receiver before pickling them.
    """
    def __init__(self, p):
        for field in ('copyfrom_path', 'copyfrom_rev', 'action'):
            setattr(self, field, getattr(p, field))
90 90
def get_log_child(fp, url, paths, start, end, limit=0, discover_changed_paths=True,
                  strict_node_history=False):
    """Stream the svn log of url to fp as pickles, then exit the process.

    Each log entry is written as an (orig_paths, revnum, author, date,
    message) tuple. The stream is terminated by a pickled None on
    success or when an IOError is raised, and by the pickled str() of
    the exception on any other failure. This function never returns: it
    ends with os._exit(0).
    """
    # A negative protocol selects the highest pickle protocol available.
    protocol = -1
    def receiver(orig_paths, revnum, author, date, message, pool):
        if orig_paths is not None:
            # Replace each value with a plain changedpath copy before
            # pickling. Only existing keys are reassigned, so the dict
            # size does not change while it is being iterated.
            for k, v in orig_paths.iteritems():
                orig_paths[k] = changedpath(v)
        pickle.dump((orig_paths, revnum, author, date, message),
                    fp, protocol)

    try:
        # Use an ra of our own so that our parent can consume
        # our results without confusing the server.
        t = transport.SvnRaTransport(url=url)
        svn.ra.get_log(t.ra, paths, start, end, limit,
                       discover_changed_paths,
                       strict_node_history,
                       receiver)
    except IOError:
        # Caller may interrupt the iteration
        pickle.dump(None, fp, protocol)
    except Exception, inst:
        # Report any other failure to the reader as a plain string.
        pickle.dump(str(inst), fp, protocol)
    else:
        pickle.dump(None, fp, protocol)
    fp.close()
    # With large history, cleanup process goes crazy and suddenly
    # consumes *huge* amount of memory. The output file being closed,
    # there is no need for clean termination.
    os._exit(0)
121 121
def debugsvnlog(ui, **opts):
    """Fetch SVN log in a subprocess and channel them back to parent to
    avoid memory collection issues.
    """
    # Pickled data travels over both streams, so switch them to binary.
    util.setbinary(sys.stdin)
    util.setbinary(sys.stdout)
    # The get_log_child() arguments arrive encoded on stdin.
    args = decodeargs(sys.stdin.read())
    # Does not return: get_log_child() ends with os._exit(0).
    get_log_child(sys.stdout, *args)
130 130
class logstream(object):
    """Interruptible revision log iterator.

    Reads the pickle stream written by get_log_child(): a sequence of
    (orig_paths, revnum, author, date, message) tuples terminated by
    None, or by an error message string if the child failed.
    """
    def __init__(self, stdout):
        self._stdout = stdout

    def __iter__(self):
        """Yield log entry tuples until the None sentinel is read.

        Raises util.Abort if the stream ends before the sentinel or if
        anything other than a five-item tuple is read (the child sends
        its error message that way).
        """
        while True:
            try:
                entry = pickle.load(self._stdout)
            except EOFError:
                raise util.Abort(_('Mercurial failed to run itself, check'
                                   ' hg executable is in PATH'))
            if entry is None:
                # End-of-stream sentinel.
                break
            # Check the shape explicitly instead of unpacking under a
            # bare except: that hid unrelated errors and let a
            # five-character error string pass as a log entry.
            if not isinstance(entry, tuple) or len(entry) != 5:
                raise util.Abort(_("log stream exception '%s'") % (entry,))
            yield entry

    def close(self):
        """Close the underlying stream; calling it again is a no-op."""
        if self._stdout:
            self._stdout.close()
            self._stdout = None
155 155
156 156
def filecheck(ui, path, proto):
    """Tell whether path looks like a local Subversion repository.

    This is verified by looking for several svn-specific files and
    directories in the given directory. ui and proto are accepted for
    signature parity with the other protocol checkers and are unused.
    """
    for marker in ('locks', 'hooks', 'format', 'db'):
        if os.path.exists(os.path.join(path, marker)):
            continue
        return False
    return True
165 165
166 166 # Check to see if a given path is the root of an svn repo over http. We verify
167 167 # this by requesting a version-controlled URL we know can't exist and looking
168 168 # for the svn-specific "not found" XML.
169 169 def httpcheck(ui, path, proto):
170 170 try:
171 171 opener = urllib2.build_opener()
172 172 rsp = opener.open('%s://%s/!svn/ver/0/.svn' % (proto, path))
173 173 data = rsp.read()
174 174 except urllib2.HTTPError, inst:
175 175 if inst.code != 404:
176 176 # Except for 404 we cannot know for sure this is not an svn repo
177 177 ui.warn(_('svn: cannot probe remote repository, assume it could '
178 178 'be a subversion repository. Use --source-type if you '
179 179 'know better.\n'))
180 180 return True
181 181 data = inst.fp.read()
182 182 except:
183 183 # Could be urllib2.URLError if the URL is invalid or anything else.
184 184 return False
185 185 return '<m:human-readable errcode="160013">' in data
186 186
# Maps a URL scheme to the function probing it for a Subversion repository.
protomap = {
    'http': httpcheck,
    'https': httpcheck,
    'file': filecheck,
}

def issvnurl(ui, url):
    """Tell whether url, or any of its parent paths, is an svn repository.

    A url without a scheme is treated as a local path. The scheme's
    checker from protomap is tried on the path and then on each parent
    obtained by dropping the last '/'-separated component. Unknown
    schemes never match.
    """
    try:
        proto, path = url.split('://', 1)
        if proto == 'file':
            path = urllib.url2pathname(path)
    except ValueError:
        # No '://' separator: url is a plain filesystem path.
        proto, path = 'file', os.path.abspath(url)
    if proto == 'file':
        path = path.replace(os.sep, '/')
    probe = protomap.get(proto)
    if probe is None:
        return False
    while '/' in path:
        if probe(ui, path, proto):
            return True
        path = path[:path.rindex('/')]
    return False
207 207
208 208 # SVN conversion code stolen from bzr-svn and tailor
209 209 #
# Subversion looks like a versioned filesystem; branch structures
# are defined by convention and not enforced by the tool. First,
# we define the potential branches (modules) as "trunk" and "branches"
# child directories. Revisions are then identified by their
# module and revision number (and a repository identifier).
215 215 #
216 216 # The revision graph is really a tree (or a forest). By default, a
217 217 # revision parent is the previous revision in the same module. If the
218 218 # module directory is copied/moved from another module then the
219 219 # revision is the module root and its parent the source revision in
220 220 # the parent module. A revision has at most one parent.
221 221 #
222 222 class svn_source(converter_source):
def __init__(self, ui, url, rev=None):
    """Open the Subversion source at url and locate its head revision.

    url may carry a trailing '@rev' to pin the latest revision; an
    explicit rev argument takes precedence over it.

    Raises NoRepo when url does not look like a Subversion repository
    or the transport cannot be opened, MissingTool when the Python
    bindings are missing or older than 1.4, and util.Abort when rev or
    the convert.svn.startrev setting is not an integer or when no
    revision is found in the module.
    """
    super(svn_source, self).__init__(ui, url, rev=rev)

    # Accept svn:// and svn+ssh:// URLs as is, working copies (a
    # directory holding '.svn'), and whatever issvnurl() recognizes.
    if not (url.startswith('svn://') or url.startswith('svn+ssh://') or
            (os.path.exists(url) and
             os.path.exists(os.path.join(url, '.svn'))) or
            issvnurl(ui, url)):
        raise NoRepo(_("%s does not look like a Subversion repository")
                     % url)
    if svn is None:
        raise MissingTool(_('Could not load Subversion python bindings'))

    try:
        version = svn.core.SVN_VER_MAJOR, svn.core.SVN_VER_MINOR
        if version < (1, 4):
            raise MissingTool(_('Subversion python bindings %d.%d found, '
                                '1.4 or later required') % version)
    except AttributeError:
        # The version constants themselves are missing.
        raise MissingTool(_('Subversion python bindings are too old, 1.4 '
                            'or later required'))

    self.lastrevs = {}

    latest = None
    try:
        # Support file://path@rev syntax. Useful e.g. to convert
        # deleted branches.
        at = url.rfind('@')
        if at >= 0:
            latest = int(url[at + 1:])
            url = url[:at]
    except ValueError:
        # What follows '@' is not a number: keep the url untouched.
        pass
    self.url = geturl(url)
    self.encoding = 'UTF-8' # Subversion is always nominal UTF-8
    try:
        self.transport = transport.SvnRaTransport(url=self.url)
        self.ra = self.transport.ra
        self.ctx = self.transport.client
        self.baseurl = svn.ra.get_repos_root(self.ra)
        # Module is either empty or a repository path starting with
        # a slash and not ending with a slash.
        self.module = urllib.unquote(self.url[len(self.baseurl):])
        self.prevmodule = None
        self.rootmodule = self.module
        self.commits = {}
        self.paths = {}
        self.uuid = svn.ra.get_uuid(self.ra)
    except SubversionException:
        ui.traceback()
        raise NoRepo(_("%s does not look like a Subversion repository")
                     % self.url)

    if rev:
        try:
            latest = int(rev)
        except ValueError:
            raise util.Abort(_('svn: revision %s is not an integer') % rev)

    self.trunkname = self.ui.config('convert', 'svn.trunk', 'trunk').strip('/')
    self.startrev = self.ui.config('convert', 'svn.startrev', default=0)
    try:
        self.startrev = int(self.startrev)
        # Negative start revisions are clamped to 0.
        if self.startrev < 0:
            self.startrev = 0
    except ValueError:
        raise util.Abort(_('svn: start revision %s is not an integer')
                         % self.startrev)

    try:
        self.head = self.latest(self.module, latest)
    except SvnPathNotFound:
        self.head = None
    if not self.head:
        raise util.Abort(_('no revision found in module %s')
                         % self.module)
    self.last_changed = self.revnum(self.head)

    self._changescache = None

    # Remember the working copy, if any; converted() writes its map
    # file under the '.svn' directory there.
    if os.path.exists(os.path.join(url, '.svn/entries')):
        self.wc = url
    else:
        self.wc = None
    self.convertfp = None
308 308
def setrevmap(self, revmap):
    """Record in self.lastrevs the highest revnum in revmap per module."""
    highest = {}
    for revid in revmap.iterkeys():
        module, revnum = revsplit(revid)[1:]
        if module not in highest or revnum > highest[module]:
            highest[module] = revnum
    self.lastrevs = highest
317 317
def exists(self, path, optrev):
    """Tell whether path, relative to self.url, can be listed at optrev."""
    target = self.url.rstrip('/') + '/' + quote(path)
    try:
        svn.client.ls(target, optrev, False, self.ctx)
    except SubversionException:
        return False
    return True
325 325
def getheads(self):
    """Return the list of head revision ids to convert.

    The first element is the head of the module (moved into the trunk
    directory when one is found); one more head is appended for every
    non-empty directory found under the branches directory. As side
    effects this sets self.tags, self.heads and, when a trunk is found,
    self.module and self.head.

    Raises util.Abort when a configured layout path does not exist,
    when the trunk has no revision, or when a start revision is
    combined with several heads or lies after the only head.
    """

    def isdir(path, revnum):
        kind = self._checkpath(path, revnum)
        return kind == svn.core.svn_node_dir

    def getcfgpath(name, rev):
        # Resolve the 'trunk', 'tags' or 'branches' directory: the
        # convert.svn.<name> setting if given, else the name itself.
        # An explicitly blank setting disables the directory.
        cfgpath = self.ui.config('convert', 'svn.' + name)
        if cfgpath is not None and cfgpath.strip() == '':
            return None
        path = (cfgpath or name).strip('/')
        if not self.exists(path, rev):
            if self.module.endswith(path) and name == 'trunk':
                # we are converting from inside this directory
                return None
            if cfgpath:
                # Only an explicitly configured path is an error.
                raise util.Abort(_('expected %s to be at %r, but not found')
                                 % (name, path))
            return None
        self.ui.note(_('found %s at %r\n') % (name, path))
        return path

    rev = optrev(self.last_changed)
    oldmodule = ''
    trunk = getcfgpath('trunk', rev)
    self.tags = getcfgpath('tags', rev)
    branches = getcfgpath('branches', rev)

    # If the project has a trunk or branches, we will extract heads
    # from them. We keep the project root otherwise.
    if trunk:
        oldmodule = self.module or ''
        self.module += '/' + trunk
        self.head = self.latest(self.module, self.last_changed)
        if not self.head:
            raise util.Abort(_('no revision found in module %s')
                             % self.module)

    # First head in the list is the module's head
    self.heads = [self.head]
    if self.tags is not None:
        # Make the tags path absolute within the repository.
        self.tags = '%s/%s' % (oldmodule , (self.tags or 'tags'))

    # Check if branches bring a few more heads to the list
    if branches:
        rpath = self.url.strip('/')
        branchnames = svn.client.ls(rpath + '/' + quote(branches),
                                    rev, False, self.ctx)
        for branch in branchnames.keys():
            module = '%s/%s/%s' % (oldmodule, branches, branch)
            # Plain files living in the branches directory are skipped.
            if not isdir(module, self.last_changed):
                continue
            brevid = self.latest(module, self.last_changed)
            if not brevid:
                self.ui.note(_('ignoring empty branch %s\n') % branch)
                continue
            self.ui.note(_('found branch %s at %d\n') %
                         (branch, self.revnum(brevid)))
            self.heads.append(brevid)

    if self.startrev and self.heads:
        if len(self.heads) > 1:
            raise util.Abort(_('svn: start revision is not supported '
                               'with more than one branch'))
        revnum = self.revnum(self.heads[0])
        if revnum < self.startrev:
            raise util.Abort(
                _('svn: no revision found after start revision %d')
                % self.startrev)

    return self.heads
397 397
def getchanges(self, rev):
    """Return (files, copies) for rev and drop it from self.paths.

    files pairs every changed file name, sorted, with rev; copies maps
    copy destinations to their sources. self.removed is set to the
    files removed in rev. A result stored in self._changescache for
    the same rev is returned as is.
    """
    cached = self._changescache
    if cached and cached[0] == rev:
        return cached[1]
    self._changescache = None
    paths, parents = self.paths[rev]
    if not parents:
        # Perform a full checkout on roots
        module, revnum = revsplit(rev)[1:]
        listing = svn.client.ls(self.baseurl + quote(module),
                                optrev(revnum), True, self.ctx)
        files = [name for name, entry in listing.iteritems()
                 if entry.kind == svn.core.svn_node_file]
        copies = {}
        self.removed = set()
    else:
        files, self.removed, copies = self.expandpaths(rev, paths, parents)

    files.sort()
    files = zip(files, [rev] * len(files))

    # caller caches the result, so free it here to release memory
    del self.paths[rev]
    return (files, copies)
421 421
def getchangedfiles(self, rev, i):
    """Return the names of the files changed in rev.

    The full getchanges() result is kept in self._changescache so that
    the next getchanges() call for the same rev can reuse it. i is
    unused.
    """
    result = self.getchanges(rev)
    self._changescache = rev, result
    files = result[0]
    return [entry[0] for entry in files]
426 426
def getcommit(self, rev):
    """Return the commit object for rev, fetching its log if needed.

    The commit is removed from self.commits before being returned.
    Raises util.Abort when the revision is still unknown after the log
    has been fetched.
    """
    if rev not in self.commits:
        module, revnum = revsplit(rev)[1:]
        self.module = module
        self.reparent(module)
        # We assume that:
        # - requests for revisions after "stop" come from the
        # revision graph backward traversal. Cache all of them
        # down to stop, they will be used eventually.
        # - requests for revisions before "stop" come to get
        # isolated branches parents. Just fetch what is needed.
        lowest = self.lastrevs.get(module, 0)
        if revnum < lowest:
            lowest = revnum + 1
        self._fetch_revisions(revnum, lowest)
        if rev not in self.commits:
            raise util.Abort(_('svn: revision %s not found') % revnum)
    # caller caches the result, so free it here to release memory
    return self.commits.pop(rev)
446 448
def gettags(self):
    """Return a dict mapping tag names to the revision ids they point to.

    An empty dict is returned when no tags directory was found
    (self.tags is None).
    """
    tags = {}
    if self.tags is None:
        return tags

    # svn tags are just a convention, project branches left in a
    # 'tags' directory. There is no other relationship than
    # ancestry, which is expensive to discover and makes them hard
    # to update incrementally. Worse, past revisions may be
    # referenced by tags far away in the future, requiring a deep
    # history traversal on every calculation. Current code
    # performs a single backward traversal, tracking moves within
    # the tags directory (tag renaming) and recording a new tag
    # every time a project is copied from outside the tags
    # directory. It also lists deleted tags, this behaviour may
    # change in the future.
    # Each pending item is a mutable [source, sourcerev, dest] list.
    pendings = []
    tagspath = self.tags
    start = svn.ra.get_latest_revnum(self.ra)
    stream = self._getlog([self.tags], start, self.startrev)
    try:
        for entry in stream:
            origpaths, revnum, author, date, message = entry
            copies = [(e.copyfrom_path, e.copyfrom_rev, p) for p, e
                      in origpaths.iteritems() if e.copyfrom_path]
            # Apply moves/copies from more specific to general
            copies.sort(reverse=True)

            srctagspath = tagspath
            if copies and copies[-1][2] == tagspath:
                # Track tags directory moves
                srctagspath = copies.pop()[0]

            for source, sourcerev, dest in copies:
                if not dest.startswith(tagspath + '/'):
                    continue
                for tag in pendings:
                    if tag[0].startswith(dest):
                        # A pending tag was itself copied from dest:
                        # follow it back to the copy source.
                        tagpath = source + tag[0][len(dest):]
                        tag[:2] = [tagpath, sourcerev]
                        break
                else:
                    # No pending tag matched: this is a new candidate.
                    pendings.append([source, sourcerev, dest])

            # Filter out tags with children coming from different
            # parts of the repository like:
            # /tags/tag.1 (from /trunk:10)
            # /tags/tag.1/foo (from /branches/foo:12)
            # Here /tags/tag.1 is discarded as well as its children.
            # It happens with tools like cvs2svn. Such tags cannot
            # be represented in mercurial.
            addeds = dict((p, e.copyfrom_path) for p, e
                          in origpaths.iteritems()
                          if e.action == 'A' and e.copyfrom_path)
            badroots = set()
            for destroot in addeds:
                for source, sourcerev, dest in pendings:
                    if (not dest.startswith(destroot + '/')
                        or source.startswith(addeds[destroot] + '/')):
                        continue
                    badroots.add(destroot)
                    break

            for badroot in badroots:
                pendings = [p for p in pendings if p[2] != badroot
                            and not p[2].startswith(badroot + '/')]

            # Tell tag renamings from tag creations
            renamings = []
            for source, sourcerev, dest in pendings:
                tagname = dest.split('/')[-1]
                if source.startswith(srctagspath):
                    renamings.append([source, sourcerev, tagname])
                    continue
                if tagname in tags:
                    # Keep the latest tag value
                    continue
                # From revision may be fake, get one with changes
                try:
                    tagid = self.latest(source, sourcerev)
                    if tagid and tagname not in tags:
                        tags[tagname] = tagid
                except SvnPathNotFound:
                    # It happens when we are following directories
                    # we assumed were copied with their parents
                    # but were really created in the tag
                    # directory.
                    pass
            # Only renamings stay pending for the older log entries.
            pendings = renamings
            tagspath = srctagspath
    finally:
        stream.close()
    return tags
540 542
def converted(self, rev, destrev):
    """Append 'destrev revnum' to the working copy's hg-shamap file.

    Nothing is done when the source is not a working copy. The map
    file lives in the '.svn' directory, is opened in append mode on
    first use and is flushed after every line.
    """
    if self.wc:
        out = self.convertfp
        if out is None:
            mappath = os.path.join(self.wc, '.svn', 'hg-shamap')
            out = self.convertfp = open(mappath, 'a')
        out.write('%s %d\n' % (destrev, self.revnum(rev)))
        out.flush()
549 551
def revid(self, revnum, module=None):
    """Build the 'svn:<uuid><module>@<revnum>' id of a revision.

    self.module is used when module is not given (or is empty).
    """
    if not module:
        module = self.module
    return 'svn:%s%s@%s' % (self.uuid, module, revnum)
552 554
def revnum(self, rev):
    """Return the integer following the last '@' of a revision id."""
    # rfind() returns -1 when there is no '@', so the whole string is
    # converted in that case.
    return int(rev[rev.rfind('@') + 1:])
555 557
def latest(self, path, stop=0):
    """Find the latest revid affecting path, up to stop. It may return
    a revision in a different module, since a branch may be moved without
    a change being reported. Return None if computed module does not
    belong to rootmodule subtree.

    A false stop means the latest revision of the repository. Raises
    SvnPathNotFound when path cannot be stat'ed at stop.
    """
    if not path.startswith(self.rootmodule):
        # Requests on foreign branches may be forbidden at server level
        self.ui.debug('ignoring foreign branch %r\n' % path)
        return None

    if not stop:
        stop = svn.ra.get_latest_revnum(self.ra)
    try:
        # stat() is issued from the repository root, then the previous
        # parent is restored.
        # NOTE(review): when stat() raises, the transport is left
        # parented on the root - confirm callers tolerate this.
        prevmodule = self.reparent('')
        dirent = svn.ra.stat(self.ra, path.strip('/'), stop)
        self.reparent(prevmodule)
    except SubversionException:
        dirent = None
    if not dirent:
        raise SvnPathNotFound(_('%s not found up to revision %d')
                              % (path, stop))

    # stat() gives us the previous revision on this line of
    # development, but it might be in *another module*. Fetch the
    # log and detect renames down to the latest revision.
    stream = self._getlog([path], stop, dirent.created_rev)
    try:
        for entry in stream:
            paths, revnum, author, date, message = entry
            if revnum <= dirent.created_rev:
                break

            for p in paths:
                if not path.startswith(p) or not paths[p].copyfrom_path:
                    continue
                # path, or one of its parents, was copied in this
                # revision: continue with the copy source.
                newpath = paths[p].copyfrom_path + path[len(p):]
                self.ui.debug("branch renamed from %s to %s at %d\n" %
                              (path, newpath, revnum))
                path = newpath
                break
    finally:
        stream.close()

    if not path.startswith(self.rootmodule):
        self.ui.debug('ignoring foreign branch %r\n' % path)
        return None
    return self.revid(dirent.created_rev, path)
604 606
def reparent(self, module):
    """Point the svn transport at module and return the previous parent.

    Nothing is done, and module itself is returned, when the transport
    is already parented there. A previous parent of None is reported
    as ''.
    """
    previous = self.prevmodule
    if previous == module:
        return module
    target = self.baseurl + quote(module)
    self.ui.debug("reparent to %s\n" % target)
    svn.ra.reparent(self.ra, target)
    self.prevmodule = module
    if previous is None:
        return ''
    return previous
617 619
def expandpaths(self, rev, paths, parents):
    """Expand the changed paths of rev into file-level changes.

    paths is a sequence of (path, entry) pairs from the revision log
    and parents the list of parent revision ids. Returns a
    (changed, removed, copies) tuple: the list of changed files
    (removed ones included), the set of removed files, and a dict
    mapping copy destinations to their sources.
    """
    changed, removed = set(), set()
    copies = {}

    new_module, revnum = revsplit(rev)[1:]
    if new_module != self.module:
        self.module = new_module
        self.reparent(self.module)

    for i, (path, ent) in enumerate(paths):
        self.ui.progress(_('scanning paths'), i, item=path,
                         total=len(paths))
        entrypath = self.getrelpath(path)

        kind = self._checkpath(entrypath, revnum)
        if kind == svn.core.svn_node_file:
            changed.add(self.recode(entrypath))
            if not ent.copyfrom_path or not parents:
                continue
            # Copy sources not in parent revisions cannot be
            # represented, ignore their origin for now
            pmodule, prevnum = revsplit(parents[0])[1:]
            if ent.copyfrom_rev < prevnum:
                continue
            copyfrom_path = self.getrelpath(ent.copyfrom_path, pmodule)
            if not copyfrom_path:
                continue
            self.ui.debug("copied to %s from %s@%s\n" %
                          (entrypath, copyfrom_path, ent.copyfrom_rev))
            copies[self.recode(entrypath)] = self.recode(copyfrom_path)
        elif kind == 0: # gone, but had better be a deleted *file*
            self.ui.debug("gone from %s\n" % ent.copyfrom_rev)
            # Look at what the path was in the first parent to tell
            # a deleted file from a deleted directory.
            pmodule, prevnum = revsplit(parents[0])[1:]
            parentpath = pmodule + "/" + entrypath
            fromkind = self._checkpath(entrypath, prevnum, pmodule)

            if fromkind == svn.core.svn_node_file:
                removed.add(self.recode(entrypath))
            elif fromkind == svn.core.svn_node_dir:
                # A directory went away: every file it held in the
                # parent revision is removed.
                oroot = parentpath.strip('/')
                nroot = path.strip('/')
                children = self._iterfiles(oroot, prevnum)
                for childpath in children:
                    childpath = childpath.replace(oroot, nroot)
                    childpath = self.getrelpath("/" + childpath, pmodule)
                    if childpath:
                        removed.add(self.recode(childpath))
            else:
                self.ui.debug('unknown path in revision %d: %s\n' % \
                              (revnum, path))
        elif kind == svn.core.svn_node_dir:
            if ent.action == 'M':
                # If the directory just had a prop change,
                # then we shouldn't need to look for its children.
                continue
            if ent.action == 'R' and parents:
                # If a directory is replacing a file, mark the previous
                # file as deleted
                pmodule, prevnum = revsplit(parents[0])[1:]
                pkind = self._checkpath(entrypath, prevnum, pmodule)
                if pkind == svn.core.svn_node_file:
                    removed.add(self.recode(entrypath))
                elif pkind == svn.core.svn_node_dir:
                    # We do not know what files were kept or removed,
                    # mark them all as changed.
                    for childpath in self._iterfiles(pmodule, prevnum):
                        childpath = self.getrelpath("/" + childpath)
                        if childpath:
                            changed.add(self.recode(childpath))

            # Every file now under the directory counts as changed.
            for childpath in self._iterfiles(path, revnum):
                childpath = self.getrelpath("/" + childpath)
                if childpath:
                    changed.add(self.recode(childpath))

            # Handle directory copies
            if not ent.copyfrom_path or not parents:
                continue
            # Copy sources not in parent revisions cannot be
            # represented, ignore their origin for now
            pmodule, prevnum = revsplit(parents[0])[1:]
            if ent.copyfrom_rev < prevnum:
                continue
            copyfrompath = self.getrelpath(ent.copyfrom_path, pmodule)
            if not copyfrompath:
                continue
            self.ui.debug("mark %s came from %s:%d\n"
                          % (path, copyfrompath, ent.copyfrom_rev))
            children = self._iterfiles(ent.copyfrom_path, ent.copyfrom_rev)
            for childpath in children:
                childpath = self.getrelpath("/" + childpath, pmodule)
                if not childpath:
                    continue
                # Map each source file onto its place under the copy.
                copytopath = path + childpath[len(copyfrompath):]
                copytopath = self.getrelpath(copytopath)
                copies[self.recode(copytopath)] = self.recode(childpath)

    # A None position closes the progress topic.
    self.ui.progress(_('scanning paths'), None)
    changed.update(removed)
    return (list(changed), removed, copies)
718 720
def _fetch_revisions(self, from_revnum, to_revnum):
    """Fill self.commits and self.paths from the log of self.module.

    The log is read from the higher of the two revision numbers down
    to the lower one (they are swapped if needed). Reading stops early
    at a branch root or below self.startrev. Raises util.Abort when
    Subversion reports that the revision does not exist.
    """
    if from_revnum < to_revnum:
        from_revnum, to_revnum = to_revnum, from_revnum

    # Most recently parsed commit, i.e. the child of the next one to
    # be parsed since the log is read backwards.
    self.child_cset = None

    def parselogentry(orig_paths, revnum, author, date, message):
        """Return the parsed commit object or None, and True if
        the revision is a branch root.
        """
        self.ui.debug("parsing revision %d (%d changes)\n" %
                      (revnum, len(orig_paths)))

        branched = False
        rev = self.revid(revnum)
        # branch log might return entries for a parent we already have

        if rev in self.commits or revnum < to_revnum:
            return None, branched

        parents = []
        # check whether this revision is the start of a branch or part
        # of a branch renaming
        orig_paths = sorted(orig_paths.iteritems())
        root_paths = [(p, e) for p, e in orig_paths
                      if self.module.startswith(p)]
        if root_paths:
            # orig_paths is sorted, so the last match is the longest
            # changed path that is a prefix of the module.
            path, ent = root_paths[-1]
            if ent.copyfrom_path:
                branched = True
                newpath = ent.copyfrom_path + self.module[len(path):]
                # ent.copyfrom_rev may not be the actual last revision
                previd = self.latest(newpath, ent.copyfrom_rev)
                if previd is not None:
                    prevmodule, prevnum = revsplit(previd)[1:]
                    if prevnum >= self.startrev:
                        parents = [previd]
                        self.ui.note(
                            _('found parent of branch %s at %d: %s\n') %
                            (self.module, prevnum, prevmodule))
            else:
                self.ui.debug("no copyfrom path, don't know what to do.\n")

        paths = []
        # filter out unrelated paths
        for path, ent in orig_paths:
            if self.getrelpath(path) is None:
                continue
            paths.append((path, ent))

        # Example SVN datetime. Includes microseconds.
        # ISO-8601 conformant
        # '2007-01-04T17:35:00.902377Z'
        date = util.parsedate(date[:19] + " UTC", ["%Y-%m-%dT%H:%M:%S"])

        log = message and self.recode(message) or ''
        author = author and self.recode(author) or ''
        try:
            # The branch is named after the last module component;
            # the trunk directory maps to no branch name (None).
            branch = self.module.split("/")[-1]
            if branch == self.trunkname:
                branch = None
        except IndexError:
            branch = None

        cset = commit(author=author,
                      date=util.datestr(date),
                      desc=log,
                      parents=parents,
                      branch=branch,
                      rev=rev)

        self.commits[rev] = cset
        # The parents list is *shared* among self.paths and the
        # commit object. Both will be updated below.
        self.paths[rev] = (paths, cset.parents)
        if self.child_cset and not self.child_cset.parents:
            # Link the previously parsed (newer) commit to this one.
            self.child_cset.parents[:] = [rev]
        self.child_cset = cset
        return cset, branched

    self.ui.note(_('fetching revision log for "%s" from %d to %d\n') %
                 (self.module, from_revnum, to_revnum))

    try:
        firstcset = None
        lastonbranch = False
        stream = self._getlog([self.module], from_revnum, to_revnum)
        try:
            for entry in stream:
                paths, revnum, author, date, message = entry
                if revnum < self.startrev:
                    lastonbranch = True
                    break
                if not paths:
                    self.ui.debug('revision %d has no entries\n' % revnum)
                    # If we ever leave the loop on an empty
                    # revision, do not try to get a parent branch
                    lastonbranch = lastonbranch or revnum == 0
                    continue
                cset, lastonbranch = parselogentry(paths, revnum, author,
                                                   date, message)
                if cset:
                    firstcset = cset
                if lastonbranch:
                    break
        finally:
            stream.close()

        if not lastonbranch and firstcset and not firstcset.parents:
            # The first revision of the sequence (the last fetched one)
            # has invalid parents if not a branch root. Find the parent
            # revision now, if any.
            try:
                firstrevnum = self.revnum(firstcset.rev)
                if firstrevnum > 1:
                    latest = self.latest(self.module, firstrevnum - 1)
                    if latest:
                        firstcset.parents.append(latest)
            except SvnPathNotFound:
                pass
    except SubversionException, (inst, num):
        if num == svn.core.SVN_ERR_FS_NO_SUCH_REVISION:
            raise util.Abort(_('svn: branch has no revision %s') % to_revnum)
        raise
843 845
def getfile(self, file, rev):
    """Return (data, mode) for file at revision rev.

    mode is 'x' for files carrying svn:executable, 'l' for files
    carrying svn:special, '' otherwise. For 'l' files a leading
    "link " is stripped from the data. Raises IOError when the file
    is listed in self.removed or is not found in the repository.
    """
    # TODO: ra.get_file transmits the whole file instead of diffs.
    if file in self.removed:
        raise IOError()
    mode = ''
    try:
        new_module, revnum = revsplit(rev)[1:]
        if self.module != new_module:
            self.module = new_module
            self.reparent(self.module)
        io = StringIO()
        info = svn.ra.get_file(self.ra, file, revnum, io)
        data = io.getvalue()
        # ra.get_files() seems to keep a reference on the input buffer
        # preventing collection. Release it explicitly.
        io.close()
        # When a list is returned, its last item is used as the
        # property container below.
        if isinstance(info, list):
            info = info[-1]
        mode = ("svn:executable" in info) and 'x' or ''
        # svn:special takes precedence over svn:executable.
        mode = ("svn:special" in info) and 'l' or mode
    except SubversionException, e:
        notfound = (svn.core.SVN_ERR_FS_NOT_FOUND,
                    svn.core.SVN_ERR_RA_DAV_PATH_NOT_FOUND)
        if e.apr_err in notfound: # File not found
            raise IOError()
        raise
    if mode == 'l':
        link_prefix = "link "
        if data.startswith(link_prefix):
            data = data[len(link_prefix):]
    return data, mode
875 877
def _iterfiles(self, path, revnum):
    """Enumerate all files in path at revnum, recursively.

    Returns a generator of file paths prefixed with ``path`` (stripped of
    surrounding slashes); entries that are not files are skipped.
    """
    path = path.strip('/')
    pool = Pool()
    url = '/'.join([self.baseurl, quote(path)]).strip('/')
    listing = svn.client.ls(url, optrev(revnum), True, self.ctx, pool)
    prefix = path
    if prefix:
        prefix += '/'
    return ((prefix + name) for name, entry in listing.iteritems()
            if entry.kind == svn.core.svn_node_file)
886 888
def getrelpath(self, path, module=None):
    """Return ``path`` relative to ``module``, or None if it lies outside.

    ``module`` defaults to self.module. Given the repository url of this
    wc, say "http://server/plone/CMFPlone/branches/Plone-2_0-branch",
    this extracts the "entry" portion (a relative path) from what
    svn log --xml says, ie
    "/CMFPlone/branches/Plone-2_0-branch/tests/PloneTestCase.py"
    becomes "tests/PloneTestCase.py". The module path itself yields ''.
    """
    if module is None:
        module = self.module
    if path.startswith(module):
        tail = path.rstrip('/')[len(module):]
        if not tail:
            # path is the module itself
            return tail
        if tail[0] == '/':
            return tail[1:]
        # otherwise module is only a textual prefix of a sibling path
        # (e.g. "/trunk" vs "/trunk2"): fall through and ignore it

    # The path is outside our tracked tree...
    self.ui.debug('%r is not under %r, ignoring\n' % (path, module))
    return None
906 908
def _checkpath(self, path, revnum, module=None):
    """Return svn.ra.check_path()'s result for ``path`` at ``revnum``.

    When ``module`` is given, the session is temporarily reparented to ''
    and ``path`` is looked up under ``module``; the previous parent is
    restored afterwards, even if the check raises.
    """
    switched = module is not None
    if switched:
        previous = self.reparent('')
        path = module + '/' + path
    try:
        # ra.check_path does not like leading slashes very much, it leads
        # to PROPFIND subversion errors
        target = path.strip('/')
        return svn.ra.check_path(self.ra, target, revnum)
    finally:
        if switched:
            self.reparent(previous)
918 920
def _getlog(self, paths, start, end, limit=0, discover_changed_paths=True,
            strict_node_history=False):
    """Spawn 'hg debugsvnlog' and return a logstream over its output.

    The request (base url, normalized paths, revision range and options)
    is serialized with encodeargs() and written to the child's stdin.
    Raises util.Abort if closing the child's stdin fails with IOError.
    """
    # Normalize path names, svn >= 1.5 only wants paths relative to
    # supplied URL
    def normalize(p):
        if p.startswith('/'):
            return p.strip('/')
        return (self.module + '/' + p).strip('/')

    relpaths = [normalize(p) for p in paths]
    payload = encodeargs([self.baseurl, relpaths, start, end, limit,
                          discover_changed_paths, strict_node_history])
    cmd = '%s debugsvnlog' % util.shellquote(util.hgexecutable())
    stdin, stdout = util.popen2(util.quotecommand(cmd))
    stdin.write(payload)
    try:
        stdin.close()
    except IOError:
        raise util.Abort(_('Mercurial failed to run itself, check'
                           ' hg executable is in PATH'))
    return logstream(stdout)
941 943
# Shell script installed by svn_sink.__init__ as the 'pre-revprop-change'
# hook of a repository it has just created. It allows modifying svn:log
# and adding the hg:convert-branch / hg:convert-rev revision properties;
# any other revision property change is refused with exit status 1.
pre_revprop_change = '''#!/bin/sh

REPOS="$1"
REV="$2"
USER="$3"
PROPNAME="$4"
ACTION="$5"

if [ "$ACTION" = "M" -a "$PROPNAME" = "svn:log" ]; then exit 0; fi
if [ "$ACTION" = "A" -a "$PROPNAME" = "hg:convert-branch" ]; then exit 0; fi
if [ "$ACTION" = "A" -a "$PROPNAME" = "hg:convert-rev" ]; then exit 0; fi

echo "Changing prohibited revision property" >&2
exit 1
'''
957 959
class svn_sink(converter_sink, commandline):
    """Conversion sink that commits changesets through the svn command line.

    Changes are applied to a Subversion working copy (self.wc) and then
    committed with 'svn commit'. Bookkeeping files (revision map, author
    map, child map) are stored inside the working copy's '.svn' directory.
    """

    commit_re = re.compile(r'Committed revision (\d+).', re.M)
    uuid_re = re.compile(r'Repository UUID:\s*(\S+)', re.M)

    def prerun(self):
        """Change directory to the working copy, if there is one.

        NOTE(review): presumably called by commandline before each svn
        invocation -- confirm in common.py.
        """
        if self.wc:
            os.chdir(self.wc)

    def postrun(self):
        """Change back to the directory that was current at construction."""
        if self.wc:
            os.chdir(self.cwd)

    def join(self, name):
        """Return the path of ``name`` inside the working copy's .svn dir."""
        return os.path.join(self.wc, '.svn', name)

    def revmapfile(self):
        """Return the path of the revision map file."""
        return self.join('hg-shamap')

    def authorfile(self):
        """Return the path of the author map file."""
        return self.join('hg-authormap')

    def __init__(self, ui, path):
        """Open or create the working copy used as conversion target.

        If ``path`` is an existing working copy (it has '.svn/entries'),
        it is updated and used directly. Otherwise a working copy named
        '<basename>-wc' is checked out in the current directory; when the
        parent directory of ``path`` exists, ``path`` is turned into a
        file:// URL and a repository is created there first if it lacks
        'db/fs-type'.
        """
        converter_sink.__init__(self, ui, path)
        commandline.__init__(self, ui, 'svn')
        self.delete = []
        self.setexec = []
        self.delexec = []
        self.copies = []
        self.wc = None
        self.cwd = os.getcwd()

        path = os.path.realpath(path)

        created = False
        if os.path.isfile(os.path.join(path, '.svn', 'entries')):
            self.wc = path
            self.run0('update')
        else:
            wcpath = os.path.join(os.getcwd(), os.path.basename(path) + '-wc')

            if os.path.isdir(os.path.dirname(path)):
                if not os.path.exists(os.path.join(path, 'db', 'fs-type')):
                    ui.status(_('initializing svn repository %r\n') %
                              os.path.basename(path))
                    commandline(ui, 'svnadmin').run0('create', path)
                    created = path
                path = util.normpath(path)
                if not path.startswith('/'):
                    path = '/' + path
                path = 'file://' + path

            ui.status(_('initializing svn working copy %r\n')
                      % os.path.basename(wcpath))
            self.run0('checkout', path, wcpath)

            self.wc = wcpath
        self.opener = scmutil.opener(self.wc)
        self.wopener = scmutil.opener(self.wc)
        self.childmap = mapfile(ui, self.join('hg-childmap'))
        self.is_exec = util.checkexec(self.wc) and util.isexec or None

        if created:
            # Install the hook allowing the revision property changes
            # performed by putcommit() on the freshly created repository.
            hook = os.path.join(created, 'hooks', 'pre-revprop-change')
            fp = open(hook, 'w')
            try:
                fp.write(pre_revprop_change)
            finally:
                # do not leak the descriptor if the write fails
                fp.close()
            util.setflags(hook, False, True)

        output = self.run0('info')
        self.uuid = self.uuid_re.search(output).group(1).strip()

    def wjoin(self, *names):
        """Join ``names`` onto the working copy root."""
        return os.path.join(self.wc, *names)

    def putfile(self, filename, flags, data):
        """Write ``data`` to ``filename`` in the working copy.

        With 'l' in ``flags`` a symlink pointing to ``data`` is created.
        Otherwise a regular file is written and, depending on 'x' in
        ``flags``, the file is queued in self.setexec or self.delexec so
        that putcommit() can adjust svn:executable.
        """
        if 'l' in flags:
            self.wopener.symlink(data, filename)
        else:
            wpath = self.wjoin(filename)
            try:
                # Remove a stale symlink so the data is not written
                # through it. The unlink must target the working copy
                # path: ``filename`` alone would be resolved against the
                # process current directory.
                if os.path.islink(wpath):
                    os.unlink(wpath)
            except OSError:
                pass
            self.wopener.write(filename, data)

            if self.is_exec:
                was_exec = self.is_exec(wpath)
            else:
                # On filesystems not supporting execute-bit, there is no way
                # to know if it is set but asking subversion. Setting it
                # systematically is just as expensive and much simpler.
                was_exec = 'x' not in flags

            util.setflags(wpath, False, 'x' in flags)
            if was_exec:
                if 'x' not in flags:
                    self.delexec.append(filename)
            else:
                if 'x' in flags:
                    self.setexec.append(filename)

    def _copyfile(self, source, dest):
        """Record a copy from ``source`` to ``dest`` with 'svn copy'."""
        # SVN's copy command pukes if the destination file exists, but
        # our copyfile method expects to record a copy that has
        # already occurred.  Cross the semantic gap.
        wdest = self.wjoin(dest)
        exists = os.path.lexists(wdest)
        if exists:
            # Reserve a unique name next to the destination and move the
            # existing destination out of the way under that name.
            fd, tempname = tempfile.mkstemp(
                prefix='hg-copy-', dir=os.path.dirname(wdest))
            os.close(fd)
            os.unlink(tempname)
            os.rename(wdest, tempname)
        try:
            self.run0('copy', source, dest)
        finally:
            if exists:
                # Put the already written content back in place of
                # whatever 'svn copy' produced.
                try:
                    os.unlink(wdest)
                except OSError:
                    pass
                os.rename(tempname, wdest)

    def dirs_of(self, files):
        """Return the set of directories involved in ``files``.

        This contains every ancestor directory of each name, plus the
        name itself when it is a directory in the working copy.
        """
        dirs = set()
        for f in files:
            if os.path.isdir(self.wjoin(f)):
                dirs.add(f)
            for i in strutil.rfindall(f, '/'):
                dirs.add(f[:i])
        return dirs

    def add_dirs(self, files):
        """'svn add' the directories of ``files`` lacking '.svn/entries'.

        Returns the sorted list of directories that were added.
        """
        add_dirs = [d for d in sorted(self.dirs_of(files))
                    if not os.path.exists(self.wjoin(d, '.svn', 'entries'))]
        if add_dirs:
            self.xargs(add_dirs, 'add', non_recursive=True, quiet=True)
        return add_dirs

    def add_files(self, files):
        """'svn add' ``files`` (if any) and return them."""
        if files:
            self.xargs(files, 'add', quiet=True)
        return files

    def tidy_dirs(self, names):
        """'svn delete' directories of ``names`` that only contain '.svn'.

        Directories are visited in reverse sorted order, so nested
        directories come before their parents. Returns the list of
        deleted directories.
        """
        deleted = []
        for d in sorted(self.dirs_of(names), reverse=True):
            wd = self.wjoin(d)
            # os.listdir() returns a list: comparing it with the bare
            # string '.svn' could never be true.
            if os.listdir(wd) == ['.svn']:
                self.run0('delete', d)
                deleted.append(d)
        return deleted

    def addchild(self, parent, child):
        """Remember ``child`` as the revision committed on top of ``parent``."""
        self.childmap[parent] = child

    def revid(self, rev):
        """Return the converter revision identifier for svn revision ``rev``."""
        return u"svn:%s@%s" % (self.uuid, rev)

    def putcommit(self, files, copies, parents, commit, source, revmap):
        """Apply a changeset to the working copy and commit it.

        ``files`` is a sequence of (name, version) pairs fetched through
        ``source.getfile()``; an IOError from it marks the file as
        deleted. ``copies`` maps destination names to copy sources.

        Returns the revision identifier of the new commit. If one of
        ``parents`` already has a recorded child, that child's identifier
        is returned and nothing is committed. If svn reports no committed
        revision and there were no files, ``parents[0]`` is returned.

        Raises util.Abort when the commit output cannot be parsed.
        """
        for parent in parents:
            try:
                return self.revid(self.childmap[parent])
            except KeyError:
                pass

        # Apply changes to working copy
        for f, v in files:
            try:
                data, mode = source.getfile(f, v)
            except IOError:
                self.delete.append(f)
            else:
                self.putfile(f, mode, data)
                if f in copies:
                    self.copies.append([copies[f], f])
        files = frozenset([f[0] for f in files])

        entries = set(self.delete)
        entries.update(self.add_dirs(files.difference(entries)))
        if self.copies:
            for s, d in self.copies:
                self._copyfile(s, d)
            self.copies = []
        if self.delete:
            self.xargs(self.delete, 'delete')
            self.delete = []
        entries.update(self.add_files(files.difference(entries)))
        entries.update(self.tidy_dirs(entries))
        if self.delexec:
            self.xargs(self.delexec, 'propdel', 'svn:executable')
            self.delexec = []
        if self.setexec:
            self.xargs(self.setexec, 'propset', 'svn:executable', '*')
            self.setexec = []

        fd, messagefile = tempfile.mkstemp(prefix='hg-convert-')
        try:
            # Writing happens inside the try block so the temporary file
            # is removed even when the message cannot be written.
            fp = os.fdopen(fd, 'w')
            try:
                fp.write(commit.desc)
            finally:
                fp.close()
            output = self.run0('commit',
                               username=util.shortuser(commit.author),
                               file=messagefile,
                               encoding='utf-8')
            try:
                rev = self.commit_re.search(output).group(1)
            except AttributeError:
                # no "Committed revision" line in the output
                if not files:
                    return parents[0]
                self.ui.warn(_('unexpected svn output:\n'))
                self.ui.warn(output)
                raise util.Abort(_('unable to cope with svn output'))
            if commit.rev:
                self.run('propset', 'hg:convert-rev', commit.rev,
                         revprop=True, revision=rev)
            if commit.branch and commit.branch != 'default':
                self.run('propset', 'hg:convert-branch', commit.branch,
                         revprop=True, revision=rev)
            for parent in parents:
                self.addchild(parent, rev)
            return self.revid(rev)
        finally:
            os.unlink(messagefile)

    def puttags(self, tags):
        """Warn that tags cannot be written; return (None, None)."""
        self.ui.warn(_('writing Subversion tags is not yet implemented\n'))
        return None, None
General Comments 0
You need to be logged in to leave comments. Login now