##// END OF EJS Templates
convert/svn: delegate to svn bindings if HTTP probe fails...
Patrick Mezard -
r9829:1b2516a5 default
parent child Browse files
Show More
@@ -1,1142 +1,1155 b''
1 1 # Subversion 1.4/1.5 Python API backend
2 2 #
3 3 # Copyright(C) 2007 Daniel Holth et al
4 4
5 5 import os
6 6 import re
7 7 import sys
8 8 import cPickle as pickle
9 9 import tempfile
10 10 import urllib
11 import urllib2
11 12
12 13 from mercurial import strutil, util, encoding
13 14 from mercurial.i18n import _
14 15
15 16 # Subversion stuff. Works best with very recent Python SVN bindings
16 17 # e.g. SVN 1.5 or backports. Thanks to the bzr folks for enhancing
17 18 # these bindings.
18 19
19 20 from cStringIO import StringIO
20 21
21 22 from common import NoRepo, MissingTool, commit, encodeargs, decodeargs
22 23 from common import commandline, converter_source, converter_sink, mapfile
23 24
24 25 try:
25 26 from svn.core import SubversionException, Pool
26 27 import svn
27 28 import svn.client
28 29 import svn.core
29 30 import svn.ra
30 31 import svn.delta
31 32 import transport
32 33 import warnings
33 34 warnings.filterwarnings('ignore',
34 35 module='svn.core',
35 36 category=DeprecationWarning)
36 37
37 38 except ImportError:
38 39 pass
39 40
class SvnPathNotFound(Exception):
    """Raised when a path cannot be found in the Subversion repository."""
42 43
def geturl(path):
    """Return a URL for path.

    The svn client is asked first. When it raises SubversionException, an
    existing local directory is turned into a quoted file:// URL and
    anything else is returned unchanged.
    """
    try:
        canonical = svn.core.svn_path_canonicalize(path)
        return svn.client.url_from_path(canonical)
    except SubversionException:
        pass
    if not os.path.isdir(path):
        return path
    path = os.path.normpath(os.path.abspath(path))
    if os.name == 'nt':
        path = '/' + util.normpath(path)
    # Module URL is later compared with the repository URL returned
    # by svn API, which is UTF-8.
    path = encoding.tolocal(path)
    return 'file://%s' % urllib.quote(path)
57 58
def optrev(number):
    """Return an svn_opt_revision_t of kind "number" set to `number`."""
    revision = svn.core.svn_opt_revision_t()
    revision.kind = svn.core.svn_opt_revision_number
    revision.value.number = number
    return revision
63 64
class changedpath(object):
    """Plain copy of the copy source and action of a changed path entry."""
    def __init__(self, p):
        # Only these three attributes of the source entry are retained
        for name in ('copyfrom_path', 'copyfrom_rev', 'action'):
            setattr(self, name, getattr(p, name))
69 70
def get_log_child(fp, url, paths, start, end, limit=0, discover_changed_paths=True,
                  strict_node_history=False):
    """Run an svn log query and stream the results to fp as pickles.

    Every log entry is dumped as an (orig_paths, revnum, author, date,
    message) tuple in which the changed path values were replaced by
    changedpath instances. The stream is terminated by one last pickle:
    the error number if a SubversionException was raised, None otherwise.

    This function does not return: fp is closed and the process is
    terminated with os._exit(0).
    """
    # A negative protocol makes pickle use the highest one available
    protocol = -1
    def receiver(orig_paths, revnum, author, date, message, pool):
        # Callback handed to svn.ra.get_log() below
        if orig_paths is not None:
            # Only values of existing keys are replaced, the dictionary
            # size does not change while iterating.
            for k, v in orig_paths.iteritems():
                orig_paths[k] = changedpath(v)
        pickle.dump((orig_paths, revnum, author, date, message),
                    fp, protocol)

    try:
        # Use an ra of our own so that our parent can consume
        # our results without confusing the server.
        t = transport.SvnRaTransport(url=url)
        svn.ra.get_log(t.ra, paths, start, end, limit,
                       discover_changed_paths,
                       strict_node_history,
                       receiver)
    except SubversionException, (inst, num):
        # Forward the error number, the reader turns it into an exception
        pickle.dump(num, fp, protocol)
    except IOError:
        # Caller may interrupt the iteration
        pickle.dump(None, fp, protocol)
    else:
        # Normal end of stream marker
        pickle.dump(None, fp, protocol)
    fp.close()
    # With large history, cleanup process goes crazy and suddenly
    # consumes *huge* amount of memory. The output file being closed,
    # there is no need for clean termination.
    os._exit(0)
100 101
def debugsvnlog(ui, **opts):
    """Fetch SVN log in a subprocess and channel them back to parent to
    avoid memory collection issues.
    """
    # Both ends of the exchange carry binary data
    for stream in (sys.stdin, sys.stdout):
        util.set_binary(stream)
    encoded = sys.stdin.read()
    get_log_child(sys.stdout, *decodeargs(encoded))
109 110
class logstream(object):
    """Interruptible revision log iterator."""
    def __init__(self, stdout):
        self._stdout = stdout

    def __iter__(self):
        """Yield the log entries unpickled from the stream, one by one.

        Iteration stops on a None record. Any other record which is not
        a five items entry is reported as a SubversionException.
        """
        while True:
            try:
                record = pickle.load(self._stdout)
            except EOFError:
                raise util.Abort(_('Mercurial failed to run itself, check'
                                   ' hg executable is in PATH'))
            try:
                # Regular entries unpack into exactly five fields
                orig_paths, revnum, author, date, message = record
            except:
                if record is not None:
                    raise SubversionException("child raised exception",
                                              record)
                # None is the end of stream marker
                return
            yield record

    def close(self):
        """Close the underlying stream, once."""
        if not self._stdout:
            return
        self._stdout.close()
        self._stdout = None
134 135
135 136
136 137 # Check to see if the given path is a local Subversion repo. Verify this by
137 138 # looking for several svn-specific files and directories in the given
138 139 # directory.
def filecheck(ui, path, proto):
    """Return True if path contains all the entries expected at the root
    of a local Subversion repository.

    ui and proto are not used, they are part of the signature shared by
    the protocol checkers.
    """
    required = ('locks', 'hooks', 'format', 'db')
    missing = [name for name in required
               if not os.path.exists(os.path.join(path, name))]
    return not missing
144 145
145 146 # Check to see if a given path is the root of an svn repo over http. We verify
146 147 # this by requesting a version-controlled URL we know can't exist and looking
147 148 # for the svn-specific "not found" XML.
148 def httpcheck(path, proto):
149 return ('<m:human-readable errcode="160013">' in
150 urllib.urlopen('%s://%s/!svn/ver/0/.svn' % (proto, path)).read())
149 def httpcheck(ui, path, proto):
150 try:
151 opener = urllib2.build_opener()
152 rsp = opener.open('%s://%s/!svn/ver/0/.svn' % (proto, path))
153 return '<m:human-readable errcode="160013">' in rsp.read()
154 except urllib2.HTTPError, inst:
155 if inst.code == 404:
156 return False
157 # Except for 404 we cannot know for sure this is not an svn repo
158 ui.warn(_('svn: cannot probe remote repository, assume it could be '
159 'a subversion repository. Use --source if you know better.\n'))
160 return True
161 except:
162 # Could be urllib2.URLError if the URL is invalid or anything else.
163 return False
151 164
# Map URL schemes to the function probing them. Checkers are called with
# (ui, path, proto) and return True if path looks like an svn repository.
protomap = {'http': httpcheck,
            'https': httpcheck,
            'file': filecheck,
            }
def issvnurl(ui, url):
    """Return True if url or one of its parents looks like an svn repository.

    url is split into a scheme and a path, strings without a scheme being
    handled as local paths. The checker registered for the scheme is then
    tried on the path and on each of its parents, until one succeeds or
    no '/' is left in the path. Unknown schemes always return False.
    """
    try:
        proto, path = url.split('://', 1)
        if proto == 'file':
            path = urllib.url2pathname(path)
    except ValueError:
        # No scheme separator, assume a local path
        proto = 'file'
        path = os.path.abspath(url)
    if proto == 'file':
        path = path.replace(os.sep, '/')
    # The fallback must accept the same arguments as the real checkers
    check = protomap.get(proto, lambda *args: False)
    while '/' in path:
        if check(ui, path, proto):
            return True
        path = path.rsplit('/', 1)[0]
    return False
172 185
173 186 # SVN conversion code stolen from bzr-svn and tailor
174 187 #
175 188 # Subversion looks like a versioned filesystem, branches structures
176 189 # are defined by conventions and not enforced by the tool. First,
177 190 # we define the potential branches (modules) as "trunk" and "branches"
178 191 # children directories. Revisions are then identified by their
179 192 # module and revision number (and a repository identifier).
180 193 #
181 194 # The revision graph is really a tree (or a forest). By default, a
182 195 # revision parent is the previous revision in the same module. If the
183 196 # module directory is copied/moved from another module then the
184 197 # revision is the module root and its parent the source revision in
185 198 # the parent module. A revision has at most one parent.
186 199 #
187 200 class svn_source(converter_source):
def __init__(self, ui, url, rev=None):
    """Open the Subversion repository at url as a conversion source.

    url may end with "@rev" to select the revision to start from, rev
    takes precedence when supplied. Raises NoRepo if url does not look
    like a Subversion repository, MissingTool if the bindings are
    missing or too old and util.Abort on invalid revisions or if no
    revision is found in the module.
    """
    super(svn_source, self).__init__(ui, url, rev=rev)

    # Cheap checks first, probing the URL comes last
    if not (url.startswith('svn://') or url.startswith('svn+ssh://') or
            (os.path.exists(url) and
             os.path.exists(os.path.join(url, '.svn'))) or
            issvnurl(ui, url)):
        raise NoRepo("%s does not look like a Subversion repo" % url)

    # The name is only defined if the bindings were imported successfully
    try:
        SubversionException
    except NameError:
        raise MissingTool(_('Subversion python bindings could not be loaded'))

    try:
        version = svn.core.SVN_VER_MAJOR, svn.core.SVN_VER_MINOR
        if version < (1, 4):
            raise MissingTool(_('Subversion python bindings %d.%d found, '
                                '1.4 or later required') % version)
    except AttributeError:
        raise MissingTool(_('Subversion python bindings are too old, 1.4 '
                            'or later required'))

    # Highest known revision number per module, see setrevmap()
    self.lastrevs = {}

    latest = None
    try:
        # Support file://path@rev syntax. Useful e.g. to convert
        # deleted branches.
        at = url.rfind('@')
        if at >= 0:
            latest = int(url[at+1:])
            url = url[:at]
    except ValueError:
        # What follows '@' is not a revision number, keep url as is
        pass
    self.url = geturl(url)
    self.encoding = 'UTF-8' # Subversion is always nominal UTF-8
    try:
        self.transport = transport.SvnRaTransport(url=self.url)
        self.ra = self.transport.ra
        self.ctx = self.transport.client
        self.baseurl = svn.ra.get_repos_root(self.ra)
        # Module is either empty or a repository path starting with
        # a slash and not ending with a slash.
        self.module = urllib.unquote(self.url[len(self.baseurl):])
        self.prevmodule = None
        self.rootmodule = self.module
        self.commits = {}
        self.paths = {}
        self.uuid = svn.ra.get_uuid(self.ra)
    except SubversionException:
        ui.traceback()
        raise NoRepo("%s does not look like a Subversion repo" % self.url)

    if rev:
        try:
            latest = int(rev)
        except ValueError:
            raise util.Abort(_('svn: revision %s is not an integer') % rev)

    self.startrev = self.ui.config('convert', 'svn.startrev', default=0)
    try:
        self.startrev = int(self.startrev)
        # Negative start revisions are handled like 0
        if self.startrev < 0:
            self.startrev = 0
    except ValueError:
        raise util.Abort(_('svn: start revision %s is not an integer')
                         % self.startrev)

    self.head = self.latest(self.module, latest)
    if not self.head:
        raise util.Abort(_('no revision found in module %s')
                         % self.module)
    self.last_changed = self.revnum(self.head)

    self._changescache = None

    # Remember the working copy, if any: converted() records the
    # conversion results in it.
    if os.path.exists(os.path.join(url, '.svn/entries')):
        self.wc = url
    else:
        self.wc = None
    self.convertfp = None
270 283
def setrevmap(self, revmap):
    """Record the highest revision number found in revmap for each module."""
    highest = {}
    for revid in revmap.iterkeys():
        module, revnum = self.revsplit(revid)[1:]
        if module not in highest or revnum > highest[module]:
            highest[module] = revnum
    self.lastrevs = highest
279 292
def exists(self, path, optrev):
    """Return True if path can be listed below the source URL at optrev."""
    target = '%s/%s' % (self.url.rstrip('/'), urllib.quote(path))
    try:
        svn.client.ls(target, optrev, False, self.ctx)
    except SubversionException:
        return False
    return True
287 300
def getheads(self):
    """Return the list of head revision ids.

    The head of the module comes first, followed by the head of every
    non-empty directory found in the branches directory. As side
    effects, self.tags is set to the tags directory path (or None) and,
    when a trunk is found, self.module and self.head are moved to it.
    """

    def isdir(path, revnum):
        kind = self._checkpath(path, revnum)
        return kind == svn.core.svn_node_dir

    def getcfgpath(name, rev):
        # Return the repository path configured as convert.svn.<name>,
        # defaulting to <name>, or None if it is disabled or not found.
        cfgpath = self.ui.config('convert', 'svn.' + name)
        if cfgpath is not None and cfgpath.strip() == '':
            # An empty setting explicitly disables the layout entry
            return None
        path = (cfgpath or name).strip('/')
        if not self.exists(path, rev):
            # Only an explicitly configured path is required to exist
            if cfgpath:
                raise util.Abort(_('expected %s to be at %r, but not found')
                                 % (name, path))
            return None
        self.ui.note(_('found %s at %r\n') % (name, path))
        return path

    rev = optrev(self.last_changed)
    oldmodule = ''
    trunk = getcfgpath('trunk', rev)
    self.tags = getcfgpath('tags', rev)
    branches = getcfgpath('branches', rev)

    # If the project has a trunk or branches, we will extract heads
    # from them. We keep the project root otherwise.
    if trunk:
        oldmodule = self.module or ''
        self.module += '/' + trunk
        self.head = self.latest(self.module, self.last_changed)
        if not self.head:
            raise util.Abort(_('no revision found in module %s')
                             % self.module)

    # First head in the list is the module's head
    self.heads = [self.head]
    if self.tags is not None:
        self.tags = '%s/%s' % (oldmodule , (self.tags or 'tags'))

    # Check if branches bring a few more heads to the list
    if branches:
        rpath = self.url.strip('/')
        branchnames = svn.client.ls(rpath + '/' + urllib.quote(branches),
                                    rev, False, self.ctx)
        for branch in branchnames.keys():
            module = '%s/%s/%s' % (oldmodule, branches, branch)
            # Files stored in the branches directory are not branches
            if not isdir(module, self.last_changed):
                continue
            brevid = self.latest(module, self.last_changed)
            if not brevid:
                self.ui.note(_('ignoring empty branch %s\n') % branch)
                continue
            self.ui.note(_('found branch %s at %d\n') %
                         (branch, self.revnum(brevid)))
            self.heads.append(brevid)

    if self.startrev and self.heads:
        if len(self.heads) > 1:
            raise util.Abort(_('svn: start revision is not supported '
                               'with more than one branch'))
        revnum = self.revnum(self.heads[0])
        if revnum < self.startrev:
            raise util.Abort(_('svn: no revision found after start revision %d')
                             % self.startrev)

    return self.heads
355 368
def getfile(self, file, rev):
    """Return the content of file at rev and cache its mode for getmode()."""
    content, flags = self._getfile(file, rev)
    self.modecache[(file, rev)] = flags
    return content
360 373
def getmode(self, file, rev):
    """Return the mode cached for file at rev by a previous getfile() call."""
    key = (file, rev)
    return self.modecache[key]
363 376
def getchanges(self, rev):
    """Return (files, copies) for rev.

    files is a sorted list of (path, rev) pairs and copies a dictionary
    of copy sources. The result saved by getchangedfiles() is returned
    when it matches rev. The entry of rev in self.paths is consumed.
    """
    cached = self._changescache
    if cached and cached[0] == rev:
        return cached[1]
    self._changescache = None
    self.modecache = {}
    paths, parents = self.paths[rev]
    if parents:
        files, copies = self.expandpaths(rev, paths, parents)
    else:
        # Perform a full checkout on roots
        module, revnum = self.revsplit(rev)[1:]
        entries = svn.client.ls(self.baseurl + urllib.quote(module),
                                optrev(revnum), True, self.ctx)
        files = [name for name, entry in entries.iteritems()
                 if entry.kind == svn.core.svn_node_file]
        copies = {}

    files = [(name, rev) for name in sorted(files)]

    # caller caches the result, so free it here to release memory
    del self.paths[rev]
    return (files, copies)
387 400
def getchangedfiles(self, rev, i):
    """Return the names of the files changed in rev.

    The changes are saved so that the next getchanges() call for the
    same revision can reuse them.
    """
    result = self.getchanges(rev)
    self._changescache = (rev, result)
    files = result[0]
    return [entry[0] for entry in files]
392 405
def getcommit(self, rev):
    """Return the commit object of rev, fetching the log if necessary.

    The commit is removed from self.commits before being returned.
    """
    if rev not in self.commits:
        module, revnum = self.revsplit(rev)[1:]
        self.module = module
        self.reparent(module)
        # We assume that:
        # - requests for revisions after "stop" come from the
        # revision graph backward traversal. Cache all of them
        # down to stop, they will be used eventually.
        # - requests for revisions before "stop" come to get
        # isolated branches parents. Just fetch what is needed.
        stop = self.lastrevs.get(module, 0)
        if revnum < stop:
            stop = revnum + 1
        self._fetch_revisions(revnum, stop)
    # caller caches the result, so free it here to release memory
    return self.commits.pop(rev)
412 425
def gettags(self):
    """Return a dictionary mapping tag names to revision ids.

    The dictionary is empty if no tags directory is known. Subversion
    errors raised while reading the log are only reported with a note.
    """
    tags = {}
    if self.tags is None:
        return tags

    # svn tags are just a convention, project branches left in a
    # 'tags' directory. There is no other relationship than
    # ancestry, which is expensive to discover and makes them hard
    # to update incrementally. Worse, past revisions may be
    # referenced by tags far away in the future, requiring a deep
    # history traversal on every calculation. Current code
    # performs a single backward traversal, tracking moves within
    # the tags directory (tag renaming) and recording a new tag
    # everytime a project is copied from outside the tags
    # directory. It also lists deleted tags, this behaviour may
    # change in the future.

    # Tags being followed, as [source, sourcerev, dest] lists
    pendings = []
    tagspath = self.tags
    start = svn.ra.get_latest_revnum(self.ra)
    try:
        # NOTE(review): unlike in latest() and _fetch_revisions(), the
        # log stream is not closed explicitly here.
        for entry in self._getlog([self.tags], start, self.startrev):
            origpaths, revnum, author, date, message = entry
            copies = [(e.copyfrom_path, e.copyfrom_rev, p) for p, e
                      in origpaths.iteritems() if e.copyfrom_path]
            # Apply moves/copies from more specific to general
            copies.sort(reverse=True)

            srctagspath = tagspath
            if copies and copies[-1][2] == tagspath:
                # Track tags directory moves
                srctagspath = copies.pop()[0]

            for source, sourcerev, dest in copies:
                if not dest.startswith(tagspath + '/'):
                    continue
                for tag in pendings:
                    if tag[0].startswith(dest):
                        # A followed tag comes from this copy, follow
                        # its source from now on.
                        tagpath = source + tag[0][len(dest):]
                        tag[:2] = [tagpath, sourcerev]
                        break
                else:
                    pendings.append([source, sourcerev, dest])

            # Filter out tags with children coming from different
            # parts of the repository like:
            # /tags/tag.1 (from /trunk:10)
            # /tags/tag.1/foo (from /branches/foo:12)
            # Here /tags/tag.1 is discarded as well as its children.
            # It happens with tools like cvs2svn. Such tags cannot
            # be represented in mercurial.
            addeds = dict((p, e.copyfrom_path) for p, e
                          in origpaths.iteritems()
                          if e.action == 'A' and e.copyfrom_path)
            badroots = set()
            for destroot in addeds:
                for source, sourcerev, dest in pendings:
                    if (not dest.startswith(destroot + '/')
                        or source.startswith(addeds[destroot] + '/')):
                        continue
                    badroots.add(destroot)
                    break

            for badroot in badroots:
                pendings = [p for p in pendings if p[2] != badroot
                            and not p[2].startswith(badroot + '/')]

            # Tell tag renamings from tag creations
            remainings = []
            for source, sourcerev, dest in pendings:
                tagname = dest.split('/')[-1]
                if source.startswith(srctagspath):
                    # Copied from the tags directory: a renaming, keep
                    # following it in older revisions.
                    remainings.append([source, sourcerev, tagname])
                    continue
                if tagname in tags:
                    # Keep the latest tag value
                    continue
                # From revision may be fake, get one with changes
                try:
                    tagid = self.latest(source, sourcerev)
                    if tagid and tagname not in tags:
                        tags[tagname] = tagid
                except SvnPathNotFound:
                    # It happens when we are following directories
                    # we assumed were copied with their parents
                    # but were really created in the tag
                    # directory.
                    pass
            pendings = remainings
            tagspath = srctagspath

    except SubversionException:
        self.ui.note(_('no tags found at revision %d\n') % start)
    return tags
506 519
def converted(self, rev, destrev):
    """Append "destrev revnum" to .svn/hg-shamap in the working copy.

    Nothing is done when the source is not a working copy. The file is
    opened on first use and kept open afterwards.
    """
    if not self.wc:
        return
    fp = self.convertfp
    if fp is None:
        mappath = os.path.join(self.wc, '.svn', 'hg-shamap')
        fp = self.convertfp = open(mappath, 'a')
    line = '%s %d\n' % (destrev, self.revnum(rev))
    fp.write(line)
    fp.flush()
515 528
def revid(self, revnum, module=None):
    """Return the revision id of revnum in module, self.module by default."""
    if not module:
        module = self.module
    return 'svn:%s%s@%s' % (self.uuid, module, revnum)
518 531
def revnum(self, rev):
    """Return the revision number found after the last '@' of rev."""
    number = rev.rsplit('@', 1)[-1]
    return int(number)
521 534
def revsplit(self, rev):
    """Split a revision id into a (uuid, module, revnum) tuple.

    module is empty if the revision id does not contain any path.
    """
    location, number = rev.rsplit('@', 1)
    number = int(number)
    if '/' in location:
        prefix, rest = location.split('/', 1)
        mod = '/' + rest
    else:
        prefix, mod = location, ''
    # Drop the leading "svn:" marker from the uuid part
    return prefix[4:], mod, number
531 544
def latest(self, path, stop=0):
    """Find the latest revid affecting path, up to stop. It may return
    a revision in a different module, since a branch may be moved without
    a change being reported. Return None if computed module does not
    belong to rootmodule subtree.

    stop defaults to the latest revision of the repository.
    SvnPathNotFound is raised if path cannot be found at stop.
    """
    if not path.startswith(self.rootmodule):
        # Requests on foreign branches may be forbidden at server level
        self.ui.debug('ignoring foreign branch %r\n' % path)
        return None

    if not stop:
        stop = svn.ra.get_latest_revnum(self.ra)
    try:
        prevmodule = self.reparent('')
        try:
            dirent = svn.ra.stat(self.ra, path.strip('/'), stop)
        finally:
            # Restore the previous parent even if stat() fails, or the
            # transport would stay on the repository root while
            # self.module still designates the previous module.
            self.reparent(prevmodule)
    except SubversionException:
        dirent = None
    if not dirent:
        raise SvnPathNotFound(_('%s not found up to revision %d') % (path, stop))

    # stat() gives us the previous revision on this line of
    # development, but it might be in *another module*. Fetch the
    # log and detect renames down to the latest revision.
    stream = self._getlog([path], stop, dirent.created_rev)
    try:
        for entry in stream:
            paths, revnum, author, date, message = entry
            if revnum <= dirent.created_rev:
                break

            for p in paths:
                if not path.startswith(p) or not paths[p].copyfrom_path:
                    continue
                # path or one of its parents was copied in this
                # revision, continue with the copy source.
                newpath = paths[p].copyfrom_path + path[len(p):]
                self.ui.debug("branch renamed from %s to %s at %d\n" %
                              (path, newpath, revnum))
                path = newpath
                break
    finally:
        stream.close()

    if not path.startswith(self.rootmodule):
        self.ui.debug('ignoring foreign branch %r\n' % path)
        return None
    return self.revid(dirent.created_rev, path)
579 592
def reparent(self, module):
    """Reparent the svn transport and return the previous parent."""
    previous = self.prevmodule
    if previous == module:
        # Already there, nothing to do
        return module
    target = self.baseurl + urllib.quote(module)
    self.ui.debug("reparent to %s\n" % target)
    svn.ra.reparent(self.ra, target)
    self.prevmodule = module
    if previous is None:
        # No parent was recorded yet, report the root
        return ''
    return previous
592 605
def expandpaths(self, rev, paths, parents):
    """Return (files, copies) for the changed paths of rev.

    paths is a sequence of (path, entry) pairs. files is the list,
    without duplicates, of the file paths touched by the revision,
    relative to the module. copies maps copied file paths to their
    source path.
    """
    entries = []
    # Map of entrypath, revision for finding source of deleted
    # revisions.
    copyfrom = {}
    copies = {}

    new_module, revnum = self.revsplit(rev)[1:]
    if new_module != self.module:
        self.module = new_module
        self.reparent(self.module)

    for path, ent in paths:
        entrypath = self.getrelpath(path)

        kind = self._checkpath(entrypath, revnum)
        if kind == svn.core.svn_node_file:
            entries.append(self.recode(entrypath))
            if not ent.copyfrom_path or not parents:
                continue
            # Copy sources not in parent revisions cannot be
            # represented, ignore their origin for now
            pmodule, prevnum = self.revsplit(parents[0])[1:]
            if ent.copyfrom_rev < prevnum:
                continue
            copyfrom_path = self.getrelpath(ent.copyfrom_path, pmodule)
            if not copyfrom_path:
                continue
            self.ui.debug("copied to %s from %s@%s\n" %
                          (entrypath, copyfrom_path, ent.copyfrom_rev))
            copies[self.recode(entrypath)] = self.recode(copyfrom_path)
        elif kind == 0: # gone, but had better be a deleted *file*
            self.ui.debug("gone from %s\n" % ent.copyfrom_rev)
            pmodule, prevnum = self.revsplit(parents[0])[1:]
            parentpath = pmodule + "/" + entrypath
            self.ui.debug("entry %s\n" % parentpath)

            # We can avoid the reparent calls if the module has
            # not changed but it is probably not worth the pain.
            prevmodule = self.reparent('')
            fromkind = svn.ra.check_path(self.ra, parentpath.strip('/'), prevnum)
            self.reparent(prevmodule)

            if fromkind == svn.core.svn_node_file:
                entries.append(self.recode(entrypath))
            elif fromkind == svn.core.svn_node_dir:
                # A directory is gone, all the files it contained in
                # the parent revision are affected.
                if ent.action == 'C':
                    children = self._find_children(path, prevnum)
                else:
                    oroot = parentpath.strip('/')
                    nroot = path.strip('/')
                    children = self._find_children(oroot, prevnum)
                    # NOTE(review): replace() rewrites every occurrence
                    # of oroot, not only the leading one - confirm that
                    # this cannot alter paths repeating the root name.
                    children = [s.replace(oroot,nroot) for s in children]

                for child in children:
                    childpath = self.getrelpath("/" + child, pmodule)
                    if not childpath:
                        continue
                    if childpath in copies:
                        del copies[childpath]
                    entries.append(childpath)
            else:
                self.ui.debug('unknown path in revision %d: %s\n' % \
                              (revnum, path))
        elif kind == svn.core.svn_node_dir:
            # If the directory just had a prop change,
            # then we shouldn't need to look for its children.
            if ent.action == 'M':
                continue

            children = sorted(self._find_children(path, revnum))
            for child in children:
                # Can we move a child directory and its
                # parent in the same commit? (probably can). Could
                # cause problems if instead of revnum -1,
                # we have to look in (copyfrom_path, revnum - 1)
                entrypath = self.getrelpath("/" + child)
                if entrypath:
                    # Need to filter out directories here...
                    kind = self._checkpath(entrypath, revnum)
                    if kind != svn.core.svn_node_dir:
                        entries.append(self.recode(entrypath))

            # Handle directory copies
            if not ent.copyfrom_path or not parents:
                continue
            # Copy sources not in parent revisions cannot be
            # represented, ignore their origin for now
            pmodule, prevnum = self.revsplit(parents[0])[1:]
            if ent.copyfrom_rev < prevnum:
                continue
            copyfrompath = self.getrelpath(ent.copyfrom_path, pmodule)
            if not copyfrompath:
                continue
            # copyfrom is filled here but not read in this method
            copyfrom[path] = ent
            self.ui.debug("mark %s came from %s:%d\n"
                          % (path, copyfrompath, ent.copyfrom_rev))
            children = self._find_children(ent.copyfrom_path, ent.copyfrom_rev)
            children.sort()
            for child in children:
                entrypath = self.getrelpath("/" + child, pmodule)
                if not entrypath:
                    continue
                # Map every file of the copy source to its destination
                copytopath = path + entrypath[len(copyfrompath):]
                copytopath = self.getrelpath(copytopath)
                copies[self.recode(copytopath)] = self.recode(entrypath)

    # The same file may have been recorded several times
    return (list(set(entries)), copies)
701 714
def _fetch_revisions(self, from_revnum, to_revnum):
    """Fetch the log of self.module between the two revisions.

    The log is read from the highest revision to the lowest one. Every
    parsed entry is stored in self.commits, its changed paths and
    parents in self.paths. util.Abort is raised if the server reports
    that a revision does not exist.
    """
    # Order the bounds so that from_revnum is the highest one
    if from_revnum < to_revnum:
        from_revnum, to_revnum = to_revnum, from_revnum

    # Commit parsed last, that is the child of the next parsed one
    self.child_cset = None

    def parselogentry(orig_paths, revnum, author, date, message):
        """Return the parsed commit object or None, and True if
        the revision is a branch root.
        """
        self.ui.debug("parsing revision %d (%d changes)\n" %
                      (revnum, len(orig_paths)))

        branched = False
        rev = self.revid(revnum)
        # branch log might return entries for a parent we already have

        if rev in self.commits or revnum < to_revnum:
            return None, branched

        parents = []
        # check whether this revision is the start of a branch or part
        # of a branch renaming
        orig_paths = sorted(orig_paths.iteritems())
        root_paths = [(p,e) for p,e in orig_paths if self.module.startswith(p)]
        if root_paths:
            # Paths are sorted, the last one is the longest prefix
            path, ent = root_paths[-1]
            if ent.copyfrom_path:
                branched = True
                newpath = ent.copyfrom_path + self.module[len(path):]
                # ent.copyfrom_rev may not be the actual last revision
                previd = self.latest(newpath, ent.copyfrom_rev)
                if previd is not None:
                    prevmodule, prevnum = self.revsplit(previd)[1:]
                    if prevnum >= self.startrev:
                        parents = [previd]
                        self.ui.note(_('found parent of branch %s at %d: %s\n') %
                                     (self.module, prevnum, prevmodule))
            else:
                self.ui.debug("no copyfrom path, don't know what to do.\n")

        paths = []
        # filter out unrelated paths
        for path, ent in orig_paths:
            if self.getrelpath(path) is None:
                continue
            paths.append((path, ent))

        # Example SVN datetime. Includes microseconds.
        # ISO-8601 conformant
        # '2007-01-04T17:35:00.902377Z'
        date = util.parsedate(date[:19] + " UTC", ["%Y-%m-%dT%H:%M:%S"])

        log = message and self.recode(message) or ''
        author = author and self.recode(author) or ''
        try:
            # The branch is named after the last module component
            branch = self.module.split("/")[-1]
            if branch == 'trunk':
                branch = ''
        except IndexError:
            branch = None

        cset = commit(author=author,
                      date=util.datestr(date),
                      desc=log,
                      parents=parents,
                      branch=branch,
                      rev=rev)

        self.commits[rev] = cset
        # The parents list is *shared* among self.paths and the
        # commit object. Both will be updated below.
        self.paths[rev] = (paths, cset.parents)
        # The log goes backward: this revision is the parent of the
        # previously parsed one, unless the latter already has one.
        if self.child_cset and not self.child_cset.parents:
            self.child_cset.parents[:] = [rev]
        self.child_cset = cset
        return cset, branched

    self.ui.note(_('fetching revision log for "%s" from %d to %d\n') %
                 (self.module, from_revnum, to_revnum))

    try:
        firstcset = None
        lastonbranch = False
        stream = self._getlog([self.module], from_revnum, to_revnum)
        try:
            for entry in stream:
                paths, revnum, author, date, message = entry
                if revnum < self.startrev:
                    lastonbranch = True
                    break
                if not paths:
                    self.ui.debug('revision %d has no entries\n' % revnum)
                    continue
                cset, lastonbranch = parselogentry(paths, revnum, author,
                                                   date, message)
                if cset:
                    firstcset = cset
                if lastonbranch:
                    break
        finally:
            stream.close()

        if not lastonbranch and firstcset and not firstcset.parents:
            # The first revision of the sequence (the last fetched one)
            # has invalid parents if not a branch root. Find the parent
            # revision now, if any.
            try:
                firstrevnum = self.revnum(firstcset.rev)
                if firstrevnum > 1:
                    latest = self.latest(self.module, firstrevnum - 1)
                    if latest:
                        firstcset.parents.append(latest)
            except SvnPathNotFound:
                pass
    except SubversionException, (inst, num):
        if num == svn.core.SVN_ERR_FS_NO_SUCH_REVISION:
            raise util.Abort(_('svn: branch has no revision %s') % to_revnum)
        raise
821 834
822 835 def _getfile(self, file, rev):
823 836 # TODO: ra.get_file transmits the whole file instead of diffs.
824 837 mode = ''
825 838 try:
826 839 new_module, revnum = self.revsplit(rev)[1:]
827 840 if self.module != new_module:
828 841 self.module = new_module
829 842 self.reparent(self.module)
830 843 io = StringIO()
831 844 info = svn.ra.get_file(self.ra, file, revnum, io)
832 845 data = io.getvalue()
833 846 # ra.get_files() seems to keep a reference on the input buffer
834 847 # preventing collection. Release it explicitely.
835 848 io.close()
836 849 if isinstance(info, list):
837 850 info = info[-1]
838 851 mode = ("svn:executable" in info) and 'x' or ''
839 852 mode = ("svn:special" in info) and 'l' or mode
840 853 except SubversionException, e:
841 854 notfound = (svn.core.SVN_ERR_FS_NOT_FOUND,
842 855 svn.core.SVN_ERR_RA_DAV_PATH_NOT_FOUND)
843 856 if e.apr_err in notfound: # File not found
844 857 raise IOError()
845 858 raise
846 859 if mode == 'l':
847 860 link_prefix = "link "
848 861 if data.startswith(link_prefix):
849 862 data = data[len(link_prefix):]
850 863 return data, mode
851 864
def _find_children(self, path, revnum):
    """Return the entries found below path at revnum.

    The listing is recursive and the returned paths are prefixed with
    path, stripped from its leading and trailing slashes.
    """
    path = path.strip('/')
    pool = Pool()
    rpath = '/'.join([self.baseurl, urllib.quote(path)]).strip('/')
    listing = svn.client.ls(rpath, optrev(revnum), True, self.ctx, pool)
    return ['%s/%s' % (path, name) for name in listing.keys()]
858 871
def getrelpath(self, path, module=None):
    """Return ``path`` relative to ``module``, or None if it lies outside.

    ``module`` defaults to self.module. For example, with a module of
    "/CMFPlone/branches/Plone-2_0-branch", the path reported by svn log
    "/CMFPlone/branches/Plone-2_0-branch/tests/PloneTestCase.py"
    yields "tests/PloneTestCase.py". A path equal to the module (with or
    without trailing slashes) yields ''.

    When the path is not under the module, a debug message is emitted
    through self.ui and None is returned.
    """
    base = module
    if base is None:
        base = self.module
    if path.startswith(base):
        tail = path.rstrip('/')[len(base):]
        if not tail:
            # The path is the module itself.
            return tail
        if tail[0] == '/':
            return tail[1:]
        # Otherwise the module is only a textual prefix of an unrelated
        # sibling path: fall through to the "outside" case.

    # The path is outside our tracked tree...
    self.ui.debug('%r is not under %r, ignoring\n' % (path, base))
    return None
878 891
def _checkpath(self, path, revnum):
    """Return what svn.ra.check_path() reports for ``path`` at ``revnum``."""
    # ra.check_path does not like leading slashes very much, it leads
    # to PROPFIND subversion errors
    stripped = path.strip('/')
    return svn.ra.check_path(self.ra, stripped, revnum)
883 896
def _getlog(self, paths, start, end, limit=0, discover_changed_paths=True,
            strict_node_history=False):
    """Start an 'hg debugsvnlog' child process and return its log stream.

    The request (base URL, normalized paths, revision range, limit and
    the two boolean options) is serialized with encodeargs() and written
    to the child's standard input. The child's standard output is
    returned wrapped in a logstream.
    """
    def normalize(p):
        # Normalize path names, svn >= 1.5 only wants paths relative to
        # supplied URL
        if p.startswith('/'):
            return p.strip('/')
        return (self.module + '/' + p).strip('/')

    relpaths = [normalize(p) for p in paths]
    payload = encodeargs([self.baseurl, relpaths, start, end, limit,
                          discover_changed_paths, strict_node_history])
    cmd = '%s debugsvnlog' % util.shellquote(util.hgexecutable())
    childin, childout = util.popen2(cmd)
    childin.write(payload)
    childin.close()
    return logstream(childout)
902 915
# Shell script that svn_sink.__init__ writes to hooks/pre-revprop-change in a
# repository it has just created. It accepts exactly three revision property
# changes -- modifying svn:log, adding hg:convert-branch and adding
# hg:convert-rev -- and rejects everything else with exit status 1.
pre_revprop_change = '''#!/bin/sh

REPOS="$1"
REV="$2"
USER="$3"
PROPNAME="$4"
ACTION="$5"

if [ "$ACTION" = "M" -a "$PROPNAME" = "svn:log" ]; then exit 0; fi
if [ "$ACTION" = "A" -a "$PROPNAME" = "hg:convert-branch" ]; then exit 0; fi
if [ "$ACTION" = "A" -a "$PROPNAME" = "hg:convert-rev" ]; then exit 0; fi

echo "Changing prohibited revision property" >&2
exit 1
'''
918 931
class svn_sink(converter_sink, commandline):
    """Conversion sink that commits through a Subversion working copy.

    Incoming changes are applied to a local working copy and committed
    with the 'svn' command line client (the instance is initialised as a
    ``commandline`` wrapper around 'svn'). Bookkeeping files (revision
    map, author map, child map) live in the working copy's top-level
    '.svn' directory.
    """

    # Extracts the new revision number from the output of 'svn commit'.
    commit_re = re.compile(r'Committed revision (\d+).', re.M)

    def prerun(self):
        """Change into the working copy directory, once one is known.

        NOTE(review): presumably invoked by ``commandline`` before each
        command -- confirm in common.py.
        """
        if self.wc:
            os.chdir(self.wc)

    def postrun(self):
        """Change back to the directory recorded at construction time.

        NOTE(review): presumably invoked by ``commandline`` after each
        command -- confirm in common.py.
        """
        if self.wc:
            os.chdir(self.cwd)

    def join(self, name):
        """Return the path of ``name`` inside the working copy's '.svn'."""
        return os.path.join(self.wc, '.svn', name)

    def revmapfile(self):
        """Return the path of the revision map file ('hg-shamap')."""
        return self.join('hg-shamap')

    def authorfile(self):
        """Return the path of the author map file ('hg-authormap')."""
        return self.join('hg-authormap')

    def __init__(self, ui, path):
        """Prepare the working copy (and, if needed, the repository).

        If ``path`` already is a working copy (it has '.svn/entries'), it
        is used as is after an 'svn update'. Otherwise a working copy named
        '<basename of path>-wc' is checked out in the current directory.
        When the parent directory of ``path`` exists, ``path`` is treated
        as a local repository: it is created with 'svnadmin create' if it
        has no 'db/fs-type' file, and addressed through a file:// URL.

        A freshly created repository gets the pre-revprop-change hook
        installed so that the revision properties set by putcommit() are
        accepted.
        """
        converter_sink.__init__(self, ui, path)
        commandline.__init__(self, ui, 'svn')
        # Pending operations accumulated by putfile()/putcommit().
        self.delete = []
        self.setexec = []
        self.delexec = []
        self.copies = []
        self.wc = None
        self.cwd = os.getcwd()

        path = os.path.realpath(path)

        # Holds the repository path when this call created the repository.
        created = False
        if os.path.isfile(os.path.join(path, '.svn', 'entries')):
            self.wc = path
            self.run0('update')
        else:
            wcpath = os.path.join(os.getcwd(), os.path.basename(path) + '-wc')

            if os.path.isdir(os.path.dirname(path)):
                if not os.path.exists(os.path.join(path, 'db', 'fs-type')):
                    ui.status(_('initializing svn repo %r\n') %
                              os.path.basename(path))
                    commandline(ui, 'svnadmin').run0('create', path)
                    created = path
                path = util.normpath(path)
                if not path.startswith('/'):
                    path = '/' + path
                path = 'file://' + path

            ui.status(_('initializing svn wc %r\n') % os.path.basename(wcpath))
            self.run0('checkout', path, wcpath)

            self.wc = wcpath
        self.opener = util.opener(self.wc)
        self.wopener = util.opener(self.wc)
        self.childmap = mapfile(ui, self.join('hg-childmap'))
        # None when the working copy's filesystem has no usable exec bit.
        self.is_exec = util.checkexec(self.wc) and util.is_exec or None

        if created:
            hook = os.path.join(created, 'hooks', 'pre-revprop-change')
            fp = open(hook, 'w')
            try:
                fp.write(pre_revprop_change)
            finally:
                # Do not leak the handle if the write fails.
                fp.close()
            # Mark the hook executable (third argument is the exec flag).
            util.set_flags(hook, False, True)

        xport = transport.SvnRaTransport(url=geturl(path))
        self.uuid = svn.ra.get_uuid(xport.ra)

    def wjoin(self, *names):
        """Return the path of ``names`` joined below the working copy."""
        return os.path.join(self.wc, *names)

    def putfile(self, filename, flags, data):
        """Write ``data`` to ``filename`` in the working copy.

        With 'l' in ``flags`` a symlink pointing to ``data`` is created.
        Otherwise a regular file is written and the file is queued in
        self.setexec or self.delexec when its svn:executable property has
        to change to match the presence of 'x' in ``flags``.
        """
        if 'l' in flags:
            self.wopener.symlink(data, filename)
        else:
            wpath = self.wjoin(filename)
            try:
                if os.path.islink(wpath):
                    # Remove the link living in the working copy. The bare
                    # relative name would be resolved against the current
                    # directory, which is not the working copy here.
                    os.unlink(wpath)
            except OSError:
                pass
            fp = self.wopener(filename, 'w')
            try:
                fp.write(data)
            finally:
                # Close explicitly instead of relying on garbage collection.
                fp.close()

            if self.is_exec:
                was_exec = self.is_exec(wpath)
            else:
                # On filesystems not supporting execute-bit, there is no way
                # to know if it is set but asking subversion. Setting it
                # systematically is just as expensive and much simpler.
                was_exec = 'x' not in flags

            util.set_flags(wpath, False, 'x' in flags)
            if was_exec:
                if 'x' not in flags:
                    self.delexec.append(filename)
            else:
                if 'x' in flags:
                    self.setexec.append(filename)

    def _copyfile(self, source, dest):
        """Record with 'svn copy' a copy that already exists on disk."""
        # SVN's copy command pukes if the destination file exists, but
        # our copyfile method expects to record a copy that has
        # already occurred. Cross the semantic gap.
        wdest = self.wjoin(dest)
        exists = os.path.exists(wdest)
        if exists:
            # Reserve a unique name next to the destination and park the
            # existing content there while svn performs the copy.
            fd, tempname = tempfile.mkstemp(
                prefix='hg-copy-', dir=os.path.dirname(wdest))
            os.close(fd)
            os.unlink(tempname)
            os.rename(wdest, tempname)
        try:
            self.run0('copy', source, dest)
        finally:
            if exists:
                # Put the parked content back over whatever svn created.
                try:
                    os.unlink(wdest)
                except OSError:
                    pass
                os.rename(tempname, wdest)

    def dirs_of(self, files):
        """Return the set of directories involved in ``files``.

        This includes every entry that is itself a directory in the
        working copy and every ancestor directory of every entry.
        """
        dirs = set()
        for f in files:
            if os.path.isdir(self.wjoin(f)):
                dirs.add(f)
            for i in strutil.rfindall(f, '/'):
                dirs.add(f[:i])
        return dirs

    def add_dirs(self, files):
        """'svn add' the directories of ``files`` lacking '.svn/entries'.

        Returns the list of directories that were added.
        """
        missing = [d for d in sorted(self.dirs_of(files))
                   if not os.path.exists(self.wjoin(d, '.svn', 'entries'))]
        if missing:
            self.xargs(missing, 'add', non_recursive=True, quiet=True)
        return missing

    def add_files(self, files):
        """'svn add' ``files`` (when not empty) and return them."""
        if files:
            self.xargs(files, 'add', quiet=True)
        return files

    def tidy_dirs(self, names):
        """'svn delete' the directories of ``names`` that became empty.

        A directory counts as empty when '.svn' is its only entry.
        Directories are visited in reverse sorted order so children come
        before their parents. Returns the list of deleted directories.
        """
        deleted = []
        for d in sorted(self.dirs_of(names), reverse=True):
            wd = self.wjoin(d)
            # os.listdir() returns a list: it must be compared with a
            # list, comparing it with the string '.svn' is never true.
            if os.listdir(wd) == ['.svn']:
                self.run0('delete', d)
                deleted.append(d)
        return deleted

    def addchild(self, parent, child):
        """Record ``child`` as the converted child of ``parent``."""
        self.childmap[parent] = child

    def revid(self, rev):
        """Return the revision identifier 'svn:<uuid>@<rev>' for ``rev``."""
        return u"svn:%s@%s" % (self.uuid, rev)

    def putcommit(self, files, copies, parents, commit, source, revmap):
        """Apply one changeset to the working copy and commit it.

        ``files`` is a sequence of (name, version) pairs fetched from
        ``source``; a file for which source.getfile() raises IOError is
        scheduled for deletion. ``copies`` maps copy destinations to their
        sources. ``commit`` supplies desc, author, rev and branch.
        ``revmap`` is not used here.

        Returns the revision identifier of the new Subversion revision, or
        the one already recorded in the child map for one of ``parents``.
        Raises util.Abort when the output of 'svn commit' does not contain
        the committed revision number.
        """
        # Apply changes to working copy
        for f, v in files:
            try:
                data = source.getfile(f, v)
            except IOError:
                self.delete.append(f)
            else:
                e = source.getmode(f, v)
                self.putfile(f, e, data)
                if f in copies:
                    self.copies.append([copies[f], f])
        files = [f[0] for f in files]

        # A parent that already has a recorded child is not committed
        # again: return that child's identifier.
        for parent in parents:
            try:
                return self.revid(self.childmap[parent])
            except KeyError:
                pass
        entries = set(self.delete)
        files = frozenset(files)
        entries.update(self.add_dirs(files.difference(entries)))
        if self.copies:
            for s, d in self.copies:
                self._copyfile(s, d)
            self.copies = []
        if self.delete:
            self.xargs(self.delete, 'delete')
            self.delete = []
        entries.update(self.add_files(files.difference(entries)))
        entries.update(self.tidy_dirs(entries))
        if self.delexec:
            self.xargs(self.delexec, 'propdel', 'svn:executable')
            self.delexec = []
        if self.setexec:
            self.xargs(self.setexec, 'propset', 'svn:executable', '*')
            self.setexec = []

        fd, messagefile = tempfile.mkstemp(prefix='hg-convert-')
        try:
            # Writing happens inside the try block so the temporary file
            # is removed even when the write itself fails.
            fp = os.fdopen(fd, 'w')
            try:
                fp.write(commit.desc)
            finally:
                fp.close()
            output = self.run0('commit',
                               username=util.shortuser(commit.author),
                               file=messagefile,
                               encoding='utf-8')
            match = self.commit_re.search(output)
            if match is None:
                self.ui.warn(_('unexpected svn output:\n'))
                self.ui.warn(output)
                raise util.Abort(_('unable to cope with svn output'))
            rev = match.group(1)
            if commit.rev:
                self.run('propset', 'hg:convert-rev', commit.rev,
                         revprop=True, revision=rev)
            if commit.branch and commit.branch != 'default':
                self.run('propset', 'hg:convert-branch', commit.branch,
                         revprop=True, revision=rev)
            for parent in parents:
                self.addchild(parent, rev)
            return self.revid(rev)
        finally:
            os.unlink(messagefile)

    def puttags(self, tags):
        """Tags are not supported by this sink: only emit a warning."""
        self.ui.warn(_('XXX TAGS NOT IMPLEMENTED YET\n'))
General Comments 0
You need to be logged in to leave comments. Login now