##// END OF EJS Templates
convert: handle svn tree with empty roots (issue2079)
Patrick Mezard -
r10618:508fda6b stable
parent child Browse files
Show More
@@ -1,1168 +1,1171 b''
1 1 # Subversion 1.4/1.5 Python API backend
2 2 #
3 3 # Copyright(C) 2007 Daniel Holth et al
4 4
5 5 import os
6 6 import re
7 7 import sys
8 8 import cPickle as pickle
9 9 import tempfile
10 10 import urllib
11 11 import urllib2
12 12
13 13 from mercurial import strutil, util, encoding
14 14 from mercurial.i18n import _
15 15
16 16 # Subversion stuff. Works best with very recent Python SVN bindings
17 17 # e.g. SVN 1.5 or backports. Thanks to the bzr folks for enhancing
18 18 # these bindings.
19 19
20 20 from cStringIO import StringIO
21 21
22 22 from common import NoRepo, MissingTool, commit, encodeargs, decodeargs
23 23 from common import commandline, converter_source, converter_sink, mapfile
24 24
25 25 try:
26 26 from svn.core import SubversionException, Pool
27 27 import svn
28 28 import svn.client
29 29 import svn.core
30 30 import svn.ra
31 31 import svn.delta
32 32 import transport
33 33 import warnings
34 34 warnings.filterwarnings('ignore',
35 35 module='svn.core',
36 36 category=DeprecationWarning)
37 37
38 38 except ImportError:
39 39 pass
40 40
class SvnPathNotFound(Exception):
    """Raised when a path cannot be found in the Subversion repository."""
43 43
def geturl(path):
    """Return a Subversion URL for path.

    The svn client is asked first. When it cannot resolve the path and
    the path is a local directory, a file:// URL is built by hand.
    Anything else is returned unchanged.
    """
    try:
        canonical = svn.core.svn_path_canonicalize(path)
        return svn.client.url_from_path(canonical)
    except SubversionException:
        # svn could not map it to a URL, try the fallbacks below
        pass
    if not os.path.isdir(path):
        return path
    localpath = os.path.normpath(os.path.abspath(path))
    if os.name == 'nt':
        localpath = '/' + util.normpath(localpath)
    # Module URL is later compared with the repository URL returned
    # by svn API, which is UTF-8.
    localpath = encoding.tolocal(localpath)
    return 'file://' + urllib.quote(localpath)
58 58
def optrev(number):
    """Wrap a revision number into an svn_opt_revision_t structure."""
    revision = svn.core.svn_opt_revision_t()
    revision.kind = svn.core.svn_opt_revision_number
    revision.value.number = number
    return revision
64 64
class changedpath(object):
    """Plain copy of the copy source and action of a changed-path entry.

    get_log_child() substitutes these for the original entries before
    pickling the log records.
    """
    def __init__(self, p):
        for field in ('copyfrom_path', 'copyfrom_rev', 'action'):
            setattr(self, field, getattr(p, field))
70 70
def get_log_child(fp, url, paths, start, end, limit=0, discover_changed_paths=True,
                  strict_node_history=False):
    """Fetch the svn log for paths and pickle every entry to fp.

    Each entry is written as an (orig_paths, revnum, author, date,
    message) tuple. The stream is terminated by a pickled None on
    success (or when writing to fp fails with IOError), or by the
    Subversion error number when the log request fails. fp is then
    closed and the process exits immediately, so this function never
    returns.
    """
    # -1 selects the highest pickle protocol available
    protocol = -1
    def receiver(orig_paths, revnum, author, date, message, pool):
        # Replace the svn changed-path objects with changedpath
        # instances before pickling them.
        if orig_paths is not None:
            for k, v in orig_paths.iteritems():
                orig_paths[k] = changedpath(v)
        pickle.dump((orig_paths, revnum, author, date, message),
                    fp, protocol)

    try:
        # Use an ra of our own so that our parent can consume
        # our results without confusing the server.
        t = transport.SvnRaTransport(url=url)
        svn.ra.get_log(t.ra, paths, start, end, limit,
                       discover_changed_paths,
                       strict_node_history,
                       receiver)
    except SubversionException, (inst, num):
        # Report the error number in place of the end marker
        pickle.dump(num, fp, protocol)
    except IOError:
        # Caller may interrupt the iteration
        pickle.dump(None, fp, protocol)
    else:
        # Normal end of stream
        pickle.dump(None, fp, protocol)
    fp.close()
    # With large history, cleanup process goes crazy and suddenly
    # consumes *huge* amount of memory. The output file being closed,
    # there is no need for clean termination.
    os._exit(0)
101 101
def debugsvnlog(ui, **opts):
    """Fetch SVN log in a subprocess and channel them back to parent to
    avoid memory collection issues.

    The encoded log arguments are read from stdin and the pickled log
    entries are written to stdout, both switched to binary mode.
    """
    for stream in (sys.stdin, sys.stdout):
        util.set_binary(stream)
    logargs = decodeargs(sys.stdin.read())
    get_log_child(sys.stdout, *logargs)
110 110
class logstream(object):
    """Interruptible revision log iterator.

    Wraps a file-like object holding a sequence of pickled log
    entries, each an (orig_paths, revnum, author, date, message)
    tuple, terminated by a pickled None. Any other non-tuple value is
    treated as an error reported by the producer.
    """
    def __init__(self, stdout):
        self._stdout = stdout

    def __iter__(self):
        """Yield log entries until the None end marker is read.

        Raises util.Abort when the stream ends before the end marker,
        and SubversionException when the producer sent an error value.
        """
        while True:
            try:
                # NOTE: unpickling is only acceptable because the data
                # is expected to come from a child process we started.
                entry = pickle.load(self._stdout)
            except EOFError:
                raise util.Abort(_('Mercurial failed to run itself, check'
                                   ' hg executable is in PATH'))
            try:
                # Unpacking only validates the shape of the entry
                orig_paths, revnum, author, date, message = entry
            except (TypeError, ValueError):
                # TypeError: entry is not iterable (None or an error
                # number); ValueError: wrong number of fields. Anything
                # else, e.g. KeyboardInterrupt, must propagate.
                if entry is None:
                    break
                raise SubversionException("child raised exception", entry)
            yield entry

    def close(self):
        """Close the underlying stream; calling it again is harmless."""
        if self._stdout:
            self._stdout.close()
            self._stdout = None
135 135
136 136
137 137 # Check to see if the given path is a local Subversion repo. Verify this by
138 138 # looking for several svn-specific files and directories in the given
139 139 # directory.
def filecheck(ui, path, proto):
    """Return True if path contains the entries of a local svn repository.

    The directory must hold 'locks', 'hooks', 'format' and 'db'.
    ui and proto are unused, they keep the signature shared by all
    protocol checkers.
    """
    required = ('locks', 'hooks', 'format', 'db')
    missing = [name for name in required
               if not os.path.exists(os.path.join(path, name))]
    return not missing
145 145
146 146 # Check to see if a given path is the root of an svn repo over http. We verify
147 147 # this by requesting a version-controlled URL we know can't exist and looking
148 148 # for the svn-specific "not found" XML.
149 149 def httpcheck(ui, path, proto):
150 150 try:
151 151 opener = urllib2.build_opener()
152 152 rsp = opener.open('%s://%s/!svn/ver/0/.svn' % (proto, path))
153 153 data = rsp.read()
154 154 except urllib2.HTTPError, inst:
155 155 if inst.code != 404:
156 156 # Except for 404 we cannot know for sure this is not an svn repo
157 157 ui.warn(_('svn: cannot probe remote repository, assume it could '
158 158 'be a subversion repository. Use --source-type if you '
159 159 'know better.\n'))
160 160 return True
161 161 data = inst.fp.read()
162 162 except:
163 163 # Could be urllib2.URLError if the URL is invalid or anything else.
164 164 return False
165 165 return '<m:human-readable errcode="160013">' in data
166 166
# Map URL schemes to the function probing them for a Subversion
# repository. Checkers are called as check(ui, path, proto).
protomap = {'http': httpcheck,
            'https': httpcheck,
            'file': filecheck,
            }
def issvnurl(ui, url):
    """Return True if url or one of its parent paths is an svn repository.

    url is split into a scheme and a path; a value without '://' is
    treated as a local path. The checker registered in protomap for
    the scheme is tried on the path, then on each of its parents.
    Schemes without a registered checker never match.
    """
    try:
        proto, path = url.split('://', 1)
        if proto == 'file':
            path = urllib.url2pathname(path)
    except ValueError:
        # No scheme separator, this is a plain local path
        proto = 'file'
        path = os.path.abspath(url)
    if proto == 'file':
        path = path.replace(os.sep, '/')
    # The fallback must accept the same (ui, path, proto) arguments as
    # the real checkers, otherwise unknown schemes raise TypeError.
    check = protomap.get(proto, lambda *args: False)
    while '/' in path:
        if check(ui, path, proto):
            return True
        # Retry with the parent path
        path = path.rsplit('/', 1)[0]
    return False
187 187
188 188 # SVN conversion code stolen from bzr-svn and tailor
189 189 #
190 190 # Subversion looks like a versioned filesystem, branches structures
191 191 # are defined by conventions and not enforced by the tool. First,
192 192 # we define the potential branches (modules) as "trunk" and "branches"
193 193 # children directories. Revisions are then identified by their
194 194 # module and revision number (and a repository identifier).
195 195 #
196 196 # The revision graph is really a tree (or a forest). By default, a
197 197 # revision parent is the previous revision in the same module. If the
198 198 # module directory is copied/moved from another module then the
199 199 # revision is the module root and its parent the source revision in
200 200 # the parent module. A revision has at most one parent.
201 201 #
202 202 class svn_source(converter_source):
def __init__(self, ui, url, rev=None):
    """Open url as a Subversion conversion source.

    url may carry a trailing '@rev' to select the revision to start
    from; an explicit rev argument takes precedence over it.

    Raises NoRepo when url does not look like a Subversion repository
    or cannot be opened, MissingTool when the svn bindings are missing
    or older than 1.4, and util.Abort when rev or the configured
    convert.svn.startrev is not an integer or when no revision is
    found in the module.
    """
    super(svn_source, self).__init__(ui, url, rev=rev)

    # Cheap checks first: known svn schemes, then a working copy, and
    # finally an actual probe of the URL.
    if not (url.startswith('svn://') or url.startswith('svn+ssh://') or
            (os.path.exists(url) and
             os.path.exists(os.path.join(url, '.svn'))) or
            issvnurl(ui, url)):
        raise NoRepo("%s does not look like a Subversion repo" % url)

    # The name is only bound when the module-level import of the svn
    # bindings succeeded.
    try:
        SubversionException
    except NameError:
        raise MissingTool(_('Subversion python bindings could not be loaded'))

    try:
        version = svn.core.SVN_VER_MAJOR, svn.core.SVN_VER_MINOR
        if version < (1, 4):
            raise MissingTool(_('Subversion python bindings %d.%d found, '
                                '1.4 or later required') % version)
    except AttributeError:
        raise MissingTool(_('Subversion python bindings are too old, 1.4 '
                            'or later required'))

    # Highest known revision number per module, see setrevmap()
    self.lastrevs = {}

    latest = None
    try:
        # Support file://path@rev syntax. Useful e.g. to convert
        # deleted branches.
        at = url.rfind('@')
        if at >= 0:
            latest = int(url[at + 1:])
            url = url[:at]
    except ValueError:
        # What follows '@' is not a revision number, keep url as is
        pass
    self.url = geturl(url)
    self.encoding = 'UTF-8' # Subversion is always nominal UTF-8
    try:
        self.transport = transport.SvnRaTransport(url=self.url)
        self.ra = self.transport.ra
        self.ctx = self.transport.client
        self.baseurl = svn.ra.get_repos_root(self.ra)
        # Module is either empty or a repository path starting with
        # a slash and not ending with a slash.
        self.module = urllib.unquote(self.url[len(self.baseurl):])
        self.prevmodule = None
        self.rootmodule = self.module
        self.commits = {}
        self.paths = {}
        self.uuid = svn.ra.get_uuid(self.ra)
    except SubversionException:
        ui.traceback()
        raise NoRepo("%s does not look like a Subversion repo" % self.url)

    if rev:
        try:
            latest = int(rev)
        except ValueError:
            raise util.Abort(_('svn: revision %s is not an integer') % rev)

    self.startrev = self.ui.config('convert', 'svn.startrev', default=0)
    try:
        self.startrev = int(self.startrev)
        # Negative start revisions are clamped to 0
        if self.startrev < 0:
            self.startrev = 0
    except ValueError:
        raise util.Abort(_('svn: start revision %s is not an integer')
                         % self.startrev)

    self.head = self.latest(self.module, latest)
    if not self.head:
        raise util.Abort(_('no revision found in module %s')
                         % self.module)
    self.last_changed = self.revnum(self.head)

    # (rev, changes) pair set by getchangedfiles(), read by getchanges()
    self._changescache = None

    # Remember the working copy, if any; converted() logs into it
    if os.path.exists(os.path.join(url, '.svn/entries')):
        self.wc = url
    else:
        self.wc = None
    self.convertfp = None
285 285
def setrevmap(self, revmap):
    """Record, for every module in revmap, its highest revision number.

    The result replaces self.lastrevs.
    """
    highest = {}
    for revid in revmap.iterkeys():
        module, revnum = self.revsplit(revid)[1:]
        if module not in highest or revnum > highest[module]:
            highest[module] = revnum
    self.lastrevs = highest
294 294
def exists(self, path, optrev):
    """Tell whether path, relative to self.url, can be listed at optrev."""
    target = self.url.rstrip('/') + '/' + urllib.quote(path)
    try:
        svn.client.ls(target, optrev, False, self.ctx)
    except SubversionException:
        return False
    return True
302 302
def getheads(self):
    """Return the list of head revision ids to convert.

    The trunk, tags and branches locations are read from the
    convert.svn.* settings, defaulting to their conventional names.
    The first head is the one of the module (trunk if found, the
    project root otherwise), followed by one head for every non-empty
    branch directory. As a side effect self.module, self.head,
    self.tags and self.heads are updated.

    Raises util.Abort when a configured location does not exist, when
    trunk has no revision, or when a start revision is combined with
    several heads or lies after the only head.
    """

    def isdir(path, revnum):
        # True if path is a directory at revnum
        kind = self._checkpath(path, revnum)
        return kind == svn.core.svn_node_dir

    def getcfgpath(name, rev):
        # Return the location of the trunk/tags/branches directory,
        # or None when it is disabled (blank setting) or missing.
        cfgpath = self.ui.config('convert', 'svn.' + name)
        if cfgpath is not None and cfgpath.strip() == '':
            return None
        path = (cfgpath or name).strip('/')
        if not self.exists(path, rev):
            # A missing path is only an error when explicitly set
            if cfgpath:
                raise util.Abort(_('expected %s to be at %r, but not found')
                             % (name, path))
            return None
        self.ui.note(_('found %s at %r\n') % (name, path))
        return path

    rev = optrev(self.last_changed)
    oldmodule = ''
    trunk = getcfgpath('trunk', rev)
    self.tags = getcfgpath('tags', rev)
    branches = getcfgpath('branches', rev)

    # If the project has a trunk or branches, we will extract heads
    # from them. We keep the project root otherwise.
    if trunk:
        oldmodule = self.module or ''
        self.module += '/' + trunk
        self.head = self.latest(self.module, self.last_changed)
        if not self.head:
            raise util.Abort(_('no revision found in module %s')
                             % self.module)

    # First head in the list is the module's head
    self.heads = [self.head]
    if self.tags is not None:
        # Make the tags location absolute within the repository
        self.tags = '%s/%s' % (oldmodule , (self.tags or 'tags'))

    # Check if branches bring a few more heads to the list
    if branches:
        rpath = self.url.strip('/')
        branchnames = svn.client.ls(rpath + '/' + urllib.quote(branches),
                                    rev, False, self.ctx)
        for branch in branchnames.keys():
            module = '%s/%s/%s' % (oldmodule, branches, branch)
            # Only directories can be branches
            if not isdir(module, self.last_changed):
                continue
            brevid = self.latest(module, self.last_changed)
            if not brevid:
                self.ui.note(_('ignoring empty branch %s\n') % branch)
                continue
            self.ui.note(_('found branch %s at %d\n') %
                         (branch, self.revnum(brevid)))
            self.heads.append(brevid)

    if self.startrev and self.heads:
        if len(self.heads) > 1:
            raise util.Abort(_('svn: start revision is not supported '
                               'with more than one branch'))
        revnum = self.revnum(self.heads[0])
        if revnum < self.startrev:
            raise util.Abort(
                _('svn: no revision found after start revision %d')
                             % self.startrev)

    return self.heads
371 371
def getfile(self, file, rev):
    """Return the content of file at rev.

    The file mode is stored in self.modecache for getmode().
    """
    contents, flags = self._getfile(file, rev)
    self.modecache[(file, rev)] = flags
    return contents
376 376
def getmode(self, file, rev):
    """Return the mode cached for file at rev by a previous getfile()."""
    key = (file, rev)
    return self.modecache[key]
379 379
def getchanges(self, rev):
    """Return (files, copies) for revision rev.

    files is a sorted list of (path, rev) pairs and copies maps copy
    destinations to their sources. A result cached by
    getchangedfiles() for the same revision is returned as is.
    The entry of rev in self.paths is consumed.
    """
    cached = self._changescache
    if cached and cached[0] == rev:
        return cached[1]
    self._changescache = None
    self.modecache = {}
    paths, parents = self.paths[rev]
    if parents:
        files, copies = self.expandpaths(rev, paths, parents)
    else:
        # Perform a full checkout on roots
        module, revnum = self.revsplit(rev)[1:]
        listing = svn.client.ls(self.baseurl + urllib.quote(module),
                                optrev(revnum), True, self.ctx)
        files = [name for name, dirent in listing.iteritems()
                 if dirent.kind == svn.core.svn_node_file]
        copies = {}

    files.sort()
    files = [(name, rev) for name in files]

    # caller caches the result, so free it here to release memory
    del self.paths[rev]
    return (files, copies)
403 403
def getchangedfiles(self, rev, i):
    """Return the names of the files changed in rev.

    The full changes are kept in self._changescache so that the next
    getchanges() call for rev can reuse them. i is unused.
    """
    changes = self.getchanges(rev)
    self._changescache = (rev, changes)
    files = changes[0]
    return [entry[0] for entry in files]
408 408
def getcommit(self, rev):
    """Return the commit object for rev, fetching the log if needed.

    The commit is removed from self.commits before being returned.
    """
    if rev not in self.commits:
        module, revnum = self.revsplit(rev)[1:]
        self.module = module
        self.reparent(module)
        # We assume that:
        # - requests for revisions after "stop" come from the
        # revision graph backward traversal. Cache all of them
        # down to stop, they will be used eventually.
        # - requests for revisions before "stop" come to get
        # isolated branches parents. Just fetch what is needed.
        stop = self.lastrevs.get(module, 0)
        if revnum < stop:
            stop = revnum + 1
        self._fetch_revisions(revnum, stop)
    # caller caches the result, so free it here to release memory
    return self.commits.pop(rev)
428 428
def gettags(self):
    """Return a dictionary mapping tag names to revision ids.

    The dictionary is empty when no tags directory is known
    (self.tags is None). Subversion errors raised while reading the
    log are reported with a note and the tags collected so far are
    returned.
    """
    tags = {}
    if self.tags is None:
        return tags

    # svn tags are just a convention, project branches left in a
    # 'tags' directory. There is no other relationship than
    # ancestry, which is expensive to discover and makes them hard
    # to update incrementally. Worse, past revisions may be
    # referenced by tags far away in the future, requiring a deep
    # history traversal on every calculation. Current code
    # performs a single backward traversal, tracking moves within
    # the tags directory (tag renaming) and recording a new tag
    # everytime a project is copied from outside the tags
    # directory. It also lists deleted tags, this behaviour may
    # change in the future.
    pendings = []
    tagspath = self.tags
    start = svn.ra.get_latest_revnum(self.ra)
    try:
        stream = self._getlog([self.tags], start, self.startrev)
        # Always release the log stream, like latest() and
        # _fetch_revisions() do, even when the traversal fails.
        try:
            for entry in stream:
                origpaths, revnum, author, date, message = entry
                copies = [(e.copyfrom_path, e.copyfrom_rev, p) for p, e
                          in origpaths.iteritems() if e.copyfrom_path]
                # Apply moves/copies from more specific to general
                copies.sort(reverse=True)

                srctagspath = tagspath
                if copies and copies[-1][2] == tagspath:
                    # Track tags directory moves
                    srctagspath = copies.pop()[0]

                for source, sourcerev, dest in copies:
                    if not dest.startswith(tagspath + '/'):
                        continue
                    for tag in pendings:
                        if tag[0].startswith(dest):
                            # A pending tag came from this copy,
                            # follow it back to the copy source.
                            tagpath = source + tag[0][len(dest):]
                            tag[:2] = [tagpath, sourcerev]
                            break
                    else:
                        pendings.append([source, sourcerev, dest])

                # Filter out tags with children coming from different
                # parts of the repository like:
                # /tags/tag.1 (from /trunk:10)
                # /tags/tag.1/foo (from /branches/foo:12)
                # Here /tags/tag.1 is discarded as well as its
                # children. It happens with tools like cvs2svn. Such
                # tags cannot be represented in mercurial.
                addeds = dict((p, e.copyfrom_path) for p, e
                              in origpaths.iteritems()
                              if e.action == 'A' and e.copyfrom_path)
                badroots = set()
                for destroot in addeds:
                    for source, sourcerev, dest in pendings:
                        if (not dest.startswith(destroot + '/')
                            or source.startswith(addeds[destroot] + '/')):
                            continue
                        badroots.add(destroot)
                        break

                for badroot in badroots:
                    pendings = [p for p in pendings if p[2] != badroot
                                and not p[2].startswith(badroot + '/')]

                # Tell tag renamings from tag creations
                remainings = []
                for source, sourcerev, dest in pendings:
                    tagname = dest.split('/')[-1]
                    if source.startswith(srctagspath):
                        # Copied within the tags directory: a rename,
                        # keep following it in older revisions.
                        remainings.append([source, sourcerev, tagname])
                        continue
                    if tagname in tags:
                        # Keep the latest tag value
                        continue
                    # From revision may be fake, get one with changes
                    try:
                        tagid = self.latest(source, sourcerev)
                        if tagid and tagname not in tags:
                            tags[tagname] = tagid
                    except SvnPathNotFound:
                        # It happens when we are following directories
                        # we assumed were copied with their parents
                        # but were really created in the tag
                        # directory.
                        pass
                pendings = remainings
                tagspath = srctagspath
        finally:
            stream.close()

    except SubversionException:
        self.ui.note(_('no tags found at revision %d\n') % start)
    return tags
522 522
def converted(self, rev, destrev):
    """Log the destination revision of rev in the working copy.

    A "<destrev> <revnum>" line is appended to .svn/hg-shamap. Nothing
    is done when the source is not a working copy.
    """
    if not self.wc:
        return
    if self.convertfp is None:
        shamap = os.path.join(self.wc, '.svn', 'hg-shamap')
        self.convertfp = open(shamap, 'a')
    fp = self.convertfp
    fp.write('%s %d\n' % (destrev, self.revnum(rev)))
    fp.flush()
531 531
def revid(self, revnum, module=None):
    """Build the revision id of revnum in module.

    module defaults to the current module when empty or None.
    """
    if not module:
        module = self.module
    return 'svn:%s%s@%s' % (self.uuid, module, revnum)
534 534
def revnum(self, rev):
    """Return the revision number at the end of a revision id."""
    number = rev.rsplit('@', 1)[-1]
    return int(number)
537 537
def revsplit(self, rev):
    """Split a revision id into (uuid, module, revision number).

    module is empty or starts with a slash.
    """
    location, number = rev.rsplit('@', 1)
    number = int(number)
    pieces = location.split('/', 1)
    # Drop the leading 'svn:' from the uuid part
    uuid = pieces[0][4:]
    if len(pieces) > 1:
        mod = '/' + pieces[1]
    else:
        mod = ''
    return uuid, mod, number
547 547
def latest(self, path, stop=0):
    """Find the latest revid affecting path, up to stop. It may return
    a revision in a different module, since a branch may be moved without
    a change being reported. Return None if computed module does not
    belong to rootmodule subtree.

    Raises SvnPathNotFound when path does not exist at stop.
    """
    root = self.rootmodule
    if not path.startswith(root):
        # Requests on foreign branches may be forbidden at server level
        self.ui.debug('ignoring foreign branch %r\n' % path)
        return None

    if not stop:
        stop = svn.ra.get_latest_revnum(self.ra)
    try:
        # stat() wants a path relative to the repository root
        previous = self.reparent('')
        dirent = svn.ra.stat(self.ra, path.strip('/'), stop)
        self.reparent(previous)
    except SubversionException:
        dirent = None
    if not dirent:
        raise SvnPathNotFound(_('%s not found up to revision %d')
                              % (path, stop))

    # stat() gives us the previous revision on this line of
    # development, but it might be in *another module*. Fetch the
    # log and detect renames down to the latest revision.
    created = dirent.created_rev
    stream = self._getlog([path], stop, created)
    try:
        for changed, revnum, author, date, message in stream:
            if revnum <= created:
                break

            for p in changed:
                if not path.startswith(p):
                    continue
                source = changed[p].copyfrom_path
                if not source:
                    continue
                newpath = source + path[len(p):]
                self.ui.debug("branch renamed from %s to %s at %d\n" %
                              (path, newpath, revnum))
                path = newpath
                break
    finally:
        stream.close()

    if not path.startswith(root):
        self.ui.debug('ignoring foreign branch %r\n' % path)
        return None
    return self.revid(created, path)
596 596
def reparent(self, module):
    """Reparent the svn transport and return the previous parent."""
    if module == self.prevmodule:
        # Already there, nothing to do
        return module
    svnurl = self.baseurl + urllib.quote(module)
    self.ui.debug("reparent to %s\n" % svnurl)
    svn.ra.reparent(self.ra, svnurl)
    previous = self.prevmodule
    self.prevmodule = module
    if previous is None:
        return ''
    return previous
609 609
def expandpaths(self, rev, paths, parents):
    """Expand the changed paths of rev into changed files and copies.

    paths is a sequence of (path, entry) pairs taken from the svn log
    and parents the list of parent revision ids. Returns a tuple made
    of the list of changed file paths, without duplicates and in no
    particular order, and a dictionary mapping copy destinations to
    copy sources.
    """
    entries = []
    # Map of entrypath, revision for finding source of deleted
    # revisions.
    copyfrom = {}
    copies = {}

    # Make sure the transport points to the module of rev
    new_module, revnum = self.revsplit(rev)[1:]
    if new_module != self.module:
        self.module = new_module
        self.reparent(self.module)

    for path, ent in paths:
        entrypath = self.getrelpath(path)

        kind = self._checkpath(entrypath, revnum)
        if kind == svn.core.svn_node_file:
            entries.append(self.recode(entrypath))
            if not ent.copyfrom_path or not parents:
                continue
            # Copy sources not in parent revisions cannot be
            # represented, ignore their origin for now
            pmodule, prevnum = self.revsplit(parents[0])[1:]
            if ent.copyfrom_rev < prevnum:
                continue
            copyfrom_path = self.getrelpath(ent.copyfrom_path, pmodule)
            if not copyfrom_path:
                continue
            self.ui.debug("copied to %s from %s@%s\n" %
                          (entrypath, copyfrom_path, ent.copyfrom_rev))
            copies[self.recode(entrypath)] = self.recode(copyfrom_path)
        elif kind == 0: # gone, but had better be a deleted *file*
            self.ui.debug("gone from %s\n" % ent.copyfrom_rev)
            pmodule, prevnum = self.revsplit(parents[0])[1:]
            parentpath = pmodule + "/" + entrypath
            self.ui.debug("entry %s\n" % parentpath)

            # We can avoid the reparent calls if the module has
            # not changed but it probably does not worth the pain.
            prevmodule = self.reparent('')
            fromkind = svn.ra.check_path(self.ra, parentpath.strip('/'),
                                         prevnum)
            self.reparent(prevmodule)

            if fromkind == svn.core.svn_node_file:
                entries.append(self.recode(entrypath))
            elif fromkind == svn.core.svn_node_dir:
                # A directory went away: report every entry it
                # contained in the parent revision.
                if ent.action == 'C':
                    children = self._find_children(path, prevnum)
                else:
                    oroot = parentpath.strip('/')
                    nroot = path.strip('/')
                    children = self._find_children(oroot, prevnum)
                    children = [s.replace(oroot, nroot) for s in children]

                for child in children:
                    childpath = self.getrelpath("/" + child, pmodule)
                    if not childpath:
                        continue
                    if childpath in copies:
                        del copies[childpath]
                    entries.append(childpath)
            else:
                self.ui.debug('unknown path in revision %d: %s\n' % \
                              (revnum, path))
        elif kind == svn.core.svn_node_dir:
            # If the directory just had a prop change,
            # then we shouldn't need to look for its children.
            if ent.action == 'M':
                continue

            children = sorted(self._find_children(path, revnum))
            for child in children:
                # Can we move a child directory and its
                # parent in the same commit? (probably can). Could
                # cause problems if instead of revnum -1,
                # we have to look in (copyfrom_path, revnum - 1)
                entrypath = self.getrelpath("/" + child)
                if entrypath:
                    # Need to filter out directories here...
                    kind = self._checkpath(entrypath, revnum)
                    if kind != svn.core.svn_node_dir:
                        entries.append(self.recode(entrypath))

            # Handle directory copies
            if not ent.copyfrom_path or not parents:
                continue
            # Copy sources not in parent revisions cannot be
            # represented, ignore their origin for now
            pmodule, prevnum = self.revsplit(parents[0])[1:]
            if ent.copyfrom_rev < prevnum:
                continue
            copyfrompath = self.getrelpath(ent.copyfrom_path, pmodule)
            if not copyfrompath:
                continue
            copyfrom[path] = ent
            self.ui.debug("mark %s came from %s:%d\n"
                          % (path, copyfrompath, ent.copyfrom_rev))
            # Record a copy for every entry of the source directory
            children = self._find_children(ent.copyfrom_path, ent.copyfrom_rev)
            children.sort()
            for child in children:
                entrypath = self.getrelpath("/" + child, pmodule)
                if not entrypath:
                    continue
                copytopath = path + entrypath[len(copyfrompath):]
                copytopath = self.getrelpath(copytopath)
                copies[self.recode(copytopath)] = self.recode(entrypath)

    return (list(set(entries)), copies)
719 719
def _fetch_revisions(self, from_revnum, to_revnum):
    """Read the log of the current module and cache its revisions.

    Revisions are walked from the highest bound down to the lowest,
    the bounds being swapped when given in ascending order. Every
    parsed revision is stored in self.commits and self.paths. The
    walk stops early on a branch root or on a revision below
    self.startrev.

    Raises util.Abort when svn reports that the revision does not
    exist; other Subversion errors are propagated.
    """
    if from_revnum < to_revnum:
        from_revnum, to_revnum = to_revnum, from_revnum

    # Last commit parsed, that is the child of the next one
    self.child_cset = None

    def parselogentry(orig_paths, revnum, author, date, message):
        """Return the parsed commit object or None, and True if
        the revision is a branch root.
        """
        self.ui.debug("parsing revision %d (%d changes)\n" %
                      (revnum, len(orig_paths)))

        branched = False
        rev = self.revid(revnum)
        # branch log might return entries for a parent we already have

        if rev in self.commits or revnum < to_revnum:
            return None, branched

        parents = []
        # check whether this revision is the start of a branch or part
        # of a branch renaming
        orig_paths = sorted(orig_paths.iteritems())
        root_paths = [(p, e) for p, e in orig_paths
                      if self.module.startswith(p)]
        if root_paths:
            # Paths are sorted, the last match is the longest one
            path, ent = root_paths[-1]
            if ent.copyfrom_path:
                branched = True
                newpath = ent.copyfrom_path + self.module[len(path):]
                # ent.copyfrom_rev may not be the actual last revision
                previd = self.latest(newpath, ent.copyfrom_rev)
                if previd is not None:
                    prevmodule, prevnum = self.revsplit(previd)[1:]
                    if prevnum >= self.startrev:
                        parents = [previd]
                        self.ui.note(
                            _('found parent of branch %s at %d: %s\n') %
                            (self.module, prevnum, prevmodule))
            else:
                self.ui.debug("no copyfrom path, don't know what to do.\n")

        paths = []
        # filter out unrelated paths
        for path, ent in orig_paths:
            if self.getrelpath(path) is None:
                continue
            paths.append((path, ent))

        # Example SVN datetime. Includes microseconds.
        # ISO-8601 conformant
        # '2007-01-04T17:35:00.902377Z'
        date = util.parsedate(date[:19] + " UTC", ["%Y-%m-%dT%H:%M:%S"])

        log = message and self.recode(message) or ''
        author = author and self.recode(author) or ''
        # The branch is named after the last component of the module,
        # 'trunk' being mapped to the empty name.
        # NOTE(review): str.split() always returns at least one item,
        # so the IndexError handler looks unreachable.
        try:
            branch = self.module.split("/")[-1]
            if branch == 'trunk':
                branch = ''
        except IndexError:
            branch = None

        cset = commit(author=author,
                      date=util.datestr(date),
                      desc=log,
                      parents=parents,
                      branch=branch,
                      rev=rev)

        self.commits[rev] = cset
        # The parents list is *shared* among self.paths and the
        # commit object. Both will be updated below.
        self.paths[rev] = (paths, cset.parents)
        # Revisions come newest first: this one is the parent of the
        # previously parsed commit unless that one already has one.
        if self.child_cset and not self.child_cset.parents:
            self.child_cset.parents[:] = [rev]
        self.child_cset = cset
        return cset, branched

    self.ui.note(_('fetching revision log for "%s" from %d to %d\n') %
                 (self.module, from_revnum, to_revnum))

    try:
        firstcset = None
        lastonbranch = False
        stream = self._getlog([self.module], from_revnum, to_revnum)
        try:
            for entry in stream:
                paths, revnum, author, date, message = entry
                if revnum < self.startrev:
                    lastonbranch = True
                    break
                if not paths:
                    self.ui.debug('revision %d has no entries\n' % revnum)
                    # If we ever leave the loop on an empty
                    # revision, do not try to get a parent branch
                    lastonbranch = lastonbranch or revnum == 0
                    continue
                cset, lastonbranch = parselogentry(paths, revnum, author,
                                                   date, message)
                if cset:
                    firstcset = cset
                if lastonbranch:
                    break
        finally:
            stream.close()

        if not lastonbranch and firstcset and not firstcset.parents:
            # The first revision of the sequence (the last fetched one)
            # has invalid parents if not a branch root. Find the parent
            # revision now, if any.
            try:
                firstrevnum = self.revnum(firstcset.rev)
                if firstrevnum > 1:
                    latest = self.latest(self.module, firstrevnum - 1)
                    if latest:
                        firstcset.parents.append(latest)
            except SvnPathNotFound:
                pass
    except SubversionException, (inst, num):
        if num == svn.core.SVN_ERR_FS_NO_SUCH_REVISION:
            raise util.Abort(_('svn: branch has no revision %s') % to_revnum)
        raise
841 844
842 845 def _getfile(self, file, rev):
843 846 # TODO: ra.get_file transmits the whole file instead of diffs.
844 847 mode = ''
845 848 try:
846 849 new_module, revnum = self.revsplit(rev)[1:]
847 850 if self.module != new_module:
848 851 self.module = new_module
849 852 self.reparent(self.module)
850 853 io = StringIO()
851 854 info = svn.ra.get_file(self.ra, file, revnum, io)
852 855 data = io.getvalue()
853 856 # ra.get_files() seems to keep a reference on the input buffer
854 857 # preventing collection. Release it explicitely.
855 858 io.close()
856 859 if isinstance(info, list):
857 860 info = info[-1]
858 861 mode = ("svn:executable" in info) and 'x' or ''
859 862 mode = ("svn:special" in info) and 'l' or mode
860 863 except SubversionException, e:
861 864 notfound = (svn.core.SVN_ERR_FS_NOT_FOUND,
862 865 svn.core.SVN_ERR_RA_DAV_PATH_NOT_FOUND)
863 866 if e.apr_err in notfound: # File not found
864 867 raise IOError()
865 868 raise
866 869 if mode == 'l':
867 870 link_prefix = "link "
868 871 if data.startswith(link_prefix):
869 872 data = data[len(link_prefix):]
870 873 return data, mode
871 874
def _find_children(self, path, revnum):
    """Return the entries svn.client.ls reports below ``path`` at ``revnum``.

    ``path`` is stripped of surrounding slashes, URL-quoted and appended to
    ``self.baseurl`` to build the URL that is listed. Every returned name
    is prefixed with the stripped ``path`` and a slash.
    """
    path = path.strip('/')
    pool = Pool()
    quoted = urllib.quote(path)
    url = '/'.join([self.baseurl, quoted]).strip('/')
    # NOTE(review): the third argument (True) presumably requests a
    # recursive listing -- confirm against the svn.client.ls binding.
    listing = svn.client.ls(url, optrev(revnum), True, self.ctx, pool)
    children = []
    for name in listing.keys():
        children.append('%s/%s' % (path, name))
    return children
878 881
def getrelpath(self, path, module=None):
    """Return ``path`` relative to ``module``, or None if it lies outside.

    ``module`` defaults to ``self.module``. For example, with the module
    "/CMFPlone/branches/Plone-2_0-branch", the log path
    "/CMFPlone/branches/Plone-2_0-branch/tests/PloneTestCase.py" yields
    "tests/PloneTestCase.py". A path equal to the module (ignoring
    trailing slashes) yields the empty string. Any other path is reported
    through ``self.ui.debug`` and None is returned.
    """
    root = module
    if root is None:
        root = self.module
    if path.startswith(root):
        tail = path.rstrip('/')[len(root):]
        if not tail:
            # The path is the module itself.
            return tail
        if tail[0] == '/':
            return tail[1:]
        # Otherwise the prefix matched only part of a path component
        # (e.g. "/trunkish" against "/trunk"): fall through and ignore it.

    # The path is outside our tracked tree...
    self.ui.debug('%r is not under %r, ignoring\n' % (path, root))
    return None
898 901
def _checkpath(self, path, revnum):
    """Return what svn.ra.check_path reports for ``path`` at ``revnum``."""
    # ra.check_path does not like leading slashes very much, it leads
    # to PROPFIND subversion errors
    trimmed = path.strip('/')
    return svn.ra.check_path(self.ra, trimmed, revnum)
903 906
def _getlog(self, paths, start, end, limit=0, discover_changed_paths=True,
            strict_node_history=False):
    """Start an ``hg debugsvnlog`` child process and return its log stream.

    The request (base URL, normalized paths, revision range and options)
    is encoded with ``encodeargs`` and written to the child's stdin; the
    child's stdout is wrapped in ``logstream`` and returned.

    Raises util.Abort when closing the child's stdin fails with IOError.
    """
    def normalize(p):
        # Normalize path names, svn >= 1.5 only wants paths relative to
        # supplied URL
        if not p.startswith('/'):
            p = self.module + '/' + p
        return p.strip('/')

    relpaths = [normalize(p) for p in paths]
    payload = encodeargs([self.baseurl, relpaths, start, end, limit,
                          discover_changed_paths, strict_node_history])
    cmd = '%s debugsvnlog' % util.shellquote(util.hgexecutable())
    stdin, stdout = util.popen2(cmd)
    stdin.write(payload)
    try:
        stdin.close()
    except IOError:
        raise util.Abort(_('Mercurial failed to run itself, check'
                           ' hg executable is in PATH'))
    return logstream(stdout)
926 929
# Shell script written to a new repository's hooks/pre-revprop-change file.
# It allows modifying svn:log and adding the hg:convert-branch and
# hg:convert-rev revision properties; every other change is rejected.
pre_revprop_change = (
    '#!/bin/sh\n'
    '\n'
    'REPOS="$1"\n'
    'REV="$2"\n'
    'USER="$3"\n'
    'PROPNAME="$4"\n'
    'ACTION="$5"\n'
    '\n'
    'if [ "$ACTION" = "M" -a "$PROPNAME" = "svn:log" ]; then exit 0; fi\n'
    'if [ "$ACTION" = "A" -a "$PROPNAME" = "hg:convert-branch" ]; '
    'then exit 0; fi\n'
    'if [ "$ACTION" = "A" -a "$PROPNAME" = "hg:convert-rev" ]; '
    'then exit 0; fi\n'
    '\n'
    'echo "Changing prohibited revision property" >&2\n'
    'exit 1\n'
)
942 945
class svn_sink(converter_sink, commandline):
    """Conversion sink that replays changesets into a Subversion checkout.

    Changes are applied to a working copy (``self.wc``) and committed by
    running the ``svn`` command line client. Pending operations are
    queued in ``self.delete``, ``self.setexec``, ``self.delexec`` and
    ``self.copies`` and flushed by ``putcommit``.
    """
    # Extracts the new revision number from "svn commit" output.
    commit_re = re.compile(r'Committed revision (\d+).', re.M)

    def prerun(self):
        """Change into the working copy, if one is set.

        NOTE(review): presumably called by the commandline base class
        before each command -- confirm against common.commandline.
        """
        if self.wc:
            os.chdir(self.wc)

    def postrun(self):
        """Return to the directory that was current at construction time."""
        if self.wc:
            os.chdir(self.cwd)

    def join(self, name):
        """Return the path of ``name`` inside the working copy's .svn dir."""
        return os.path.join(self.wc, '.svn', name)

    def revmapfile(self):
        """Return the path of the 'hg-shamap' file in the .svn directory."""
        return self.join('hg-shamap')

    def authorfile(self):
        """Return the path of the 'hg-authormap' file in the .svn directory."""
        return self.join('hg-authormap')

    def __init__(self, ui, path):
        """Open, or create, the working copy that receives the conversion.

        If ``path`` is already a working copy (it has ``.svn/entries``) it
        is updated and used as is. Otherwise a working copy named
        ``<basename>-wc`` is checked out in the current directory; when
        the parent directory of ``path`` exists, ``path`` is treated as a
        local repository, which is created first if it has no
        ``db/fs-type`` file.
        """
        converter_sink.__init__(self, ui, path)
        commandline.__init__(self, ui, 'svn')
        self.delete = []
        self.setexec = []
        self.delexec = []
        self.copies = []
        self.wc = None
        self.cwd = os.getcwd()

        path = os.path.realpath(path)

        # Holds the repository path when this call created the repository.
        created = False
        if os.path.isfile(os.path.join(path, '.svn', 'entries')):
            self.wc = path
            self.run0('update')
        else:
            wcpath = os.path.join(os.getcwd(), os.path.basename(path) + '-wc')

            if os.path.isdir(os.path.dirname(path)):
                if not os.path.exists(os.path.join(path, 'db', 'fs-type')):
                    ui.status(_('initializing svn repo %r\n') %
                              os.path.basename(path))
                    commandline(ui, 'svnadmin').run0('create', path)
                    created = path
                # Turn the local path into a file:// URL for checkout.
                path = util.normpath(path)
                if not path.startswith('/'):
                    path = '/' + path
                path = 'file://' + path

            ui.status(_('initializing svn wc %r\n') % os.path.basename(wcpath))
            self.run0('checkout', path, wcpath)

            self.wc = wcpath
        self.opener = util.opener(self.wc)
        self.wopener = util.opener(self.wc)
        self.childmap = mapfile(ui, self.join('hg-childmap'))
        self.is_exec = util.checkexec(self.wc) and util.is_exec or None

        if created:
            # Install the hook that lets putcommit set revision properties.
            hook = os.path.join(created, 'hooks', 'pre-revprop-change')
            fp = open(hook, 'w')
            try:
                fp.write(pre_revprop_change)
            finally:
                fp.close()
            util.set_flags(hook, False, True)

        xport = transport.SvnRaTransport(url=geturl(path))
        self.uuid = svn.ra.get_uuid(xport.ra)

    def wjoin(self, *names):
        """Join ``names`` onto the working copy directory."""
        return os.path.join(self.wc, *names)

    def putfile(self, filename, flags, data):
        """Write ``data`` to ``filename`` in the working copy.

        With ``'l'`` in ``flags`` a symlink pointing at ``data`` is
        created. Otherwise a regular file is written, its executable bit
        is set according to ``'x'`` in ``flags``, and the file is queued
        in ``self.setexec`` or ``self.delexec`` when its svn:executable
        property has to change.
        """
        if 'l' in flags:
            self.wopener.symlink(data, filename)
        else:
            wpath = self.wjoin(filename)
            try:
                # Remove a stale symlink so the data is not written
                # through it. The unlink must target the working copy
                # path: a bare relative filename would be resolved
                # against the process' current directory instead.
                if os.path.islink(wpath):
                    os.unlink(wpath)
            except OSError:
                pass
            fp = self.wopener(filename, 'w')
            try:
                fp.write(data)
            finally:
                fp.close()

            if self.is_exec:
                was_exec = self.is_exec(wpath)
            else:
                # On filesystems not supporting execute-bit, there is no way
                # to know if it is set but asking subversion. Setting it
                # systematically is just as expensive and much simpler.
                was_exec = 'x' not in flags

            util.set_flags(wpath, False, 'x' in flags)
            if was_exec:
                if 'x' not in flags:
                    self.delexec.append(filename)
            else:
                if 'x' in flags:
                    self.setexec.append(filename)

    def _copyfile(self, source, dest):
        """Record a copy from ``source`` to ``dest`` with "svn copy"."""
        # SVN's copy command pukes if the destination file exists, but
        # our copyfile method expects to record a copy that has
        # already occurred.  Cross the semantic gap.
        wdest = self.wjoin(dest)
        exists = os.path.exists(wdest)
        if exists:
            # Reserve a unique name next to the destination and move the
            # existing file there while svn performs the copy.
            fd, tempname = tempfile.mkstemp(
                prefix='hg-copy-', dir=os.path.dirname(wdest))
            os.close(fd)
            os.unlink(tempname)
            os.rename(wdest, tempname)
        try:
            self.run0('copy', source, dest)
        finally:
            if exists:
                # Put the original content back over what svn copied.
                try:
                    os.unlink(wdest)
                except OSError:
                    pass
                os.rename(tempname, wdest)

    def dirs_of(self, files):
        """Return the set of directories involved in ``files``.

        This includes every ancestor directory of each entry, plus the
        entry itself when it is a directory in the working copy.
        """
        dirs = set()
        for f in files:
            if os.path.isdir(self.wjoin(f)):
                dirs.add(f)
            for i in strutil.rfindall(f, '/'):
                dirs.add(f[:i])
        return dirs

    def add_dirs(self, files):
        """Run "svn add" on directories of ``files`` lacking .svn/entries.

        Returns the list of directories that were added.
        """
        add_dirs = [d for d in sorted(self.dirs_of(files))
                    if not os.path.exists(self.wjoin(d, '.svn', 'entries'))]
        if add_dirs:
            self.xargs(add_dirs, 'add', non_recursive=True, quiet=True)
        return add_dirs

    def add_files(self, files):
        """Run "svn add" on ``files`` and return them."""
        if files:
            self.xargs(files, 'add', quiet=True)
        return files

    def tidy_dirs(self, names):
        """Run "svn delete" on directories left with only a .svn entry.

        Directories are visited in reverse sorted order, so children are
        considered before their parents. Returns the deleted directories.
        """
        deleted = []
        for d in sorted(self.dirs_of(names), reverse=True):
            wd = self.wjoin(d)
            # os.listdir() returns a list, so it must be compared with a
            # list; comparing it with the string '.svn' is never true.
            if os.listdir(wd) == ['.svn']:
                self.run0('delete', d)
                deleted.append(d)
        return deleted

    def addchild(self, parent, child):
        """Record ``child`` as the revision committed on top of ``parent``."""
        self.childmap[parent] = child

    def revid(self, rev):
        """Return the identifier ``svn:<uuid>@<rev>`` for revision ``rev``."""
        return u"svn:%s@%s" % (self.uuid, rev)

    def putcommit(self, files, copies, parents, commit, source, revmap):
        """Apply one changeset to the working copy and commit it.

        Returns the identifier of the resulting revision. If a parent
        already has a child recorded in ``self.childmap``, that child's
        identifier is returned without committing. If svn reports no
        committed revision and ``files`` is empty, ``parents[0]`` is
        returned.

        Raises util.Abort when the svn commit output cannot be parsed.
        """
        # Apply changes to working copy
        for f, v in files:
            try:
                data = source.getfile(f, v)
            except IOError:
                # The source signals a missing file with IOError.
                self.delete.append(f)
            else:
                e = source.getmode(f, v)
                self.putfile(f, e, data)
                if f in copies:
                    self.copies.append([copies[f], f])
        files = [f[0] for f in files]

        for parent in parents:
            try:
                return self.revid(self.childmap[parent])
            except KeyError:
                pass
        entries = set(self.delete)
        files = frozenset(files)
        entries.update(self.add_dirs(files.difference(entries)))
        if self.copies:
            for s, d in self.copies:
                self._copyfile(s, d)
            self.copies = []
        if self.delete:
            self.xargs(self.delete, 'delete')
            self.delete = []
        entries.update(self.add_files(files.difference(entries)))
        entries.update(self.tidy_dirs(entries))
        if self.delexec:
            self.xargs(self.delexec, 'propdel', 'svn:executable')
            self.delexec = []
        if self.setexec:
            self.xargs(self.setexec, 'propset', 'svn:executable', '*')
            self.setexec = []

        fd, messagefile = tempfile.mkstemp(prefix='hg-convert-')
        try:
            # Writing happens inside the try block so the temporary file
            # is removed even when the write itself fails.
            fp = os.fdopen(fd, 'w')
            try:
                fp.write(commit.desc)
            finally:
                fp.close()
            output = self.run0('commit',
                               username=util.shortuser(commit.author),
                               file=messagefile,
                               encoding='utf-8')
            try:
                rev = self.commit_re.search(output).group(1)
            except AttributeError:
                # No "Committed revision" line in the output.
                if not files:
                    return parents[0]
                self.ui.warn(_('unexpected svn output:\n'))
                self.ui.warn(output)
                raise util.Abort(_('unable to cope with svn output'))
            if commit.rev:
                self.run('propset', 'hg:convert-rev', commit.rev,
                         revprop=True, revision=rev)
            if commit.branch and commit.branch != 'default':
                self.run('propset', 'hg:convert-branch', commit.branch,
                         revprop=True, revision=rev)
            for parent in parents:
                self.addchild(parent, rev)
            return self.revid(rev)
        finally:
            os.unlink(messagefile)

    def puttags(self, tags):
        """Tags are not supported yet; only emit a warning."""
        self.ui.warn(_('XXX TAGS NOT IMPLEMENTED YET\n'))
General Comments 0
You need to be logged in to leave comments. Login now