##// END OF EJS Templates
convert: hide svn deprecation warnings
Ronny Pfannschmidt -
r8221:f35b9330 default
parent child Browse files
Show More
@@ -1,1203 +1,1208 b''
1 1 # Subversion 1.4/1.5 Python API backend
2 2 #
3 3 # Copyright(C) 2007 Daniel Holth et al
4 4 #
5 5 # Configuration options:
6 6 #
7 7 # convert.svn.trunk
8 8 # Relative path to the trunk (default: "trunk")
9 9 # convert.svn.branches
10 10 # Relative path to tree of branches (default: "branches")
11 11 # convert.svn.tags
12 12 # Relative path to tree of tags (default: "tags")
13 13 #
14 14 # Set these in a hgrc, or on the command line as follows:
15 15 #
16 16 # hg convert --config convert.svn.trunk=wackoname [...]
17 17
18 18 import locale
19 19 import os
20 20 import re
21 21 import sys
22 22 import cPickle as pickle
23 23 import tempfile
24 24 import urllib
25 25
26 26 from mercurial import strutil, util
27 27 from mercurial.i18n import _
28 28
29 29 # Subversion stuff. Works best with very recent Python SVN bindings
30 30 # e.g. SVN 1.5 or backports. Thanks to the bzr folks for enhancing
31 31 # these bindings.
32 32
33 33 from cStringIO import StringIO
34 34
35 35 from common import NoRepo, MissingTool, commit, encodeargs, decodeargs
36 36 from common import commandline, converter_source, converter_sink, mapfile
37 37
38 38 try:
39 39 from svn.core import SubversionException, Pool
40 40 import svn
41 41 import svn.client
42 42 import svn.core
43 43 import svn.ra
44 44 import svn.delta
45 45 import transport
46 import warnings
47 warnings.filterwarnings('ignore',
48 module='svn.core',
49 category=DeprecationWarning)
50
46 51 except ImportError:
47 52 pass
48 53
class SvnPathNotFound(Exception):
    """Signals that a path could not be found in the repository."""
51 56
def geturl(path):
    """Return a URL for path.

    The URL Subversion derives from path is returned when the bindings
    can compute one.  Otherwise an existing local directory is turned
    into a quoted file:// URL, and anything else is returned unchanged.
    """
    try:
        canonical = svn.core.svn_path_canonicalize(path)
        return svn.client.url_from_path(canonical)
    except SubversionException:
        # Subversion could not map the path; use the fallbacks below.
        pass
    if not os.path.isdir(path):
        return path
    localpath = os.path.normpath(os.path.abspath(path))
    if os.name == 'nt':
        # Prefix the normalized path with a slash before quoting it.
        localpath = '/' + util.normpath(localpath)
    return 'file://%s' % urllib.quote(localpath)
63 68
def optrev(number):
    """Build an svn_opt_revision_t that designates revision `number`."""
    revision = svn.core.svn_opt_revision_t()
    revision.kind = svn.core.svn_opt_revision_number
    revision.value.number = number
    return revision
69 74
class changedpath(object):
    """Plain copy of the copyfrom_path, copyfrom_rev and action of a path.

    Only these three attributes of the source object are kept.
    """
    def __init__(self, p):
        for name in ('copyfrom_path', 'copyfrom_rev', 'action'):
            setattr(self, name, getattr(p, name))
75 80
def get_log_child(fp, url, paths, start, end, limit=0, discover_changed_paths=True,
                  strict_node_history=False):
    """Write the svn log of paths at url to fp as a sequence of pickles.

    Each log entry is dumped as an (orig_paths, revnum, author, date,
    message) tuple in which the values of orig_paths have been replaced
    by changedpath instances.  The stream ends with a pickled None when
    the log completes or an IOError occurs, or with the pickled `num`
    unpacked from a SubversionException.

    fp is closed afterwards and the process is terminated with
    os._exit(0), so this function does not return to its caller.
    """
    # -1 asks pickle for the highest protocol available.
    protocol = -1
    def receiver(orig_paths, revnum, author, date, message, pool):
        # Callback handed to svn.ra.get_log, called with each log entry.
        if orig_paths is not None:
            for k, v in orig_paths.iteritems():
                # presumably the raw svn objects cannot be pickled -- confirm
                orig_paths[k] = changedpath(v)
        pickle.dump((orig_paths, revnum, author, date, message),
                    fp, protocol)

    try:
        # Use an ra of our own so that our parent can consume
        # our results without confusing the server.
        t = transport.SvnRaTransport(url=url)
        svn.ra.get_log(t.ra, paths, start, end, limit,
                       discover_changed_paths,
                       strict_node_history,
                       receiver)
    except SubversionException, (inst, num):
        # Send num instead of the None terminator to report the failure.
        pickle.dump(num, fp, protocol)
    except IOError:
        # Caller may interrupt the iteration
        pickle.dump(None, fp, protocol)
    else:
        # Normal end of log: None terminates the stream.
        pickle.dump(None, fp, protocol)
    fp.close()
    # With large history, cleanup process goes crazy and suddenly
    # consumes *huge* amount of memory. The output file being closed,
    # there is no need for clean termination.
    os._exit(0)
106 111
def debugsvnlog(ui, **opts):
    """Run get_log_child with arguments read from stdin, writing to stdout.

    stdin and stdout are switched to binary mode first.  The arguments
    are decoded with decodeargs from everything available on stdin.
    Fetching the log in a subprocess avoids memory collection issues in
    the parent.
    """
    for stream in (sys.stdin, sys.stdout):
        util.set_binary(stream)
    encoded = sys.stdin.read()
    logargs = decodeargs(encoded)
    get_log_child(sys.stdout, *logargs)
115 120
class logstream:
    """Interruptible revision log iterator.

    Wraps a file-like object from which pickled log entries are read.
    Iteration yields each 5-item entry (orig_paths, revnum, author,
    date, message) and stops at the first pickled None.  Any other
    value is reported by raising
    SubversionException("child raised exception", value).

    NOTE(review): pickle.load trusts its input; this looks intended
    only for the output of our own log subprocess -- confirm.
    """
    def __init__(self, stdout):
        # stdout: readable file-like object holding the pickle stream.
        self._stdout = stdout

    def __iter__(self):
        while True:
            entry = pickle.load(self._stdout)
            if entry is None:
                # None is the end-of-stream marker.
                break
            try:
                # Unpacking only validates the shape of the record.
                orig_paths, revnum, author, date, message = entry
            except (TypeError, ValueError):
                # Not a 5-item record: TypeError for a non-iterable value,
                # ValueError for a wrong item count.  Only these are
                # caught so unrelated exceptions are not masked.
                raise SubversionException("child raised exception", entry)
            yield entry

    def close(self):
        """Close the underlying stream; later calls do nothing."""
        if self._stdout:
            self._stdout.close()
            self._stdout = None
136 141
137 142
138 143 # Check to see if the given path is a local Subversion repo. Verify this by
139 144 # looking for several svn-specific files and directories in the given
140 145 # directory.
def filecheck(path, proto):
    """Return True if path holds the entries of a local svn repository.

    The directory must contain 'locks', 'hooks', 'format' and 'db'.
    proto is not used.
    """
    markers = ('locks', 'hooks', 'format', 'db')
    missing = [name for name in markers
               if not os.path.exists(os.path.join(path, name))]
    return not missing
146 151
147 152 # Check to see if a given path is the root of an svn repo over http. We verify
148 153 # this by requesting a version-controlled URL we know can't exist and looking
149 154 # for the svn-specific "not found" XML.
def httpcheck(path, proto):
    """Return True if proto://path answers like the root of an svn repo.

    Requests the URL '<proto>://<path>/!svn/ver/0/.svn' and looks for the
    errcode 160013 marker in the response body.  Errors raised by
    urllib.urlopen or by reading the response propagate to the caller.
    """
    response = urllib.urlopen('%s://%s/!svn/ver/0/.svn' % (proto, path))
    try:
        body = response.read()
    finally:
        # Release the connection explicitly instead of relying on
        # garbage collection.
        response.close()
    return '<m:human-readable errcode="160013">' in body
153 158
# Maps a URL scheme to the function used to probe for a repository root.
protomap = {
    'http': httpcheck,
    'https': httpcheck,
    'file': filecheck,
}

def issvnurl(url):
    """Return True if url, or one of its parent paths, passes the probe.

    The probe is selected from protomap by the URL scheme.  Schemes
    without a probe, and strings without '://', give False.
    """
    if '://' not in url:
        return False
    proto, path = url.split('://', 1)
    path = urllib.url2pathname(path).replace(os.sep, '/')
    check = protomap.get(proto)
    if check is None:
        # No probe for this scheme, so nothing can match.
        return False
    # Try the path itself, then each shorter prefix ending before a '/'.
    while '/' in path:
        if check(path, proto):
            return True
        path = path[:path.rindex('/')]
    return False
169 174
170 175 # SVN conversion code stolen from bzr-svn and tailor
171 176 #
172 177 # Subversion looks like a versioned filesystem, branches structures
173 178 # are defined by conventions and not enforced by the tool. First,
174 179 # we define the potential branches (modules) as "trunk" and "branches"
175 180 # children directories. Revisions are then identified by their
176 181 # module and revision number (and a repository identifier).
177 182 #
178 183 # The revision graph is really a tree (or a forest). By default, a
179 184 # revision parent is the previous revision in the same module. If the
180 185 # module directory is copied/moved from another module then the
181 186 # revision is the module root and its parent the source revision in
182 187 # the parent module. A revision has at most one parent.
183 188 #
184 189 class svn_source(converter_source):
def __init__(self, ui, url, rev=None):
    """Open the Subversion repository at url and find its head revision.

    url may carry a trailing '@<revnum>' which limits the lookup of the
    head; an explicit rev takes precedence over it.

    Raises NoRepo when url does not look like a Subversion repository,
    MissingTool when the python bindings are missing or older than 1.4,
    and util.Abort when rev or the convert.svn.startrev setting is not
    an integer or when no revision is found in the module.
    """
    super(svn_source, self).__init__(ui, url, rev=rev)

    # Cheap checks first: known schemes, a working copy, or a probe.
    if not (url.startswith('svn://') or url.startswith('svn+ssh://') or
            (os.path.exists(url) and
             os.path.exists(os.path.join(url, '.svn'))) or
            issvnurl(url)):
        raise NoRepo("%s does not look like a Subversion repo" % url)

    # The name only exists if the module-level svn import succeeded.
    try:
        SubversionException
    except NameError:
        raise MissingTool(_('Subversion python bindings could not be loaded'))

    try:
        version = svn.core.SVN_VER_MAJOR, svn.core.SVN_VER_MINOR
        if version < (1, 4):
            raise MissingTool(_('Subversion python bindings %d.%d found, '
                                '1.4 or later required') % version)
    except AttributeError:
        raise MissingTool(_('Subversion python bindings are too old, 1.4 '
                            'or later required'))

    # NOTE(review): this value is replaced by 'UTF-8' further down.
    self.encoding = locale.getpreferredencoding()
    self.lastrevs = {}

    latest = None
    try:
        # Support file://path@rev syntax. Useful e.g. to convert
        # deleted branches.
        at = url.rfind('@')
        if at >= 0:
            latest = int(url[at+1:])
            url = url[:at]
    except ValueError:
        # The text after '@' is not a number: keep url untouched.
        pass
    self.url = geturl(url)
    self.encoding = 'UTF-8' # Subversion is always nominal UTF-8
    try:
        self.transport = transport.SvnRaTransport(url=self.url)
        self.ra = self.transport.ra
        self.ctx = self.transport.client
        self.baseurl = svn.ra.get_repos_root(self.ra)
        # Module is either empty or a repository path starting with
        # a slash and not ending with a slash.
        self.module = urllib.unquote(self.url[len(self.baseurl):])
        self.prevmodule = None
        self.rootmodule = self.module
        self.commits = {}
        self.paths = {}
        self.uuid = svn.ra.get_uuid(self.ra).decode(self.encoding)
    except SubversionException:
        ui.traceback()
        raise NoRepo("%s does not look like a Subversion repo" % self.url)

    if rev:
        try:
            latest = int(rev)
        except ValueError:
            raise util.Abort(_('svn: revision %s is not an integer') % rev)

    # Negative start revisions are clamped to 0.
    self.startrev = self.ui.config('convert', 'svn.startrev', default=0)
    try:
        self.startrev = int(self.startrev)
        if self.startrev < 0:
            self.startrev = 0
    except ValueError:
        raise util.Abort(_('svn: start revision %s is not an integer')
                         % self.startrev)

    # The blacklist file is optional: a read failure is ignored.
    try:
        self.get_blacklist()
    except IOError:
        pass

    self.head = self.latest(self.module, latest)
    if not self.head:
        raise util.Abort(_('no revision found in module %s') %
                         self.module.encode(self.encoding))
    self.last_changed = self.revnum(self.head)

    self._changescache = None

    # Remember the working copy location, if url is one.
    if os.path.exists(os.path.join(url, '.svn/entries')):
        self.wc = url
    else:
        self.wc = None
    self.convertfp = None
273 278
def setrevmap(self, revmap):
    """Store in self.lastrevs the highest revision number per module.

    Every key of revmap is split with self.revsplit.
    """
    highest = {}
    for key in revmap.iterkeys():
        module, number = self.revsplit(key)[1:]
        if module not in highest or number > highest[module]:
            highest[module] = number
    self.lastrevs = highest
282 287
def exists(self, path, optrev):
    """Return True if path can be listed below self.url at optrev.

    A SubversionException raised by the listing is reported as False.
    """
    target = self.url.rstrip('/') + '/' + urllib.quote(path)
    try:
        svn.client.ls(target, optrev, False, self.ctx)
    except SubversionException:
        return False
    return True
290 295
def getheads(self):
    """Return the list of head revision ids, stored in self.heads.

    The first item is the head of the module, which is moved under the
    trunk directory when one is found.  One head is then added for each
    directory found under the branches directory.  self.tags is set to
    the tags path, or None when there is none.

    Raises util.Abort when a configured layout path is missing, when the
    module has no revision, or when a start revision is combined with
    several heads or lies after the head.
    """

    def isdir(path, revnum):
        # True when path is a directory at revnum.
        kind = self._checkpath(path, revnum)
        return kind == svn.core.svn_node_dir

    def getcfgpath(name, rev):
        # Resolve the convert.svn.<name> setting to an existing path.
        # An explicitly empty setting disables the lookup.
        cfgpath = self.ui.config('convert', 'svn.' + name)
        if cfgpath is not None and cfgpath.strip() == '':
            return None
        path = (cfgpath or name).strip('/')
        if not self.exists(path, rev):
            # A missing default location is fine, a configured one is not.
            if cfgpath:
                raise util.Abort(_('expected %s to be at %r, but not found')
                             % (name, path))
            return None
        self.ui.note(_('found %s at %r\n') % (name, path))
        return path

    rev = optrev(self.last_changed)
    oldmodule = ''
    trunk = getcfgpath('trunk', rev)
    self.tags = getcfgpath('tags', rev)
    branches = getcfgpath('branches', rev)

    # If the project has a trunk or branches, we will extract heads
    # from them. We keep the project root otherwise.
    if trunk:
        oldmodule = self.module or ''
        self.module += '/' + trunk
        self.head = self.latest(self.module, self.last_changed)
        if not self.head:
            raise util.Abort(_('no revision found in module %s') %
                             self.module.encode(self.encoding))

    # First head in the list is the module's head
    self.heads = [self.head]
    if self.tags is not None:
        self.tags = '%s/%s' % (oldmodule , (self.tags or 'tags'))

    # Check if branches bring a few more heads to the list
    if branches:
        rpath = self.url.strip('/')
        branchnames = svn.client.ls(rpath + '/' + urllib.quote(branches),
                                    rev, False, self.ctx)
        for branch in branchnames.keys():
            module = '%s/%s/%s' % (oldmodule, branches, branch)
            # Only directories are treated as branches.
            if not isdir(module, self.last_changed):
                continue
            brevid = self.latest(module, self.last_changed)
            if not brevid:
                self.ui.note(_('ignoring empty branch %s\n') %
                               branch.encode(self.encoding))
                continue
            self.ui.note(_('found branch %s at %d\n') %
                         (branch, self.revnum(brevid)))
            self.heads.append(brevid)

    if self.startrev and self.heads:
        if len(self.heads) > 1:
            raise util.Abort(_('svn: start revision is not supported '
                               'with more than one branch'))
        revnum = self.revnum(self.heads[0])
        if revnum < self.startrev:
            raise util.Abort(_('svn: no revision found after start revision %d')
                             % self.startrev)

    return self.heads
359 364
def getfile(self, file, rev):
    """Return the data of file at rev and remember its mode.

    The mode returned by self._getfile is stored in self.modecache
    under the (file, rev) key.
    """
    contents, flags = self._getfile(file, rev)
    self.modecache[file, rev] = flags
    return contents
364 369
def getmode(self, file, rev):
    """Return the mode stored in self.modecache for (file, rev).

    Raises KeyError when nothing is stored for that pair.
    """
    key = (file, rev)
    return self.modecache[key]
367 372
def getchanges(self, rev):
    """Return (files, copies) for rev.

    files is a sorted list of (path, rev) pairs and copies is a dict.
    A result stored in self._changescache for the same rev is returned
    as is.  Otherwise self.modecache is reset, and the entry for rev is
    removed from self.paths once the result has been computed.
    """
    if self._changescache and self._changescache[0] == rev:
        return self._changescache[1]
    self._changescache = None
    self.modecache = {}
    (paths, parents) = self.paths[rev]
    if parents:
        files, copies = self.expandpaths(rev, paths, parents)
    else:
        # Perform a full checkout on roots
        uuid, module, revnum = self.revsplit(rev)
        entries = svn.client.ls(self.baseurl + urllib.quote(module),
                                optrev(revnum), True, self.ctx)
        # Keep files only; directories are not listed.
        files = [n for n,e in entries.iteritems()
                 if e.kind == svn.core.svn_node_file]
        copies = {}

    files.sort()
    # Pair every file with the revision it is to be read from.
    files = zip(files, [rev] * len(files))

    # caller caches the result, so free it here to release memory
    del self.paths[rev]
    return (files, copies)
391 396
def getchangedfiles(self, rev, i):
    """Return the names of the files changed in rev.

    The full result of self.getchanges(rev) is kept in
    self._changescache.  i is not used.
    """
    changes = self.getchanges(rev)
    self._changescache = (rev, changes)
    files = changes[0]
    return [entry[0] for entry in files]
396 401
def getcommit(self, rev):
    """Return the commit object for rev, fetching its log when needed.

    The object is removed from self.commits before it is returned.
    """
    if rev not in self.commits:
        module, revnum = self.revsplit(rev)[1:]
        self.module = module
        self.reparent(module)
        # We assume that:
        # - requests for revisions after "stop" come from the
        # revision graph backward traversal. Cache all of them
        # down to stop, they will be used eventually.
        # - requests for revisions before "stop" come to get
        # isolated branches parents. Just fetch what is needed.
        stop = self.lastrevs.get(module, 0)
        if revnum < stop:
            stop = revnum + 1
        self._fetch_revisions(revnum, stop)
    # caller caches the result, so free it here to release memory
    return self.commits.pop(rev)
416 421
def gettags(self):
    """Return a dict mapping tag names to revision ids.

    The dict is empty when self.tags is None.  A SubversionException
    raised while reading the log is reported with a note and the tags
    collected so far are returned.
    """
    tags = {}
    if self.tags is None:
        return tags

    # svn tags are just a convention, project branches left in a
    # 'tags' directory. There is no other relationship than
    # ancestry, which is expensive to discover and makes them hard
    # to update incrementally.  Worse, past revisions may be
    # referenced by tags far away in the future, requiring a deep
    # history traversal on every calculation.  Current code
    # performs a single backward traversal, tracking moves within
    # the tags directory (tag renaming) and recording a new tag
    # everytime a project is copied from outside the tags
    # directory. It also lists deleted tags, this behaviour may
    # change in the future.

    # Each pending item is a [source path, source rev, tag name] list.
    pendings = []
    tagspath = self.tags
    start = svn.ra.get_latest_revnum(self.ra)
    try:
        for entry in self._getlog([self.tags], start, self.startrev):
            origpaths, revnum, author, date, message = entry
            copies = [(e.copyfrom_path, e.copyfrom_rev, p) for p, e
                      in origpaths.iteritems() if e.copyfrom_path]
            # Apply moves/copies from more specific to general
            copies.sort(reverse=True)

            srctagspath = tagspath
            if copies and copies[-1][2] == tagspath:
                # Track tags directory moves
                srctagspath = copies.pop()[0]

            for source, sourcerev, dest in copies:
                if not dest.startswith(tagspath + '/'):
                    continue
                for tag in pendings:
                    if tag[0].startswith(dest):
                        # A pending tag came from dest: follow it to
                        # the origin of this copy.
                        tagpath = source + tag[0][len(dest):]
                        tag[:2] = [tagpath, sourcerev]
                        break
                else:
                    # No pending tag matched: dest is a new candidate.
                    pendings.append([source, sourcerev, dest.split('/')[-1]])

            # Tell tag renamings from tag creations
            remainings = []
            for source, sourcerev, tagname in pendings:
                if source.startswith(srctagspath):
                    # Still inside the tags directory: keep following.
                    remainings.append([source, sourcerev, tagname])
                    continue
                # From revision may be fake, get one with changes
                try:
                    tagid = self.latest(source, sourcerev)
                    if tagid:
                        tags[tagname] = tagid
                except SvnPathNotFound:
                    # It happens when we are following directories we assumed
                    # were copied with their parents but were really created
                    # in the tag directory.
                    pass
            pendings = remainings
            tagspath = srctagspath

    except SubversionException:
        self.ui.note(_('no tags found at revision %d\n') % start)
    return tags
482 487
def converted(self, rev, destrev):
    """Append '<destrev> <revision number of rev>' to the hg-shamap file.

    The file lives in the .svn directory of self.wc and is opened in
    append mode on first use.  Nothing happens when self.wc is not set.
    """
    if not self.wc:
        return
    fp = self.convertfp
    if fp is None:
        shamap = os.path.join(self.wc, '.svn', 'hg-shamap')
        fp = self.convertfp = open(shamap, 'a')
    fp.write('%s %d\n' % (destrev, self.revnum(rev)))
    fp.flush()
491 496
492 497 # -- helper functions --
493 498
def revid(self, revnum, module=None):
    """Return the unicode id 'svn:<uuid><module>@<revnum>'.

    self.module is used when module is empty or not given.  The module
    is decoded with self.encoding.
    """
    module = module or self.module
    decoded = module.decode(self.encoding)
    return u"svn:%s%s@%s" % (self.uuid, decoded, revnum)
499 504
def revnum(self, rev):
    """Return the integer that follows the last '@' of rev."""
    tail = rev.rsplit('@', 1)[-1]
    return int(tail)
502 507
def revsplit(self, rev):
    """Split a revision id into (uuid, module, revnum).

    rev is encoded with self.encoding first.  The first four characters
    of the id are dropped from the uuid part.  module is '' when the id
    has no path, otherwise it starts with '/'.
    """
    encoded = rev.encode(self.encoding)
    url, number = encoded.rsplit('@', 1)
    number = int(number)
    pieces = url.split('/', 1)
    uuid = pieces[0][4:]
    if len(pieces) > 1:
        module = '/' + pieces[1]
    else:
        module = ''
    return uuid, module, number
512 517
def latest(self, path, stop=0):
    """Find the latest revid affecting path, up to stop. It may return
    a revision in a different module, since a branch may be moved without
    a change being reported. Return None if computed module does not
    belong to rootmodule subtree.

    When stop is 0 the latest revision of the repository is used.
    Raises SvnPathNotFound when path cannot be stat'ed at stop.
    """
    if not path.startswith(self.rootmodule):
        # Requests on foreign branches may be forbidden at server level
        self.ui.debug(_('ignoring foreign branch %r\n') % path)
        return None

    if not stop:
        stop = svn.ra.get_latest_revnum(self.ra)
    try:
        # stat() is given a path relative to the repository root, so
        # the transport is moved there for the duration of the call.
        # NOTE(review): when stat() raises, the transport is not moved
        # back to prevmodule -- confirm this is intended.
        prevmodule = self.reparent('')
        dirent = svn.ra.stat(self.ra, path.strip('/'), stop)
        self.reparent(prevmodule)
    except SubversionException:
        dirent = None
    if not dirent:
        raise SvnPathNotFound(_('%s not found up to revision %d') % (path, stop))

    # stat() gives us the previous revision on this line of development, but
    # it might be in *another module*. Fetch the log and detect renames down
    # to the latest revision.
    stream = self._getlog([path], stop, dirent.created_rev)
    try:
        for entry in stream:
            paths, revnum, author, date, message = entry
            if revnum <= dirent.created_rev:
                break

            for p in paths:
                # Look for a copied path that is a prefix of path.
                if not path.startswith(p) or not paths[p].copyfrom_path:
                    continue
                newpath = paths[p].copyfrom_path + path[len(p):]
                self.ui.debug(_("branch renamed from %s to %s at %d\n") %
                              (path, newpath, revnum))
                path = newpath
                break
    finally:
        stream.close()

    # The renames may have led outside of the converted subtree.
    if not path.startswith(self.rootmodule):
        self.ui.debug(_('ignoring foreign branch %r\n') % path)
        return None
    return self.revid(dirent.created_rev, path)
560 565
def get_blacklist(self):
    """Avoid certain revision numbers.

    It is not uncommon for two nearby revisions to cancel each other
    out, e.g. 'I copied trunk into a subdirectory of itself instead
    of making a branch'. The converted repository is significantly
    smaller if we ignore such revisions.

    Reads "blacklist.txt" from the current working directory and stores
    the revision numbers it lists, one per line, in self.blacklist.
    Lines starting with "#" and lines that are not integers are ignored.

    self.blacklist is set to an empty set before the file is opened, so
    it exists even when opening fails.  Raises IOError when the file
    cannot be opened or read.
    """
    self.blacklist = set()
    blacklist = self.blacklist
    fp = open("blacklist.txt", "r")
    try:
        for line in fp:
            if line.startswith("#"):
                continue
            try:
                blacklist.add(int(line.strip()))
            except ValueError:
                pass # not an integer or a comment
    finally:
        # Close the file explicitly instead of leaving it to the
        # garbage collector.
        fp.close()
576 581
def is_blacklisted(self, svn_rev):
    """Tell whether svn_rev is a member of self.blacklist."""
    skipped = self.blacklist
    return svn_rev in skipped
579 584
def reparent(self, module):
    """Point the svn transport at module and return the previous module.

    When the transport already points at module nothing is done and
    module itself is returned.  A previous module of None is reported
    as ''.
    """
    previous = self.prevmodule
    if previous == module:
        return module
    target = self.baseurl + urllib.quote(module)
    self.ui.debug(_("reparent to %s\n") % target)
    svn.ra.reparent(self.ra, target)
    self.prevmodule = module
    if previous is None:
        return ''
    return previous
592 597
def expandpaths(self, rev, paths, parents):
    """Expand the changed paths of rev into file entries and copies.

    paths is a sequence of (path, entry) pairs and parents the list of
    parent revision ids.  Returns (entries, copies): entries is a list
    of file names without duplicates and copies a dict mapping a
    destination file to its source.

    The transport is reparented to the module of rev when it differs
    from self.module.
    """
    entries = []
    copyfrom = {} # Map of entrypath, revision for finding source of deleted revisions.
    copies = {}

    new_module, revnum = self.revsplit(rev)[1:]
    if new_module != self.module:
        self.module = new_module
        self.reparent(self.module)

    for path, ent in paths:
        entrypath = self.getrelpath(path)
        entry = entrypath.decode(self.encoding)

        kind = self._checkpath(entrypath, revnum)
        if kind == svn.core.svn_node_file:
            # A file present in this revision: record it and its copy
            # source, if that source can be represented.
            entries.append(self.recode(entry))
            if not ent.copyfrom_path or not parents:
                continue
            # Copy sources not in parent revisions cannot be represented,
            # ignore their origin for now
            pmodule, prevnum = self.revsplit(parents[0])[1:]
            if ent.copyfrom_rev < prevnum:
                continue
            copyfrom_path = self.getrelpath(ent.copyfrom_path, pmodule)
            if not copyfrom_path:
                continue
            self.ui.debug(_("copied to %s from %s@%s\n") %
                          (entrypath, copyfrom_path, ent.copyfrom_rev))
            copies[self.recode(entry)] = self.recode(copyfrom_path)
        elif kind == 0: # gone, but had better be a deleted *file*
            self.ui.debug(_("gone from %s\n") % ent.copyfrom_rev)

            # if a branch is created but entries are removed in the same
            # changeset, get the right fromrev
            # parents cannot be empty here, you cannot remove things from
            # a root revision.
            uuid, old_module, fromrev = self.revsplit(parents[0])

            basepath = old_module + "/" + self.getrelpath(path)
            entrypath = basepath

            def lookup_parts(p):
                # Return (prefix, entry) for the longest leading part of
                # p recorded in copyfrom, or None when there is none.
                rc = None
                parts = p.split("/")
                for i in range(len(parts)):
                    part = "/".join(parts[:i])
                    info = part, copyfrom.get(part, None)
                    if info[1] is not None:
                        self.ui.debug(_("found parent directory %s\n") % info[1])
                        rc = info
                return rc

            self.ui.debug(_("base, entry %s %s\n") % (basepath, entrypath))

            frompath, froment = lookup_parts(entrypath) or (None, revnum - 1)

            # need to remove fragment from lookup_parts and replace with copyfrom_path
            if frompath is not None:
                self.ui.debug(_("munge-o-matic\n"))
                self.ui.debug(entrypath + '\n')
                self.ui.debug(entrypath[len(frompath):] + '\n')
                entrypath = froment.copyfrom_path + entrypath[len(frompath):]
                fromrev = froment.copyfrom_rev
                self.ui.debug(_("info: %s %s %s %s\n") % (frompath, froment, ent, entrypath))

            # We can avoid the reparent calls if the module has not changed
            # but it probably does not worth the pain.
            prevmodule = self.reparent('')
            fromkind = svn.ra.check_path(self.ra, entrypath.strip('/'), fromrev)
            self.reparent(prevmodule)

            if fromkind == svn.core.svn_node_file:   # a deleted file
                entries.append(self.recode(entry))
            elif fromkind == svn.core.svn_node_dir:
                # print "Deleted/moved non-file:", revnum, path, ent
                # children = self._find_children(path, revnum - 1)
                # print "find children %s@%d from %d action %s" % (path, revnum, ent.copyfrom_rev, ent.action)
                # Sometimes this is tricky. For example: in
                # The Subversion Repository revision 6940 a dir
                # was copied and one of its files was deleted
                # from the new location in the same commit. This
                # code can't deal with that yet.
                if ent.action == 'C':
                    children = self._find_children(path, fromrev)
                else:
                    # List the children at the old location and map
                    # their names onto the new one.
                    oroot = entrypath.strip('/')
                    nroot = path.strip('/')
                    children = self._find_children(oroot, fromrev)
                    children = [s.replace(oroot,nroot) for s in children]
                # Mark all [files, not directories] as deleted.
                for child in children:
                    # Can we move a child directory and its
                    # parent in the same commit? (probably can). Could
                    # cause problems if instead of revnum -1,
                    # we have to look in (copyfrom_path, revnum - 1)
                    entrypath = self.getrelpath("/" + child, module=old_module)
                    if entrypath:
                        entry = self.recode(entrypath.decode(self.encoding))
                        if entry in copies:
                            # deleted file within a copy
                            del copies[entry]
                        else:
                            entries.append(entry)
            else:
                self.ui.debug(_('unknown path in revision %d: %s\n') % \
                              (revnum, path))
        elif kind == svn.core.svn_node_dir:
            # Should probably synthesize normal file entries
            # and handle as above to clean up copy/rename handling.

            # If the directory just had a prop change,
            # then we shouldn't need to look for its children.
            if ent.action == 'M':
                continue

            # Also this could create duplicate entries. Not sure
            # whether this will matter. Maybe should make entries a set.
            # print "Changed directory", revnum, path, ent.action, ent.copyfrom_path, ent.copyfrom_rev
            # This will fail if a directory was copied
            # from another branch and then some of its files
            # were deleted in the same transaction.
            children = sorted(self._find_children(path, revnum))
            for child in children:
                # Can we move a child directory and its
                # parent in the same commit? (probably can). Could
                # cause problems if instead of revnum -1,
                # we have to look in (copyfrom_path, revnum - 1)
                entrypath = self.getrelpath("/" + child)
                # print child, self.module, entrypath
                if entrypath:
                    # Need to filter out directories here...
                    kind = self._checkpath(entrypath, revnum)
                    if kind != svn.core.svn_node_dir:
                        entries.append(self.recode(entrypath))

            # Copies here (must copy all from source)
            # Probably not a real problem for us if
            # source does not exist
            if not ent.copyfrom_path or not parents:
                continue
            # Copy sources not in parent revisions cannot be represented,
            # ignore their origin for now
            pmodule, prevnum = self.revsplit(parents[0])[1:]
            if ent.copyfrom_rev < prevnum:
                continue
            copyfrompath = ent.copyfrom_path.decode(self.encoding)
            copyfrompath = self.getrelpath(copyfrompath, pmodule)
            if not copyfrompath:
                continue
            # Remember the copy so that deletions below this directory
            # can be traced to their source by lookup_parts.
            copyfrom[path] = ent
            self.ui.debug(_("mark %s came from %s:%d\n")
                          % (path, copyfrompath, ent.copyfrom_rev))
            children = self._find_children(ent.copyfrom_path, ent.copyfrom_rev)
            children.sort()
            for child in children:
                entrypath = self.getrelpath("/" + child, pmodule)
                if not entrypath:
                    continue
                entry = entrypath.decode(self.encoding)
                copytopath = path + entry[len(copyfrompath):]
                copytopath = self.getrelpath(copytopath)
                copies[self.recode(copytopath)] = self.recode(entry, pmodule)

    # Going through a set drops the duplicate entries.
    return (list(set(entries)), copies)
758 763
def _fetch_revisions(self, from_revnum, to_revnum):
    """Read the log of self.module and fill self.commits and self.paths.

    The two bounds are swapped when needed so that the log is read from
    the higher revision number down to the lower one.  Reading stops
    early at a branch root or below self.startrev.

    Raises util.Abort when Subversion reports that the branch has no
    such revision; other SubversionExceptions are re-raised.
    """
    if from_revnum < to_revnum:
        from_revnum, to_revnum = to_revnum, from_revnum

    # Commit parsed by the previous call of parselogentry.
    self.child_cset = None

    def parselogentry(orig_paths, revnum, author, date, message):
        """Return the parsed commit object or None, and True if
        the revision is a branch root.
        """
        self.ui.debug(_("parsing revision %d (%d changes)\n") %
                      (revnum, len(orig_paths)))

        branched = False
        rev = self.revid(revnum)
        # branch log might return entries for a parent we already have

        if rev in self.commits or revnum < to_revnum:
            return None, branched

        parents = []
        # check whether this revision is the start of a branch or part
        # of a branch renaming
        orig_paths = sorted(orig_paths.iteritems())
        root_paths = [(p,e) for p,e in orig_paths if self.module.startswith(p)]
        if root_paths:
            # The list is sorted, so the last item is taken.
            path, ent = root_paths[-1]
            if ent.copyfrom_path:
                branched = True
                newpath = ent.copyfrom_path + self.module[len(path):]
                # ent.copyfrom_rev may not be the actual last revision
                previd = self.latest(newpath, ent.copyfrom_rev)
                if previd is not None:
                    prevmodule, prevnum = self.revsplit(previd)[1:]
                    if prevnum >= self.startrev:
                        parents = [previd]
                        self.ui.note(_('found parent of branch %s at %d: %s\n') %
                                     (self.module, prevnum, prevmodule))
            else:
                self.ui.debug(_("no copyfrom path, don't know what to do.\n"))

        paths = []
        # filter out unrelated paths
        for path, ent in orig_paths:
            if self.getrelpath(path) is None:
                continue
            paths.append((path, ent))

        # Example SVN datetime. Includes microseconds.
        # ISO-8601 conformant
        # '2007-01-04T17:35:00.902377Z'
        date = util.parsedate(date[:19] + " UTC", ["%Y-%m-%dT%H:%M:%S"])

        log = message and self.recode(message) or ''
        author = author and self.recode(author) or ''
        try:
            # The branch name is the last component of the module;
            # 'trunk' is mapped to the empty name.
            branch = self.module.split("/")[-1]
            if branch == 'trunk':
                branch = ''
        except IndexError:
            branch = None

        cset = commit(author=author,
                      date=util.datestr(date),
                      desc=log,
                      parents=parents,
                      branch=branch,
                      rev=rev.encode('utf-8'))

        self.commits[rev] = cset
        # The parents list is *shared* among self.paths and the
        # commit object. Both will be updated below.
        self.paths[rev] = (paths, cset.parents)
        # The log is read backwards: this revision becomes the parent
        # of the one parsed just before, unless that one already has
        # parents.
        if self.child_cset and not self.child_cset.parents:
            self.child_cset.parents[:] = [rev]
        self.child_cset = cset
        return cset, branched

    self.ui.note(_('fetching revision log for "%s" from %d to %d\n') %
                 (self.module, from_revnum, to_revnum))

    try:
        firstcset = None
        lastonbranch = False
        stream = self._getlog([self.module], from_revnum, to_revnum)
        try:
            for entry in stream:
                paths, revnum, author, date, message = entry
                if revnum < self.startrev:
                    lastonbranch = True
                    break
                if self.is_blacklisted(revnum):
                    self.ui.note(_('skipping blacklisted revision %d\n')
                                 % revnum)
                    continue
                if not paths:
                    self.ui.debug(_('revision %d has no entries\n') % revnum)
                    continue
                cset, lastonbranch = parselogentry(paths, revnum, author,
                                                   date, message)
                if cset:
                    firstcset = cset
                if lastonbranch:
                    break
        finally:
            stream.close()

        if not lastonbranch and firstcset and not firstcset.parents:
            # The first revision of the sequence (the last fetched one)
            # has invalid parents if not a branch root. Find the parent
            # revision now, if any.
            try:
                firstrevnum = self.revnum(firstcset.rev)
                if firstrevnum > 1:
                    latest = self.latest(self.module, firstrevnum - 1)
                    if latest:
                        firstcset.parents.append(latest)
            except SvnPathNotFound:
                pass
    except SubversionException, (inst, num):
        if num == svn.core.SVN_ERR_FS_NO_SUCH_REVISION:
            raise util.Abort(_('svn: branch has no revision %s') % to_revnum)
        raise
882 887
883 888 def _getfile(self, file, rev):
884 889 # TODO: ra.get_file transmits the whole file instead of diffs.
885 890 mode = ''
886 891 try:
887 892 new_module, revnum = self.revsplit(rev)[1:]
888 893 if self.module != new_module:
889 894 self.module = new_module
890 895 self.reparent(self.module)
891 896 io = StringIO()
892 897 info = svn.ra.get_file(self.ra, file, revnum, io)
893 898 data = io.getvalue()
894 899 # ra.get_files() seems to keep a reference on the input buffer
895 900 # preventing collection. Release it explicitely.
896 901 io.close()
897 902 if isinstance(info, list):
898 903 info = info[-1]
899 904 mode = ("svn:executable" in info) and 'x' or ''
900 905 mode = ("svn:special" in info) and 'l' or mode
901 906 except SubversionException, e:
902 907 notfound = (svn.core.SVN_ERR_FS_NOT_FOUND,
903 908 svn.core.SVN_ERR_RA_DAV_PATH_NOT_FOUND)
904 909 if e.apr_err in notfound: # File not found
905 910 raise IOError()
906 911 raise
907 912 if mode == 'l':
908 913 link_prefix = "link "
909 914 if data.startswith(link_prefix):
910 915 data = data[len(link_prefix):]
911 916 return data, mode
912 917
def _find_children(self, path, revnum):
    """Return the entries svn lists below path at revision revnum.

    Each returned name is prefixed with path (stripped of surrounding
    slashes) and a '/'.
    """
    relpath = path.strip('/')
    pool = Pool()
    url = (self.baseurl + '/' + urllib.quote(relpath)).strip('/')
    # NOTE(review): the True argument presumably requests a recursive
    # listing -- confirm against the svn.client.ls bindings.
    listing = svn.client.ls(url, optrev(revnum), True, self.ctx, pool)
    children = []
    for name in listing.keys():
        children.append('%s/%s' % (relpath, name))
    return children
919 924
def getrelpath(self, path, module=None):
    """Return path relative to module, or None if it lies outside it.

    module defaults to self.module. The result is '' when path is the
    module itself.
    """
    if module is None:
        module = self.module
    # Given the repository url of this wc, say
    #   "http://server/plone/CMFPlone/branches/Plone-2_0-branch"
    # extract the "entry" portion (a relative path) from what
    # svn log --xml says, ie
    #   "/CMFPlone/branches/Plone-2_0-branch/tests/PloneTestCase.py"
    # that is to say "tests/PloneTestCase.py"
    if path.startswith(module):
        tail = path.rstrip('/')[len(module):]
        if tail[:1] == '/':
            return tail[1:]
        if not tail:
            return tail

    # The path is outside our tracked tree...
    self.ui.debug(_('%r is not under %r, ignoring\n') % (path, module))
    return None
939 944
def _checkpath(self, path, revnum):
    """Return what svn.ra.check_path reports for path at revnum."""
    # ra.check_path does not like leading slashes very much, it leads
    # to PROPFIND subversion errors
    stripped = path.strip('/')
    return svn.ra.check_path(self.ra, stripped, revnum)
944 949
def _getlog(self, paths, start, end, limit=0, discover_changed_paths=True,
            strict_node_history=False):
    """Return a logstream over the svn log of paths between start and end.

    The encoded arguments are written to the standard input of an
    'hg debugsvnlog' child process and the stream is built on that
    process' standard output.
    """
    def relative(p):
        # Normalize path names, svn >= 1.5 only wants paths relative to
        # supplied URL
        if not p.startswith('/'):
            p = self.module + '/' + p
        return p.strip('/')

    relpaths = [relative(p) for p in paths]
    payload = encodeargs([self.baseurl, relpaths, start, end, limit,
                          discover_changed_paths, strict_node_history])
    cmd = '%s debugsvnlog' % util.shellquote(util.hgexecutable())
    stdin, stdout = util.popen2(cmd, 'b')
    stdin.write(payload)
    stdin.close()
    return logstream(stdout)
963 968
# Shell script that svn_sink.__init__ writes to 'hooks/pre-revprop-change'
# of a repository it has just created. It exits 0 (allow) when svn:log is
# modified ("M") or when hg:convert-branch / hg:convert-rev are added ("A"),
# and exits 1 with a message on stderr for any other revision property
# change.
pre_revprop_change = '''#!/bin/sh

REPOS="$1"
REV="$2"
USER="$3"
PROPNAME="$4"
ACTION="$5"

if [ "$ACTION" = "M" -a "$PROPNAME" = "svn:log" ]; then exit 0; fi
if [ "$ACTION" = "A" -a "$PROPNAME" = "hg:convert-branch" ]; then exit 0; fi
if [ "$ACTION" = "A" -a "$PROPNAME" = "hg:convert-rev" ]; then exit 0; fi

echo "Changing prohibited revision property" >&2
exit 1
'''
979 984
class svn_sink(converter_sink, commandline):
    """Converter sink that records changesets in a Subversion working copy.

    Changes are applied to the files of the working copy and committed
    by running the 'svn' command line client.
    """

    # Extracts the new revision number from the output of 'svn commit'.
    commit_re = re.compile(r'Committed revision (\d+).', re.M)

    def prerun(self):
        """Change into the working copy, if one is set.

        NOTE(review): presumably called by commandline before each
        command is run -- confirm in common.py.
        """
        if self.wc:
            os.chdir(self.wc)

    def postrun(self):
        """Change back to the directory that was current at creation."""
        if self.wc:
            os.chdir(self.cwd)

    def join(self, name):
        """Return the path of name inside the working copy's '.svn'."""
        return os.path.join(self.wc, '.svn', name)

    def revmapfile(self):
        """Return the path of the 'hg-shamap' file."""
        return self.join('hg-shamap')

    def authorfile(self):
        """Return the path of the 'hg-authormap' file."""
        return self.join('hg-authormap')

    def __init__(self, ui, path):
        """Prepare the working copy used as conversion target.

        If path is a working copy (it has a '.svn/entries' file) it is
        updated and used directly. Otherwise path is treated as a
        repository location and checked out into '<basename>-wc' in the
        current directory. In that case, when the parent directory of
        path exists, the repository is created with 'svnadmin create'
        if it has no 'db/fs-type' file, and path is turned into a
        file:// URL before the checkout.
        """
        converter_sink.__init__(self, ui, path)
        commandline.__init__(self, ui, 'svn')
        # Pending working copy operations, flushed by putcommit().
        self.delete = []
        self.setexec = []
        self.delexec = []
        self.copies = []
        self.wc = None
        self.cwd = os.getcwd()

        path = os.path.realpath(path)

        # Path of the repository if it gets created below, else False.
        created = False
        if os.path.isfile(os.path.join(path, '.svn', 'entries')):
            self.wc = path
            self.run0('update')
        else:
            wcpath = os.path.join(os.getcwd(), os.path.basename(path) + '-wc')

            if os.path.isdir(os.path.dirname(path)):
                if not os.path.exists(os.path.join(path, 'db', 'fs-type')):
                    ui.status(_('initializing svn repo %r\n') %
                              os.path.basename(path))
                    commandline(ui, 'svnadmin').run0('create', path)
                    created = path
                path = util.normpath(path)
                if not path.startswith('/'):
                    path = '/' + path
                path = 'file://' + path

            ui.status(_('initializing svn wc %r\n') % os.path.basename(wcpath))
            self.run0('checkout', path, wcpath)

            self.wc = wcpath
        self.opener = util.opener(self.wc)
        self.wopener = util.opener(self.wc)
        self.childmap = mapfile(ui, self.join('hg-childmap'))
        # Function used to read the execute bit, or None when the working
        # copy's filesystem cannot be checked for it.
        if util.checkexec(self.wc):
            self.is_exec = util.is_exec
        else:
            self.is_exec = None

        if created:
            # Install the hook that permits the revision property changes
            # made by putcommit().
            hook = os.path.join(created, 'hooks', 'pre-revprop-change')
            fp = open(hook, 'w')
            try:
                fp.write(pre_revprop_change)
            finally:
                fp.close()
            util.set_flags(hook, False, True)

        xport = transport.SvnRaTransport(url=geturl(path))
        self.uuid = svn.ra.get_uuid(xport.ra)

    def wjoin(self, *names):
        """Join names onto the working copy path."""
        return os.path.join(self.wc, *names)

    def putfile(self, filename, flags, data):
        """Write data to filename in the working copy.

        With 'l' in flags a symlink pointing to data is created instead
        of a regular file. The file is also queued in self.setexec or
        self.delexec when its svn:executable property has to change to
        match the presence of 'x' in flags.
        """
        wpath = self.wjoin(filename)
        if 'l' in flags:
            self.wopener.symlink(data, filename)
        else:
            try:
                # Drop an existing symlink, presumably so that the data is
                # not written through it. The unlink has to use the same
                # working copy path as the islink check: the current
                # directory is not the working copy here.
                if os.path.islink(wpath):
                    os.unlink(wpath)
            except OSError:
                pass
            fp = self.wopener(filename, 'w')
            try:
                fp.write(data)
            finally:
                fp.close()

            if self.is_exec:
                was_exec = self.is_exec(wpath)
            else:
                # On filesystems not supporting execute-bit, there is no way
                # to know if it is set but asking subversion. Setting it
                # systematically is just as expensive and much simpler.
                was_exec = 'x' not in flags

            util.set_flags(wpath, False, 'x' in flags)
            if was_exec:
                if 'x' not in flags:
                    self.delexec.append(filename)
            else:
                if 'x' in flags:
                    self.setexec.append(filename)

    def _copyfile(self, source, dest):
        """Record with 'svn copy' that dest is a copy of source."""
        # SVN's copy command pukes if the destination file exists, but
        # our copyfile method expects to record a copy that has
        # already occurred. Cross the semantic gap.
        wdest = self.wjoin(dest)
        exists = os.path.exists(wdest)
        if exists:
            # Move the existing destination aside under a unique name.
            fd, tempname = tempfile.mkstemp(
                prefix='hg-copy-', dir=os.path.dirname(wdest))
            os.close(fd)
            os.unlink(tempname)
            os.rename(wdest, tempname)
        try:
            self.run0('copy', source, dest)
        finally:
            if exists:
                # Put the saved destination back over whatever svn wrote.
                try:
                    os.unlink(wdest)
                except OSError:
                    pass
                os.rename(tempname, wdest)

    def dirs_of(self, files):
        """Return the set of directories involved in files.

        This contains every entry of files that is a directory in the
        working copy, plus every parent directory of every entry.
        """
        dirs = set()
        for f in files:
            if os.path.isdir(self.wjoin(f)):
                dirs.add(f)
            for i in strutil.rfindall(f, '/'):
                dirs.add(f[:i])
        return dirs

    def add_dirs(self, files):
        """'svn add' the directories of files lacking '.svn/entries'.

        Returns the list of directories added.
        """
        add_dirs = [d for d in sorted(self.dirs_of(files))
                    if not os.path.exists(self.wjoin(d, '.svn', 'entries'))]
        if add_dirs:
            self.xargs(add_dirs, 'add', non_recursive=True, quiet=True)
        return add_dirs

    def add_files(self, files):
        """'svn add' files, if any, and return them."""
        if files:
            self.xargs(files, 'add', quiet=True)
        return files

    def tidy_dirs(self, names):
        """'svn delete' the directories of names left with only '.svn'.

        Directories are visited in reverse sorted order, so children are
        handled before their parents. Returns the list of directories
        deleted.
        """
        deleted = []
        for d in sorted(self.dirs_of(names), reverse=True):
            wd = self.wjoin(d)
            # os.listdir() returns a list: compare against a list, a
            # comparison with the bare string can never be true.
            if os.listdir(wd) == ['.svn']:
                self.run0('delete', d)
                deleted.append(d)
        return deleted

    def addchild(self, parent, child):
        """Record child as the revision committed on top of parent."""
        self.childmap[parent] = child

    def revid(self, rev):
        """Return the revision identifier 'svn:<uuid>@<rev>' for rev."""
        return u"svn:%s@%s" % (self.uuid, rev)

    def putcommit(self, files, copies, parents, commit, source):
        """Apply a changeset to the working copy and commit it.

        files is a sequence of (name, version) pairs fetched from
        source; a file for which source.getfile raises IOError is
        scheduled for deletion. copies maps a destination name to its
        copy source.

        Returns the revision identifier of the new commit. If one of
        parents already has a child recorded in the child map, that
        child's identifier is returned and nothing is committed.

        Raises util.Abort when no revision number can be found in the
        output of 'svn commit'.
        """
        # Apply changes to working copy
        for f, v in files:
            try:
                data = source.getfile(f, v)
            except IOError:
                self.delete.append(f)
            else:
                e = source.getmode(f, v)
                self.putfile(f, e, data)
                if f in copies:
                    self.copies.append([copies[f], f])
        files = [f[0] for f in files]

        for parent in parents:
            try:
                return self.revid(self.childmap[parent])
            except KeyError:
                pass
        # entries collects every name already taken care of, so that it
        # is not handed to 'svn add' afterwards.
        entries = set(self.delete)
        files = frozenset(files)
        entries.update(self.add_dirs(files.difference(entries)))
        if self.copies:
            for s, d in self.copies:
                self._copyfile(s, d)
            self.copies = []
        if self.delete:
            self.xargs(self.delete, 'delete')
            self.delete = []
        entries.update(self.add_files(files.difference(entries)))
        entries.update(self.tidy_dirs(entries))
        if self.delexec:
            self.xargs(self.delexec, 'propdel', 'svn:executable')
            self.delexec = []
        if self.setexec:
            self.xargs(self.setexec, 'propset', 'svn:executable', '*')
            self.setexec = []

        fd, messagefile = tempfile.mkstemp(prefix='hg-convert-')
        try:
            # Written inside the try block so that the temporary file is
            # removed even if writing the message fails.
            fp = os.fdopen(fd, 'w')
            try:
                fp.write(commit.desc)
            finally:
                fp.close()
            output = self.run0('commit',
                               username=util.shortuser(commit.author),
                               file=messagefile,
                               encoding='utf-8')
            try:
                rev = self.commit_re.search(output).group(1)
            except AttributeError:
                # search() returned None: no revision number in the output.
                self.ui.warn(_('unexpected svn output:\n'))
                self.ui.warn(output)
                raise util.Abort(_('unable to cope with svn output'))
            if commit.rev:
                self.run('propset', 'hg:convert-rev', commit.rev,
                         revprop=True, revision=rev)
            if commit.branch and commit.branch != 'default':
                self.run('propset', 'hg:convert-branch', commit.branch,
                         revprop=True, revision=rev)
            for parent in parents:
                self.addchild(parent, rev)
            return self.revid(rev)
        finally:
            os.unlink(messagefile)

    def puttags(self, tags):
        """Tags are not supported yet: only emit a warning."""
        self.ui.warn(_('XXX TAGS NOT IMPLEMENTED YET\n'))
General Comments 0
You need to be logged in to leave comments. Login now