##// END OF EJS Templates
convert/svn: remove confusing unicode variable
Patrick Mezard -
r8885:2c184bd2 default
parent child Browse files
Show More
@@ -1,1167 +1,1166
1 1 # Subversion 1.4/1.5 Python API backend
2 2 #
3 3 # Copyright(C) 2007 Daniel Holth et al
4 4
5 5 import locale
6 6 import os
7 7 import re
8 8 import sys
9 9 import cPickle as pickle
10 10 import tempfile
11 11 import urllib
12 12
13 13 from mercurial import strutil, util
14 14 from mercurial.i18n import _
15 15
16 16 # Subversion stuff. Works best with very recent Python SVN bindings
17 17 # e.g. SVN 1.5 or backports. Thanks to the bzr folks for enhancing
18 18 # these bindings.
19 19
20 20 from cStringIO import StringIO
21 21
22 22 from common import NoRepo, MissingTool, commit, encodeargs, decodeargs
23 23 from common import commandline, converter_source, converter_sink, mapfile
24 24
25 25 try:
26 26 from svn.core import SubversionException, Pool
27 27 import svn
28 28 import svn.client
29 29 import svn.core
30 30 import svn.ra
31 31 import svn.delta
32 32 import transport
33 33 import warnings
34 34 warnings.filterwarnings('ignore',
35 35 module='svn.core',
36 36 category=DeprecationWarning)
37 37
38 38 except ImportError:
39 39 pass
40 40
class SvnPathNotFound(Exception):
    """Raised when a repository path cannot be found at a given revision."""
43 43
def geturl(path):
    """Turn path into a URL usable by the Subversion bindings.

    The svn client is asked first; if it raises SubversionException, an
    existing local directory is turned into a quoted file:// URL and
    anything else is returned unchanged.
    """
    try:
        canonical = svn.core.svn_path_canonicalize(path)
        return svn.client.url_from_path(canonical)
    except SubversionException:
        # The client could not map it to a URL, build one ourselves
        pass
    if not os.path.isdir(path):
        return path
    localpath = os.path.normpath(os.path.abspath(path))
    if os.name == 'nt':
        # Prefix the normalized path with a slash on Windows
        localpath = '/' + util.normpath(localpath)
    return 'file://%s' % urllib.quote(localpath)
55 55
def optrev(number):
    """Return an svn_opt_revision_t designating revision `number`."""
    revision = svn.core.svn_opt_revision_t()
    revision.kind = svn.core.svn_opt_revision_number
    revision.value.number = number
    return revision
61 61
class changedpath(object):
    """Plain copy of the changed-path fields this module reads.

    Only copyfrom_path, copyfrom_rev and action are kept.
    NOTE(review): presumably exists so log entries can be pickled by
    get_log_child -- confirm against the bindings.
    """
    def __init__(self, p):
        for field in ('copyfrom_path', 'copyfrom_rev', 'action'):
            setattr(self, field, getattr(p, field))
67 67
68 68 def get_log_child(fp, url, paths, start, end, limit=0, discover_changed_paths=True,
69 69 strict_node_history=False):
70 70 protocol = -1
71 71 def receiver(orig_paths, revnum, author, date, message, pool):
72 72 if orig_paths is not None:
73 73 for k, v in orig_paths.iteritems():
74 74 orig_paths[k] = changedpath(v)
75 75 pickle.dump((orig_paths, revnum, author, date, message),
76 76 fp, protocol)
77 77
78 78 try:
79 79 # Use an ra of our own so that our parent can consume
80 80 # our results without confusing the server.
81 81 t = transport.SvnRaTransport(url=url)
82 82 svn.ra.get_log(t.ra, paths, start, end, limit,
83 83 discover_changed_paths,
84 84 strict_node_history,
85 85 receiver)
86 86 except SubversionException, (inst, num):
87 87 pickle.dump(num, fp, protocol)
88 88 except IOError:
89 89 # Caller may interrupt the iteration
90 90 pickle.dump(None, fp, protocol)
91 91 else:
92 92 pickle.dump(None, fp, protocol)
93 93 fp.close()
94 94 # With large history, cleanup process goes crazy and suddenly
95 95 # consumes *huge* amount of memory. The output file being closed,
96 96 # there is no need for clean termination.
97 97 os._exit(0)
98 98
def debugsvnlog(ui, **opts):
    """Fetch SVN log in a subprocess and channel them back to parent to
    avoid memory collection issues.
    """
    # Both standard streams are switched to binary mode before any
    # data is read from or written to them
    for stream in (sys.stdin, sys.stdout):
        util.set_binary(stream)
    # The get_log_child() arguments arrive encoded on stdin
    logargs = decodeargs(sys.stdin.read())
    get_log_child(sys.stdout, *logargs)
107 107
class logstream(object):
    """Interruptible revision log iterator.

    Reads pickled records from `stdout` and yields every
    (orig_paths, revnum, author, date, message) tuple until a None
    record is read. Any other record raises a SubversionException
    carrying that record.
    """
    def __init__(self, stdout):
        # File-like object to unpickle from; set to None by close()
        self._stdout = stdout

    def __iter__(self):
        while True:
            entry = pickle.load(self._stdout)
            try:
                orig_paths, revnum, author, date, message = entry
            except (TypeError, ValueError):
                # Not a 5-tuple: a non-iterable record raises TypeError,
                # a sequence of the wrong length raises ValueError. A
                # bare except here would also catch KeyboardInterrupt.
                if entry is None:
                    break
                raise SubversionException("child raised exception", entry)
            yield entry

    def close(self):
        """Close the underlying stream; further calls do nothing."""
        if self._stdout:
            self._stdout.close()
            self._stdout = None
128 128
129 129
130 130 # Check to see if the given path is a local Subversion repo. Verify this by
131 131 # looking for several svn-specific files and directories in the given
132 132 # directory.
def filecheck(path, proto):
    """Return True if path contains 'locks', 'hooks', 'format' and 'db'.

    `proto` is accepted so the signature matches the other checkers;
    it is not used.
    """
    markers = ('locks', 'hooks', 'format', 'db')
    missing = [name for name in markers
               if not os.path.exists(os.path.join(path, name))]
    return not missing
138 138
139 139 # Check to see if a given path is the root of an svn repo over http. We verify
140 140 # this by requesting a version-controlled URL we know can't exist and looking
141 141 # for the svn-specific "not found" XML.
def httpcheck(path, proto):
    """Return True if the server answers the probe URL with the svn
    "not found" marker (errcode 160013).
    """
    probe = '%s://%s/!svn/ver/0/.svn' % (proto, path)
    answer = urllib.urlopen(probe).read()
    return '<m:human-readable errcode="160013">' in answer
145 145
# Scheme -> checker(path, proto) used by issvnurl()
protomap = {
    'http': httpcheck,
    'https': httpcheck,
    'file': filecheck,
}

def issvnurl(url):
    """Return True if url, or one of its parent paths, passes the
    checker registered for its scheme in protomap.

    A string without '://' is treated as a local path with the 'file'
    scheme. Schemes absent from protomap never match.
    """
    try:
        proto, path = url.split('://', 1)
        path = urllib.url2pathname(path)
    except ValueError:
        proto = 'file'
        path = os.path.abspath(url)
    if proto not in protomap:
        return False
    check = protomap[proto]
    candidate = path.replace(os.sep, '/')
    # Walk up one path component at a time until no slash is left
    while '/' in candidate:
        if check(candidate, proto):
            return True
        candidate = candidate[:candidate.rindex('/')]
    return False
164 164
165 165 # SVN conversion code stolen from bzr-svn and tailor
166 166 #
167 167 # Subversion looks like a versioned filesystem, branches structures
168 168 # are defined by conventions and not enforced by the tool. First,
169 169 # we define the potential branches (modules) as "trunk" and "branches"
170 170 # children directories. Revisions are then identified by their
171 171 # module and revision number (and a repository identifier).
172 172 #
173 173 # The revision graph is really a tree (or a forest). By default, a
174 174 # revision parent is the previous revision in the same module. If the
175 175 # module directory is copied/moved from another module then the
176 176 # revision is the module root and its parent the source revision in
177 177 # the parent module. A revision has at most one parent.
178 178 #
179 179 class svn_source(converter_source):
def __init__(self, ui, url, rev=None):
    """Open the Subversion repository at url as a conversion source.

    Raises NoRepo when url does not look like a Subversion
    repository, MissingTool when the python bindings are missing or
    older than 1.4, and util.Abort for a non-integer revision or
    start revision or when no revision is found in the module.
    """
    super(svn_source, self).__init__(ui, url, rev=rev)

    # Cheap checks on the url itself, evaluated lazily in this order
    remote = url.startswith('svn://') or url.startswith('svn+ssh://')
    if not remote:
        workingcopy = (os.path.exists(url) and
                       os.path.exists(os.path.join(url, '.svn')))
        if not workingcopy and not issvnurl(url):
            raise NoRepo("%s does not look like a Subversion repo" % url)

    # The name is only bound when the guarded svn imports succeeded
    try:
        SubversionException
    except NameError:
        raise MissingTool(_('Subversion python bindings could not be loaded'))

    try:
        version = svn.core.SVN_VER_MAJOR, svn.core.SVN_VER_MINOR
        if version < (1, 4):
            raise MissingTool(_('Subversion python bindings %d.%d found, '
                                '1.4 or later required') % version)
    except AttributeError:
        raise MissingTool(_('Subversion python bindings are too old, 1.4 '
                            'or later required'))

    self.lastrevs = {}

    # Support file://path@rev syntax. Useful e.g. to convert
    # deleted branches.
    latest = None
    at = url.rfind('@')
    if at >= 0:
        try:
            latest = int(url[at+1:])
        except ValueError:
            # Not a revision suffix, keep the url as it is
            pass
        else:
            url = url[:at]
    self.url = geturl(url)
    self.encoding = 'UTF-8' # Subversion is always nominal UTF-8
    try:
        self.transport = transport.SvnRaTransport(url=self.url)
        self.ra = self.transport.ra
        self.ctx = self.transport.client
        self.baseurl = svn.ra.get_repos_root(self.ra)
        # Module is either empty or a repository path starting with
        # a slash and not ending with a slash.
        self.module = urllib.unquote(self.url[len(self.baseurl):])
        self.prevmodule = None
        self.rootmodule = self.module
        self.commits = {}
        self.paths = {}
        self.uuid = svn.ra.get_uuid(self.ra).decode(self.encoding)
    except SubversionException:
        ui.traceback()
        raise NoRepo("%s does not look like a Subversion repo" % self.url)

    # An explicit rev argument overrides any @rev url suffix
    if rev:
        try:
            latest = int(rev)
        except ValueError:
            raise util.Abort(_('svn: revision %s is not an integer') % rev)

    self.startrev = self.ui.config('convert', 'svn.startrev', default=0)
    try:
        # Negative start revisions are clamped to 0
        self.startrev = max(int(self.startrev), 0)
    except ValueError:
        raise util.Abort(_('svn: start revision %s is not an integer')
                         % self.startrev)

    # A missing or unreadable blacklist file is not an error
    try:
        self.get_blacklist()
    except IOError:
        pass

    self.head = self.latest(self.module, latest)
    if not self.head:
        raise util.Abort(_('no revision found in module %s') %
                         self.module.encode(self.encoding))
    self.last_changed = self.revnum(self.head)

    self._changescache = None

    # Remember the working copy, if url is one; used by converted()
    self.wc = None
    if os.path.exists(os.path.join(url, '.svn/entries')):
        self.wc = url
    self.convertfp = None
267 267
def setrevmap(self, revmap):
    """Store in self.lastrevs the highest revision number found in
    revmap's keys for each module.
    """
    highest = {}
    for revid in revmap:
        module, revnum = self.revsplit(revid)[1:]
        if module not in highest or revnum > highest[module]:
            highest[module] = revnum
    self.lastrevs = highest
276 276
def exists(self, path, optrev):
    """Return True if listing path (relative to self.url) at revision
    optrev succeeds, False if the bindings raise SubversionException.
    """
    target = '%s/%s' % (self.url.rstrip('/'), urllib.quote(path))
    try:
        svn.client.ls(target, optrev, False, self.ctx)
    except SubversionException:
        return False
    return True
284 284
def getheads(self):
    """Compute and return self.heads, the list of head revision ids.

    Looks for trunk, tags and branches directories (names can be
    overridden with the convert.svn.trunk/tags/branches settings),
    moves self.module into trunk when one is found and adds one head
    for every non-empty branch directory. Raises util.Abort when a
    configured directory is missing, when the module has no revision,
    or when a start revision is combined with several heads or lies
    past the only head.
    """
    def isdir(path, revnum):
        return self._checkpath(path, revnum) == svn.core.svn_node_dir

    def getcfgpath(name, rev):
        configured = self.ui.config('convert', 'svn.' + name)
        # An explicitly blank setting disables this directory
        if configured is not None and configured.strip() == '':
            return None
        path = (configured or name).strip('/')
        if self.exists(path, rev):
            self.ui.note(_('found %s at %r\n') % (name, path))
            return path
        # Only a configured path is required to exist
        if configured:
            raise util.Abort(_('expected %s to be at %r, but not found')
                             % (name, path))
        return None

    rev = optrev(self.last_changed)
    oldmodule = ''
    trunk = getcfgpath('trunk', rev)
    self.tags = getcfgpath('tags', rev)
    branches = getcfgpath('branches', rev)

    # If the project has a trunk or branches, we will extract heads
    # from them. We keep the project root otherwise.
    if trunk:
        oldmodule = self.module or ''
        self.module += '/' + trunk
        self.head = self.latest(self.module, self.last_changed)
        if not self.head:
            raise util.Abort(_('no revision found in module %s') %
                             self.module.encode(self.encoding))

    # First head in the list is the module's head
    self.heads = [self.head]
    if self.tags is not None:
        self.tags = '%s/%s' % (oldmodule , (self.tags or 'tags'))

    # Check if branches bring a few more heads to the list
    if branches:
        branchesurl = self.url.strip('/') + '/' + urllib.quote(branches)
        branchnames = svn.client.ls(branchesurl, rev, False, self.ctx)
        for branch in branchnames.keys():
            module = '%s/%s/%s' % (oldmodule, branches, branch)
            if not isdir(module, self.last_changed):
                continue
            brevid = self.latest(module, self.last_changed)
            if brevid:
                self.ui.note(_('found branch %s at %d\n') %
                             (branch, self.revnum(brevid)))
                self.heads.append(brevid)
            else:
                self.ui.note(_('ignoring empty branch %s\n') %
                             branch.encode(self.encoding))

    if self.startrev and self.heads:
        if len(self.heads) > 1:
            raise util.Abort(_('svn: start revision is not supported '
                               'with more than one branch'))
        headrevnum = self.revnum(self.heads[0])
        if headrevnum < self.startrev:
            raise util.Abort(_('svn: no revision found after start revision %d')
                             % self.startrev)

    return self.heads
353 353
def getfile(self, file, rev):
    """Return the data of file at rev and remember its mode so that
    getmode() can return it later.
    """
    contents, flags = self._getfile(file, rev)
    self.modecache[file, rev] = flags
    return contents
358 358
def getmode(self, file, rev):
    """Return the mode cached for (file, rev).

    Raises KeyError if nothing has been cached for that pair.
    """
    key = (file, rev)
    return self.modecache[key]
361 361
def getchanges(self, rev):
    """Return (files, copies) for rev.

    files is a sorted list of (path, rev) pairs and copies maps
    destinations to sources. A result stored by getchangedfiles() for
    the same rev is returned as is. The entry for rev is removed from
    self.paths once the result has been computed.
    """
    cached = self._changescache
    if cached and cached[0] == rev:
        return cached[1]
    self._changescache = None
    self.modecache = {}
    paths, parents = self.paths[rev]
    if parents:
        files, copies = self.expandpaths(rev, paths, parents)
    else:
        # Perform a full checkout on roots
        module, revnum = self.revsplit(rev)[1:]
        listing = svn.client.ls(self.baseurl + urllib.quote(module),
                                optrev(revnum), True, self.ctx)
        files = [name for name, dirent in listing.iteritems()
                 if dirent.kind == svn.core.svn_node_file]
        copies = {}

    files.sort()
    files = [(name, rev) for name in files]

    # caller caches the result, so free it here to release memory
    del self.paths[rev]
    return (files, copies)
385 385
def getchangedfiles(self, rev, i):
    """Return the names of the files changed in rev.

    The full getchanges() result is kept in self._changescache so a
    following getchanges(rev) call can reuse it. `i` is not used.
    """
    changes = self.getchanges(rev)
    self._changescache = (rev, changes)
    files = changes[0]
    return [entry[0] for entry in files]
390 390
def getcommit(self, rev):
    """Return the commit object for rev, fetching the log if needed.

    The commit is removed from self.commits before being returned.
    """
    if rev not in self.commits:
        module, revnum = self.revsplit(rev)[1:]
        self.module = module
        self.reparent(module)
        # We assume that:
        # - requests for revisions after "stop" come from the
        # revision graph backward traversal. Cache all of them
        # down to stop, they will be used eventually.
        # - requests for revisions before "stop" come to get
        # isolated branches parents. Just fetch what is needed.
        stop = self.lastrevs.get(module, 0)
        if revnum < stop:
            stop = revnum + 1
        self._fetch_revisions(revnum, stop)
    # caller caches the result, so free it here to release memory
    return self.commits.pop(rev)
410 410
def gettags(self):
    """Return a dict mapping tag names to revision ids.

    An empty dict is returned when self.tags is None. When the log
    cannot be read (SubversionException), a note is emitted and the
    tags collected so far are returned.
    """
    tags = {}
    if self.tags is None:
        return tags

    # svn tags are just a convention, project branches left in a
    # 'tags' directory. There is no other relationship than
    # ancestry, which is expensive to discover and makes them hard
    # to update incrementally. Worse, past revisions may be
    # referenced by tags far away in the future, requiring a deep
    # history traversal on every calculation. Current code
    # performs a single backward traversal, tracking moves within
    # the tags directory (tag renaming) and recording a new tag
    # every time a project is copied from outside the tags
    # directory. It also lists deleted tags, this behaviour may
    # change in the future.
    pendings = []
    tagspath = self.tags
    start = svn.ra.get_latest_revnum(self.ra)
    try:
        for entry in self._getlog([self.tags], start, self.startrev):
            origpaths, revnum, author, date, message = entry
            copies = [(e.copyfrom_path, e.copyfrom_rev, p) for p, e
                      in origpaths.iteritems() if e.copyfrom_path]
            # Apply moves/copies from more specific to general
            copies.sort(reverse=True)

            srctagspath = tagspath
            if copies and copies[-1][2] == tagspath:
                # Track tags directory moves
                srctagspath = copies.pop()[0]

            for source, sourcerev, dest in copies:
                if not dest.startswith(tagspath + '/'):
                    continue
                for tag in pendings:
                    if tag[0].startswith(dest):
                        # A pending tag was copied from dest: follow
                        # it back to where dest itself came from
                        tagpath = source + tag[0][len(dest):]
                        tag[:2] = [tagpath, sourcerev]
                        break
                else:
                    pendings.append([source, sourcerev, dest])

            # Filter out tags with children coming from different
            # parts of the repository like:
            # /tags/tag.1 (from /trunk:10)
            # /tags/tag.1/foo (from /branches/foo:12)
            # Here /tags/tag.1 is discarded as well as its children.
            # It happens with tools like cvs2svn. Such tags cannot
            # be represented in mercurial.
            #
            # Only added paths that are copies are kept: a plain add
            # has no copyfrom_path and addeds[destroot] + '/' below
            # would raise a TypeError on None.
            addeds = dict((p, e.copyfrom_path) for p, e
                          in origpaths.iteritems()
                          if e.action == 'A' and e.copyfrom_path)
            badroots = set()
            for destroot in addeds:
                for source, sourcerev, dest in pendings:
                    if (not dest.startswith(destroot + '/')
                        or source.startswith(addeds[destroot] + '/')):
                        continue
                    badroots.add(destroot)
                    break

            for badroot in badroots:
                pendings = [p for p in pendings if p[2] != badroot
                            and not p[2].startswith(badroot + '/')]

            # Tell tag renamings from tag creations
            remainings = []
            for source, sourcerev, dest in pendings:
                tagname = dest.split('/')[-1]
                if source.startswith(srctagspath):
                    # Still inside the tags directory: a renaming,
                    # keep following it in older revisions
                    remainings.append([source, sourcerev, tagname])
                    continue
                if tagname in tags:
                    # Keep the latest tag value
                    continue
                # From revision may be fake, get one with changes
                try:
                    tagid = self.latest(source, sourcerev)
                    if tagid and tagname not in tags:
                        tags[tagname] = tagid
                except SvnPathNotFound:
                    # It happens when we are following directories
                    # we assumed were copied with their parents
                    # but were really created in the tag
                    # directory.
                    pass
            pendings = remainings
            tagspath = srctagspath

    except SubversionException:
        self.ui.note(_('no tags found at revision %d\n') % start)
    return tags
503 503
def converted(self, rev, destrev):
    """Append "destrev revnum" to .svn/hg-shamap in the working copy.

    Does nothing when self.wc is not set. The map file is opened in
    append mode on first use and kept open in self.convertfp.
    """
    if not self.wc:
        return
    fp = self.convertfp
    if fp is None:
        mappath = os.path.join(self.wc, '.svn', 'hg-shamap')
        fp = self.convertfp = open(mappath, 'a')
    fp.write('%s %d\n' % (destrev, self.revnum(rev)))
    fp.flush()
512 512
def revid(self, revnum, module=None):
    """Return the unicode revision id "svn:<uuid><module>@<revnum>".

    self.module is used when module is empty or None.
    """
    module = module or self.module
    decoded = module.decode(self.encoding)
    return u"svn:%s%s@%s" % (self.uuid, decoded, revnum)
518 518
def revnum(self, rev):
    """Return the integer following the last '@' of revision id rev."""
    return int(rev[rev.rfind('@') + 1:])
521 521
def revsplit(self, rev):
    """Split a revision id into (uuid, module, revnum).

    The id is encoded with self.encoding first. module is '' when the
    id has no path part, otherwise it starts with a slash.
    """
    url, number = rev.encode(self.encoding).rsplit('@', 1)
    number = int(number)
    slash = url.find('/')
    # The first four characters are the "svn:" prefix
    if slash < 0:
        return url[4:], '', number
    return url[4:slash], url[slash:], number
531 531
def latest(self, path, stop=0):
    """Return the id of the latest revision affecting path, looking no
    further than stop (the latest repository revision when stop is 0).

    The result may belong to another module, because a branch can be
    moved without a change being reported on it. None is returned
    when the path, before or after following such moves, is outside
    of self.rootmodule. SvnPathNotFound is raised when path cannot be
    stat'ed at stop.
    """
    if not path.startswith(self.rootmodule):
        # Requests on foreign branches may be forbidden at server level
        self.ui.debug(_('ignoring foreign branch %r\n') % path)
        return None

    if not stop:
        stop = svn.ra.get_latest_revnum(self.ra)
    try:
        # stat() is given a path relative to the repository root
        previous = self.reparent('')
        dirent = svn.ra.stat(self.ra, path.strip('/'), stop)
        self.reparent(previous)
    except SubversionException:
        dirent = None
    if not dirent:
        raise SvnPathNotFound(_('%s not found up to revision %d') % (path, stop))

    # stat() gives us the previous revision on this line of
    # development, but it might be in *another module*. Fetch the
    # log and detect renames down to the latest revision.
    stream = self._getlog([path], stop, dirent.created_rev)
    try:
        for entry in stream:
            paths, revnum, author, date, message = entry
            if revnum <= dirent.created_rev:
                break

            for changed in paths:
                source = paths[changed].copyfrom_path
                if path.startswith(changed) and source:
                    renamed = source + path[len(changed):]
                    self.ui.debug(_("branch renamed from %s to %s at %d\n") %
                                  (path, renamed, revnum))
                    path = renamed
                    break
    finally:
        stream.close()

    if not path.startswith(self.rootmodule):
        self.ui.debug(_('ignoring foreign branch %r\n') % path)
        return None
    return self.revid(dirent.created_rev, path)
579 579
def get_blacklist(self):
    """Avoid certain revision numbers.
    It is not uncommon for two nearby revisions to cancel each other
    out, e.g. 'I copied trunk into a subdirectory of itself instead
    of making a branch'. The converted repository is significantly
    smaller if we ignore such revisions.

    Reads "blacklist.txt" from the current directory into
    self.blacklist, a set of integers. Lines starting with '#' and
    lines that are not integers are skipped. self.blacklist is set
    to an empty set before the file is opened, so it exists even
    when opening fails with IOError.
    """
    self.blacklist = set()
    blacklist = self.blacklist
    fp = open("blacklist.txt", "r")
    try:
        for line in fp:
            if line.startswith("#"):
                continue
            try:
                blacklist.add(int(line.strip()))
            except ValueError:
                pass # not an integer or a comment
    finally:
        # Do not leave the handle to the garbage collector
        fp.close()
596 596
def is_blacklisted(self, svn_rev):
    """Return True if svn_rev is in self.blacklist."""
    blacklist = self.blacklist
    return svn_rev in blacklist
599 599
def reparent(self, module):
    """Reparent the svn transport and return the previous parent."""
    previous = self.prevmodule
    if previous == module:
        # Already there, leave the transport alone
        return module
    if previous is None:
        # No reparent call has been recorded yet
        previous = ''
    svnurl = self.baseurl + urllib.quote(module)
    self.ui.debug(_("reparent to %s\n") % svnurl)
    svn.ra.reparent(self.ra, svnurl)
    self.prevmodule = module
    return previous
612 612
def expandpaths(self, rev, paths, parents):
    """Expand the changed paths of rev into files and copies.

    paths is a sequence of (path, entry) pairs taken from the svn
    log and parents the list of parent revision ids. Returns
    (files, copies): files is a list without duplicates of paths
    relative to the module, copies maps copy destinations to their
    sources.
    """
    entries = []
    # Map of entrypath, revision for finding source of deleted
    # revisions.
    copyfrom = {}
    copies = {}

    new_module, revnum = self.revsplit(rev)[1:]
    if new_module != self.module:
        self.module = new_module
        self.reparent(self.module)

    for path, ent in paths:
        entrypath = self.getrelpath(path)

        kind = self._checkpath(entrypath, revnum)
        if kind == svn.core.svn_node_file:
            # entrypath is recoded directly, there is no need to
            # decode it to unicode first
            entries.append(self.recode(entrypath))
            if not ent.copyfrom_path or not parents:
                continue
            # Copy sources not in parent revisions cannot be
            # represented, ignore their origin for now
            pmodule, prevnum = self.revsplit(parents[0])[1:]
            if ent.copyfrom_rev < prevnum:
                continue
            copyfrom_path = self.getrelpath(ent.copyfrom_path, pmodule)
            if not copyfrom_path:
                continue
            self.ui.debug(_("copied to %s from %s@%s\n") %
                          (entrypath, copyfrom_path, ent.copyfrom_rev))
            copies[self.recode(entrypath)] = self.recode(copyfrom_path)
        elif kind == 0: # gone, but had better be a deleted *file*
            self.ui.debug(_("gone from %s\n") % ent.copyfrom_rev)
            pmodule, prevnum = self.revsplit(parents[0])[1:]
            parentpath = pmodule + "/" + entrypath
            self.ui.debug(_("entry %s\n") % parentpath)

            # We can avoid the reparent calls if the module has
            # not changed but it is probably not worth the pain.
            prevmodule = self.reparent('')
            fromkind = svn.ra.check_path(self.ra, parentpath.strip('/'), prevnum)
            self.reparent(prevmodule)

            if fromkind == svn.core.svn_node_file:
                entries.append(self.recode(entrypath))
            elif fromkind == svn.core.svn_node_dir:
                if ent.action == 'C':
                    children = self._find_children(path, prevnum)
                else:
                    oroot = parentpath.strip('/')
                    nroot = path.strip('/')
                    children = self._find_children(oroot, prevnum)
                    # Every child starts with oroot, swap that prefix
                    # only: str.replace() would also rewrite later
                    # occurrences of oroot inside the path.
                    children = [nroot + s[len(oroot):] for s in children]

                for child in children:
                    childpath = self.getrelpath("/" + child, pmodule)
                    if not childpath:
                        continue
                    if childpath in copies:
                        del copies[childpath]
                    entries.append(childpath)
            else:
                self.ui.debug(_('unknown path in revision %d: %s\n') % \
                              (revnum, path))
        elif kind == svn.core.svn_node_dir:
            # If the directory just had a prop change,
            # then we shouldn't need to look for its children.
            if ent.action == 'M':
                continue

            children = sorted(self._find_children(path, revnum))
            for child in children:
                # Can we move a child directory and its
                # parent in the same commit? (probably can). Could
                # cause problems if instead of revnum -1,
                # we have to look in (copyfrom_path, revnum - 1)
                entrypath = self.getrelpath("/" + child)
                if entrypath:
                    # Need to filter out directories here...
                    kind = self._checkpath(entrypath, revnum)
                    if kind != svn.core.svn_node_dir:
                        entries.append(self.recode(entrypath))

            # Handle directory copies
            if not ent.copyfrom_path or not parents:
                continue
            # Copy sources not in parent revisions cannot be
            # represented, ignore their origin for now
            pmodule, prevnum = self.revsplit(parents[0])[1:]
            if ent.copyfrom_rev < prevnum:
                continue
            copyfrompath = self.getrelpath(ent.copyfrom_path, pmodule)
            if not copyfrompath:
                continue
            # NOTE(review): copyfrom is filled here but never read in
            # this method
            copyfrom[path] = ent
            self.ui.debug(_("mark %s came from %s:%d\n")
                          % (path, copyfrompath, ent.copyfrom_rev))
            children = self._find_children(ent.copyfrom_path, ent.copyfrom_rev)
            children.sort()
            for child in children:
                entrypath = self.getrelpath("/" + child, pmodule)
                if not entrypath:
                    continue
                copytopath = path + entrypath[len(copyfrompath):]
                copytopath = self.getrelpath(copytopath)
                copies[self.recode(copytopath)] = self.recode(entrypath)

    return (list(set(entries)), copies)
722 721
723 722 def _fetch_revisions(self, from_revnum, to_revnum):
724 723 if from_revnum < to_revnum:
725 724 from_revnum, to_revnum = to_revnum, from_revnum
726 725
727 726 self.child_cset = None
728 727
729 728 def parselogentry(orig_paths, revnum, author, date, message):
730 729 """Return the parsed commit object or None, and True if
731 730 the revision is a branch root.
732 731 """
733 732 self.ui.debug(_("parsing revision %d (%d changes)\n") %
734 733 (revnum, len(orig_paths)))
735 734
736 735 branched = False
737 736 rev = self.revid(revnum)
738 737 # branch log might return entries for a parent we already have
739 738
740 739 if rev in self.commits or revnum < to_revnum:
741 740 return None, branched
742 741
743 742 parents = []
744 743 # check whether this revision is the start of a branch or part
745 744 # of a branch renaming
746 745 orig_paths = sorted(orig_paths.iteritems())
747 746 root_paths = [(p,e) for p,e in orig_paths if self.module.startswith(p)]
748 747 if root_paths:
749 748 path, ent = root_paths[-1]
750 749 if ent.copyfrom_path:
751 750 branched = True
752 751 newpath = ent.copyfrom_path + self.module[len(path):]
753 752 # ent.copyfrom_rev may not be the actual last revision
754 753 previd = self.latest(newpath, ent.copyfrom_rev)
755 754 if previd is not None:
756 755 prevmodule, prevnum = self.revsplit(previd)[1:]
757 756 if prevnum >= self.startrev:
758 757 parents = [previd]
759 758 self.ui.note(_('found parent of branch %s at %d: %s\n') %
760 759 (self.module, prevnum, prevmodule))
761 760 else:
762 761 self.ui.debug(_("no copyfrom path, don't know what to do.\n"))
763 762
764 763 paths = []
765 764 # filter out unrelated paths
766 765 for path, ent in orig_paths:
767 766 if self.getrelpath(path) is None:
768 767 continue
769 768 paths.append((path, ent))
770 769
771 770 # Example SVN datetime. Includes microseconds.
772 771 # ISO-8601 conformant
773 772 # '2007-01-04T17:35:00.902377Z'
774 773 date = util.parsedate(date[:19] + " UTC", ["%Y-%m-%dT%H:%M:%S"])
775 774
776 775 log = message and self.recode(message) or ''
777 776 author = author and self.recode(author) or ''
778 777 try:
779 778 branch = self.module.split("/")[-1]
780 779 if branch == 'trunk':
781 780 branch = ''
782 781 except IndexError:
783 782 branch = None
784 783
785 784 cset = commit(author=author,
786 785 date=util.datestr(date),
787 786 desc=log,
788 787 parents=parents,
789 788 branch=branch,
790 789 rev=rev.encode('utf-8'))
791 790
792 791 self.commits[rev] = cset
793 792 # The parents list is *shared* among self.paths and the
794 793 # commit object. Both will be updated below.
795 794 self.paths[rev] = (paths, cset.parents)
796 795 if self.child_cset and not self.child_cset.parents:
797 796 self.child_cset.parents[:] = [rev]
798 797 self.child_cset = cset
799 798 return cset, branched
800 799
801 800 self.ui.note(_('fetching revision log for "%s" from %d to %d\n') %
802 801 (self.module, from_revnum, to_revnum))
803 802
804 803 try:
805 804 firstcset = None
806 805 lastonbranch = False
807 806 stream = self._getlog([self.module], from_revnum, to_revnum)
808 807 try:
809 808 for entry in stream:
810 809 paths, revnum, author, date, message = entry
811 810 if revnum < self.startrev:
812 811 lastonbranch = True
813 812 break
814 813 if self.is_blacklisted(revnum):
815 814 self.ui.note(_('skipping blacklisted revision %d\n')
816 815 % revnum)
817 816 continue
818 817 if not paths:
819 818 self.ui.debug(_('revision %d has no entries\n') % revnum)
820 819 continue
821 820 cset, lastonbranch = parselogentry(paths, revnum, author,
822 821 date, message)
823 822 if cset:
824 823 firstcset = cset
825 824 if lastonbranch:
826 825 break
827 826 finally:
828 827 stream.close()
829 828
830 829 if not lastonbranch and firstcset and not firstcset.parents:
831 830 # The first revision of the sequence (the last fetched one)
832 831 # has invalid parents if not a branch root. Find the parent
833 832 # revision now, if any.
834 833 try:
835 834 firstrevnum = self.revnum(firstcset.rev)
836 835 if firstrevnum > 1:
837 836 latest = self.latest(self.module, firstrevnum - 1)
838 837 if latest:
839 838 firstcset.parents.append(latest)
840 839 except SvnPathNotFound:
841 840 pass
842 841 except SubversionException, (inst, num):
843 842 if num == svn.core.SVN_ERR_FS_NO_SUCH_REVISION:
844 843 raise util.Abort(_('svn: branch has no revision %s') % to_revnum)
845 844 raise
846 845
847 846 def _getfile(self, file, rev):
848 847 # TODO: ra.get_file transmits the whole file instead of diffs.
849 848 mode = ''
850 849 try:
851 850 new_module, revnum = self.revsplit(rev)[1:]
852 851 if self.module != new_module:
853 852 self.module = new_module
854 853 self.reparent(self.module)
855 854 io = StringIO()
856 855 info = svn.ra.get_file(self.ra, file, revnum, io)
857 856 data = io.getvalue()
858 857 # ra.get_files() seems to keep a reference on the input buffer
859 858 # preventing collection. Release it explicitely.
860 859 io.close()
861 860 if isinstance(info, list):
862 861 info = info[-1]
863 862 mode = ("svn:executable" in info) and 'x' or ''
864 863 mode = ("svn:special" in info) and 'l' or mode
865 864 except SubversionException, e:
866 865 notfound = (svn.core.SVN_ERR_FS_NOT_FOUND,
867 866 svn.core.SVN_ERR_RA_DAV_PATH_NOT_FOUND)
868 867 if e.apr_err in notfound: # File not found
869 868 raise IOError()
870 869 raise
871 870 if mode == 'l':
872 871 link_prefix = "link "
873 872 if data.startswith(link_prefix):
874 873 data = data[len(link_prefix):]
875 874 return data, mode
876 875
def _find_children(self, path, revnum):
    """List the entries found under ``path`` at revision ``revnum``.

    Each returned name is ``path`` (without surrounding slashes) joined
    with an entry key reported by ``svn.client.ls`` (called with its
    third argument set to True).
    """
    path = path.strip('/')
    pool = Pool()
    url = (self.baseurl + '/' + urllib.quote(path)).strip('/')
    listing = svn.client.ls(url, optrev(revnum), True, self.ctx, pool)
    children = []
    for name in listing.keys():
        children.append('%s/%s' % (path, name))
    return children
883 882
def getrelpath(self, path, module=None):
    """Return ``path`` relative to ``module``, or None if it is outside.

    ``module`` defaults to ``self.module``. For example, with the module
    "/CMFPlone/branches/Plone-2_0-branch", the log path
    "/CMFPlone/branches/Plone-2_0-branch/tests/PloneTestCase.py"
    yields "tests/PloneTestCase.py". The module itself yields ''.
    A path that merely shares the module as a string prefix (no '/'
    boundary) is treated as outside the tree.
    """
    if module is None:
        module = self.module
    if path.startswith(module):
        tail = path.rstrip('/')[len(module):]
        if tail == '':
            return tail
        if tail.startswith('/'):
            return tail[1:]

    # The path is outside our tracked tree...
    self.ui.debug(_('%r is not under %r, ignoring\n') % (path, module))
    return None
903 902
def _checkpath(self, path, revnum):
    """Return ``svn.ra.check_path``'s result for ``path`` at ``revnum``."""
    # ra.check_path does not like leading slashes very much, it leads
    # to PROPFIND subversion errors
    relpath = path.strip('/')
    return svn.ra.check_path(self.ra, relpath, revnum)
908 907
def _getlog(self, paths, start, end, limit=0, discover_changed_paths=True,
            strict_node_history=False):
    """Start an ``hg debugsvnlog`` child process and return its log stream.

    The request (base URL, normalized paths, revision range and flags)
    is encoded with ``encodeargs`` and written to the child's stdin; the
    child's stdout is wrapped in a ``logstream`` and returned.
    """
    # Normalize path names, svn >= 1.5 only wants paths relative to
    # supplied URL
    relpaths = []
    for p in paths:
        if p.startswith('/'):
            full = p
        else:
            full = self.module + '/' + p
        relpaths.append(full.strip('/'))
    payload = encodeargs([self.baseurl, relpaths, start, end, limit,
                          discover_changed_paths, strict_node_history])
    command = '%s debugsvnlog' % util.shellquote(util.hgexecutable())
    stdin, stdout = util.popen2(command)
    stdin.write(payload)
    stdin.close()
    return logstream(stdout)
927 926
# Shell script installed as the 'pre-revprop-change' hook of repositories
# created by svn_sink. It allows modifying svn:log and adding the
# hg:convert-branch / hg:convert-rev revision properties, and rejects
# every other revision property change.
pre_revprop_change = '''#!/bin/sh

REPOS="$1"
REV="$2"
USER="$3"
PROPNAME="$4"
ACTION="$5"

if [ "$ACTION" = "M" -a "$PROPNAME" = "svn:log" ]; then exit 0; fi
if [ "$ACTION" = "A" -a "$PROPNAME" = "hg:convert-branch" ]; then exit 0; fi
if [ "$ACTION" = "A" -a "$PROPNAME" = "hg:convert-rev" ]; then exit 0; fi

echo "Changing prohibited revision property" >&2
exit 1
'''
943 942
class svn_sink(converter_sink, commandline):
    """Conversion sink that commits into a Subversion working copy.

    Changes are applied to the files of a working copy (``self.wc``) and
    recorded by running the ``svn`` command line client. Pending
    operations are queued in ``self.delete``, ``self.copies``,
    ``self.setexec`` and ``self.delexec`` and flushed by putcommit().
    """
    commit_re = re.compile(r'Committed revision (\d+).', re.M)

    def prerun(self):
        """Change into the working copy, if one is set."""
        # NOTE(review): presumably called by commandline before each
        # command is run -- confirm against common.commandline.
        if self.wc:
            os.chdir(self.wc)

    def postrun(self):
        """Change back to the directory recorded at construction time."""
        if self.wc:
            os.chdir(self.cwd)

    def join(self, name):
        """Return the path of ``name`` inside the working copy's .svn dir."""
        return os.path.join(self.wc, '.svn', name)

    def revmapfile(self):
        """Return the path of the revision map file."""
        return self.join('hg-shamap')

    def authorfile(self):
        """Return the path of the author map file."""
        return self.join('hg-authormap')

    def __init__(self, ui, path):
        """Set up the sink on ``path``.

        If ``path`` is an existing working copy it is updated and used
        directly. Otherwise ``path`` is treated as a repository location:
        when its parent directory exists the repository is created with
        ``svnadmin create`` if missing and addressed through a file://
        URL, then it is checked out into ``<basename>-wc`` under the
        current directory. A freshly created repository gets a
        pre-revprop-change hook installed.
        """
        converter_sink.__init__(self, ui, path)
        commandline.__init__(self, ui, 'svn')
        self.delete = []
        self.setexec = []
        self.delexec = []
        self.copies = []
        self.wc = None
        self.cwd = os.getcwd()

        path = os.path.realpath(path)

        created = False
        if os.path.isfile(os.path.join(path, '.svn', 'entries')):
            self.wc = path
            self.run0('update')
        else:
            wcpath = os.path.join(os.getcwd(), os.path.basename(path) + '-wc')

            if os.path.isdir(os.path.dirname(path)):
                if not os.path.exists(os.path.join(path, 'db', 'fs-type')):
                    ui.status(_('initializing svn repo %r\n') %
                              os.path.basename(path))
                    commandline(ui, 'svnadmin').run0('create', path)
                    created = path
                path = util.normpath(path)
                if not path.startswith('/'):
                    path = '/' + path
                path = 'file://' + path

            ui.status(_('initializing svn wc %r\n') % os.path.basename(wcpath))
            self.run0('checkout', path, wcpath)

            self.wc = wcpath
        self.opener = util.opener(self.wc)
        self.wopener = util.opener(self.wc)
        self.childmap = mapfile(ui, self.join('hg-childmap'))
        self.is_exec = util.checkexec(self.wc) and util.is_exec or None

        if created:
            hook = os.path.join(created, 'hooks', 'pre-revprop-change')
            fp = open(hook, 'w')
            try:
                fp.write(pre_revprop_change)
            finally:
                # Close the hook file even if the write fails.
                fp.close()
            util.set_flags(hook, False, True)

        xport = transport.SvnRaTransport(url=geturl(path))
        self.uuid = svn.ra.get_uuid(xport.ra)

    def wjoin(self, *names):
        """Join ``names`` onto the working copy root."""
        return os.path.join(self.wc, *names)

    def putfile(self, filename, flags, data):
        """Write ``data`` to ``filename`` in the working copy.

        With 'l' in ``flags`` a symlink pointing at ``data`` is created.
        Otherwise the file is written and its executable bit is set
        according to 'x' in ``flags``; the file is queued in
        ``self.setexec`` or ``self.delexec`` when the svn:executable
        property has to change.
        """
        if 'l' in flags:
            self.wopener.symlink(data, filename)
        else:
            wpath = self.wjoin(filename)
            try:
                if os.path.islink(wpath):
                    # Remove the link inside the working copy; the bare
                    # filename would be resolved against the process cwd.
                    os.unlink(wpath)
            except OSError:
                pass
            fp = self.wopener(filename, 'w')
            try:
                fp.write(data)
            finally:
                # Do not rely on garbage collection to flush the data.
                fp.close()

            if self.is_exec:
                was_exec = self.is_exec(wpath)
            else:
                # On filesystems not supporting execute-bit, there is no way
                # to know if it is set but asking subversion. Setting it
                # systematically is just as expensive and much simpler.
                was_exec = 'x' not in flags

            util.set_flags(wpath, False, 'x' in flags)
            if was_exec:
                if 'x' not in flags:
                    self.delexec.append(filename)
            else:
                if 'x' in flags:
                    self.setexec.append(filename)

    def _copyfile(self, source, dest):
        """Record a copy from ``source`` to ``dest`` with ``svn copy``."""
        # SVN's copy command pukes if the destination file exists, but
        # our copyfile method expects to record a copy that has
        # already occurred. Cross the semantic gap.
        wdest = self.wjoin(dest)
        exists = os.path.exists(wdest)
        if exists:
            # Reserve a unique name next to the destination and move the
            # existing file out of the way while svn performs the copy.
            fd, tempname = tempfile.mkstemp(
                prefix='hg-copy-', dir=os.path.dirname(wdest))
            os.close(fd)
            os.unlink(tempname)
            os.rename(wdest, tempname)
        try:
            self.run0('copy', source, dest)
        finally:
            if exists:
                # Put the already-written content back over svn's copy.
                try:
                    os.unlink(wdest)
                except OSError:
                    pass
                os.rename(tempname, wdest)

    def dirs_of(self, files):
        """Return the set of directories involved in ``files``.

        This is every entry of ``files`` that is a directory in the
        working copy, plus every prefix of an entry ending before a '/'.
        """
        dirs = set()
        for f in files:
            if os.path.isdir(self.wjoin(f)):
                dirs.add(f)
            for i in strutil.rfindall(f, '/'):
                dirs.add(f[:i])
        return dirs

    def add_dirs(self, files):
        """``svn add`` the directories of ``files`` not yet under svn.

        Returns the list of directories that were added.
        """
        add_dirs = [d for d in sorted(self.dirs_of(files))
                    if not os.path.exists(self.wjoin(d, '.svn', 'entries'))]
        if add_dirs:
            self.xargs(add_dirs, 'add', non_recursive=True, quiet=True)
        return add_dirs

    def add_files(self, files):
        """``svn add`` ``files`` (if any) and return them."""
        if files:
            self.xargs(files, 'add', quiet=True)
        return files

    def tidy_dirs(self, names):
        """``svn delete`` directories of ``names`` that became empty.

        A directory counts as empty when its only remaining entry is the
        '.svn' administrative directory. Returns the deleted directories.
        """
        deleted = []
        # Deepest paths first, so children are examined before parents.
        for d in sorted(self.dirs_of(names), reverse=True):
            wd = self.wjoin(d)
            # os.listdir() returns a list: compare against a list, a
            # comparison with the bare string can never be true.
            if os.listdir(wd) == ['.svn']:
                self.run0('delete', d)
                deleted.append(d)
        return deleted

    def addchild(self, parent, child):
        """Remember ``child`` as the revision committed for ``parent``."""
        self.childmap[parent] = child

    def revid(self, rev):
        """Return the revision identifier ``svn:<uuid>@<rev>``."""
        return u"svn:%s@%s" % (self.uuid, rev)

    def putcommit(self, files, copies, parents, commit, source, revmap):
        """Apply a changeset to the working copy and commit it.

        ``files`` is a sequence of (name, version) pairs; a file for
        which ``source.getfile`` raises IOError is scheduled for
        deletion. If one of ``parents`` already has a child recorded in
        the child map, that child's revision id is returned and nothing
        is committed. Otherwise the queued operations are flushed, the
        commit is run and the new revision id is returned.

        Raises util.Abort when the svn commit output cannot be parsed.
        """
        # Apply changes to working copy
        for f, v in files:
            try:
                data = source.getfile(f, v)
            except IOError:
                self.delete.append(f)
            else:
                e = source.getmode(f, v)
                self.putfile(f, e, data)
                if f in copies:
                    self.copies.append([copies[f], f])
        files = [f[0] for f in files]

        for parent in parents:
            try:
                return self.revid(self.childmap[parent])
            except KeyError:
                pass
        entries = set(self.delete)
        files = frozenset(files)
        entries.update(self.add_dirs(files.difference(entries)))
        if self.copies:
            for s, d in self.copies:
                self._copyfile(s, d)
            self.copies = []
        if self.delete:
            self.xargs(self.delete, 'delete')
            self.delete = []
        entries.update(self.add_files(files.difference(entries)))
        entries.update(self.tidy_dirs(entries))
        if self.delexec:
            self.xargs(self.delexec, 'propdel', 'svn:executable')
            self.delexec = []
        if self.setexec:
            self.xargs(self.setexec, 'propset', 'svn:executable', '*')
            self.setexec = []

        fd, messagefile = tempfile.mkstemp(prefix='hg-convert-')
        try:
            # Writing happens inside the try block so the temporary file
            # is removed even when the message cannot be written.
            fp = os.fdopen(fd, 'w')
            try:
                fp.write(commit.desc)
            finally:
                fp.close()
            output = self.run0('commit',
                               username=util.shortuser(commit.author),
                               file=messagefile,
                               encoding='utf-8')
            try:
                rev = self.commit_re.search(output).group(1)
            except AttributeError:
                # search() returned None: no "Committed revision" line.
                self.ui.warn(_('unexpected svn output:\n'))
                self.ui.warn(output)
                raise util.Abort(_('unable to cope with svn output'))
            if commit.rev:
                self.run('propset', 'hg:convert-rev', commit.rev,
                         revprop=True, revision=rev)
            if commit.branch and commit.branch != 'default':
                self.run('propset', 'hg:convert-branch', commit.branch,
                         revprop=True, revision=rev)
            for parent in parents:
                self.addchild(parent, rev)
            return self.revid(rev)
        finally:
            os.unlink(messagefile)

    def puttags(self, tags):
        """Tags are not supported yet; only emit a warning."""
        self.ui.warn(_('XXX TAGS NOT IMPLEMENTED YET\n'))
General Comments 0
You need to be logged in to leave comments. Login now