##// END OF EJS Templates
convert: honor 2.3 compatibility (rsplit)
Dirkjan Ochtman -
r7857:6af7c0e5 default
parent child Browse files
Show More
@@ -1,1167 +1,1167
1 1 # Subversion 1.4/1.5 Python API backend
2 2 #
3 3 # Copyright(C) 2007 Daniel Holth et al
4 4 #
5 5 # Configuration options:
6 6 #
7 7 # convert.svn.trunk
8 8 # Relative path to the trunk (default: "trunk")
9 9 # convert.svn.branches
10 10 # Relative path to tree of branches (default: "branches")
11 11 # convert.svn.tags
12 12 # Relative path to tree of tags (default: "tags")
13 13 #
14 14 # Set these in a hgrc, or on the command line as follows:
15 15 #
16 16 # hg convert --config convert.svn.trunk=wackoname [...]
17 17
18 18 import locale
19 19 import os
20 20 import re
21 21 import sys
22 22 import cPickle as pickle
23 23 import tempfile
24 24 import urllib
25 25
26 26 from mercurial import strutil, util
27 27 from mercurial.i18n import _
28 28
29 29 # Subversion stuff. Works best with very recent Python SVN bindings
30 30 # e.g. SVN 1.5 or backports. Thanks to the bzr folks for enhancing
31 31 # these bindings.
32 32
33 33 from cStringIO import StringIO
34 34
35 35 from common import NoRepo, MissingTool, commit, encodeargs, decodeargs
36 36 from common import commandline, converter_source, converter_sink, mapfile
37 37
38 38 try:
39 39 from svn.core import SubversionException, Pool
40 40 import svn
41 41 import svn.client
42 42 import svn.core
43 43 import svn.ra
44 44 import svn.delta
45 45 import transport
46 46 except ImportError:
47 47 pass
48 48
49 49 class SvnPathNotFound(Exception):
50 50 pass
51 51
def geturl(path):
    """Return a Subversion URL for path.

    The bindings are asked first. When they refuse, an existing local
    directory is turned into a quoted file:// URL and anything else is
    returned unchanged.
    """
    try:
        canonical = svn.core.svn_path_canonicalize(path)
        return svn.client.url_from_path(canonical)
    except SubversionException:
        # Not something the bindings can resolve, try by hand below.
        pass
    if not os.path.isdir(path):
        return path
    localpath = os.path.normpath(os.path.abspath(path))
    if os.name == 'nt':
        localpath = '/' + util.normpath(localpath)
    return 'file://%s' % urllib.quote(localpath)
63 63
def optrev(number):
    """Return an svn_opt_revision_t designating revision number."""
    revision = svn.core.svn_opt_revision_t()
    revision.kind = svn.core.svn_opt_revision_number
    revision.value.number = number
    return revision
69 69
class changedpath(object):
    """Plain copy of the copyfrom_path, copyfrom_rev and action
    attributes of a changed path entry."""
    def __init__(self, p):
        for name in ('copyfrom_path', 'copyfrom_rev', 'action'):
            setattr(self, name, getattr(p, name))
75 75
76 76 def get_log_child(fp, url, paths, start, end, limit=0, discover_changed_paths=True,
77 77 strict_node_history=False):
78 78 protocol = -1
79 79 def receiver(orig_paths, revnum, author, date, message, pool):
80 80 if orig_paths is not None:
81 81 for k, v in orig_paths.iteritems():
82 82 orig_paths[k] = changedpath(v)
83 83 pickle.dump((orig_paths, revnum, author, date, message),
84 84 fp, protocol)
85 85
86 86 try:
87 87 # Use an ra of our own so that our parent can consume
88 88 # our results without confusing the server.
89 89 t = transport.SvnRaTransport(url=url)
90 90 svn.ra.get_log(t.ra, paths, start, end, limit,
91 91 discover_changed_paths,
92 92 strict_node_history,
93 93 receiver)
94 94 except SubversionException, (inst, num):
95 95 pickle.dump(num, fp, protocol)
96 96 except IOError:
97 97 # Caller may interrupt the iteration
98 98 pickle.dump(None, fp, protocol)
99 99 else:
100 100 pickle.dump(None, fp, protocol)
101 101 fp.close()
102 102 # With large history, cleanup process goes crazy and suddenly
103 103 # consumes *huge* amount of memory. The output file being closed,
104 104 # there is no need for clean termination.
105 105 os._exit(0)
106 106
def debugsvnlog(ui, **opts):
    """Fetch SVN log in a subprocess and channel them back to parent to
    avoid memory collection issues.

    The get_log_child() arguments are read, encoded, from stdin and the
    pickled log entries are written to stdout.
    """
    for stream in (sys.stdin, sys.stdout):
        util.set_binary(stream)
    logargs = decodeargs(sys.stdin.read())
    get_log_child(sys.stdout, *logargs)
115 115
class logstream:
    """Interruptible revision log iterator.

    Reads pickled log entries from stdout until a None terminator is
    found. Call close() to release the underlying file, including when
    the iteration is abandoned early.
    """
    def __init__(self, stdout):
        self._stdout = stdout

    def __iter__(self):
        while True:
            entry = pickle.load(self._stdout)
            try:
                # Only checks that entry is a five items log record
                orig_paths, revnum, author, date, message = entry
            except (TypeError, ValueError):
                # Not a log record: either the end marker or an error
                # value reported by the child. Anything else (e.g.
                # KeyboardInterrupt) must not be swallowed here.
                if entry is None:
                    break
                raise SubversionException("child raised exception", entry)
            yield entry

    def close(self):
        """Close the underlying stream, at most once."""
        if self._stdout:
            self._stdout.close()
            self._stdout = None
136 136
137 137 # SVN conversion code stolen from bzr-svn and tailor
138 138 #
139 139 # Subversion looks like a versioned filesystem, branches structures
140 140 # are defined by conventions and not enforced by the tool. First,
141 141 # we define the potential branches (modules) as "trunk" and "branches"
142 142 # children directories. Revisions are then identified by their
143 143 # module and revision number (and a repository identifier).
144 144 #
145 145 # The revision graph is really a tree (or a forest). By default, a
146 146 # revision parent is the previous revision in the same module. If the
147 147 # module directory is copied/moved from another module then the
148 148 # revision is the module root and its parent the source revision in
149 149 # the parent module. A revision has at most one parent.
150 150 #
151 151 class svn_source(converter_source):
def __init__(self, ui, url, rev=None):
    """Open the Subversion repository at url as a conversion source.

    url may be a repository URL or a local path, optionally suffixed
    with @REVNUM. rev, when set, must be an integer revision and takes
    precedence over the @REVNUM suffix.

    Raises MissingTool if the Subversion bindings are missing or older
    than 1.4, NoRepo if the transport cannot be opened on url, and
    util.Abort if rev or convert.svn.startrev is not an integer or if
    no revision is found in the module.
    """
    super(svn_source, self).__init__(ui, url, rev=rev)

    # The bindings are imported in a module level try/except block which
    # ignores ImportError: probe for one of the imported names.
    try:
        SubversionException
    except NameError:
        raise MissingTool(_('Subversion python bindings could not be loaded'))

    try:
        version = svn.core.SVN_VER_MAJOR, svn.core.SVN_VER_MINOR
        if version < (1, 4):
            raise MissingTool(_('Subversion python bindings %d.%d found, '
                                '1.4 or later required') % version)
    except AttributeError:
        raise MissingTool(_('Subversion python bindings are too old, 1.4 '
                            'or later required'))

    # NOTE(review): this value is replaced by 'UTF-8' below before
    # anything in this method reads it.
    self.encoding = locale.getpreferredencoding()
    self.lastrevs = {}

    latest = None
    try:
        # Support file://path@rev syntax. Useful e.g. to convert
        # deleted branches.
        at = url.rfind('@')
        if at >= 0:
            latest = int(url[at+1:])
            url = url[:at]
    except ValueError, e:
        # The text after '@' is not a revision number, keep url as is
        pass
    self.url = geturl(url)
    self.encoding = 'UTF-8' # Subversion is always nominal UTF-8
    try:
        self.transport = transport.SvnRaTransport(url=self.url)
        self.ra = self.transport.ra
        self.ctx = self.transport.client
        self.baseurl = svn.ra.get_repos_root(self.ra)
        # Module is either empty or a repository path starting with
        # a slash and not ending with a slash.
        self.module = urllib.unquote(self.url[len(self.baseurl):])
        self.prevmodule = None
        self.rootmodule = self.module
        self.commits = {}
        self.paths = {}
        self.uuid = svn.ra.get_uuid(self.ra).decode(self.encoding)
    except SubversionException, e:
        ui.print_exc()
        raise NoRepo("%s does not look like a Subversion repo" % self.url)

    if rev:
        try:
            latest = int(rev)
        except ValueError:
            raise util.Abort(_('svn: revision %s is not an integer') % rev)

    self.startrev = self.ui.config('convert', 'svn.startrev', default=0)
    try:
        self.startrev = int(self.startrev)
        if self.startrev < 0:
            self.startrev = 0
    except ValueError:
        raise util.Abort(_('svn: start revision %s is not an integer')
                         % self.startrev)

    # A missing or unreadable blacklist.txt is not an error.
    # get_blacklist() sets self.blacklist before opening the file.
    try:
        self.get_blacklist()
    except IOError, e:
        pass

    self.head = self.latest(self.module, latest)
    if not self.head:
        raise util.Abort(_('no revision found in module %s') %
                         self.module.encode(self.encoding))
    self.last_changed = self.revnum(self.head)

    self._changescache = None

    # Remember the working copy, if url is one: converted() records the
    # revision map in its .svn directory.
    if os.path.exists(os.path.join(url, '.svn/entries')):
        self.wc = url
    else:
        self.wc = None
    self.convertfp = None
234 234
def setrevmap(self, revmap):
    """Record in self.lastrevs the highest revision number found in
    revmap keys, for each module."""
    newest = {}
    for revid in revmap.iterkeys():
        module, revnum = self.revsplit(revid)[1:]
        if module not in newest or revnum > newest[module]:
            newest[module] = revnum
    self.lastrevs = newest
243 243
def exists(self, path, optrev):
    """Tell whether path, relative to the source URL, can be listed at
    revision optrev."""
    target = self.url.rstrip('/') + '/' + urllib.quote(path)
    try:
        svn.client.ls(target, optrev, False, self.ctx)
    except SubversionException:
        return False
    return True
251 251
def getheads(self):
    """Return the list of head revision identifiers to convert.

    The trunk, tags and branches locations are looked up in the
    convert.svn.* options, defaulting to "trunk", "tags" and
    "branches". When a trunk is found, self.module and self.head are
    moved to it. Every directory below the branches location adds its
    own head. self.tags is set to the tags path, or None.

    Raises util.Abort if a configured location does not exist, if the
    trunk has no revision, or if convert.svn.startrev is used with more
    than one head or is greater than the head revision.
    """

    def isdir(path, revnum):
        kind = self._checkpath(path, revnum)
        return kind == svn.core.svn_node_dir

    def getcfgpath(name, rev):
        # Return the existing path configured or assumed for name, or
        # None. An option set to an empty string disables the lookup.
        cfgpath = self.ui.config('convert', 'svn.' + name)
        if cfgpath is not None and cfgpath.strip() == '':
            return None
        path = (cfgpath or name).strip('/')
        if not self.exists(path, rev):
            # Only explicitly configured locations are mandatory
            if cfgpath:
                raise util.Abort(_('expected %s to be at %r, but not found')
                                 % (name, path))
            return None
        self.ui.note(_('found %s at %r\n') % (name, path))
        return path

    rev = optrev(self.last_changed)
    oldmodule = ''
    trunk = getcfgpath('trunk', rev)
    self.tags = getcfgpath('tags', rev)
    branches = getcfgpath('branches', rev)

    # If the project has a trunk or branches, we will extract heads
    # from them. We keep the project root otherwise.
    if trunk:
        oldmodule = self.module or ''
        self.module += '/' + trunk
        self.head = self.latest(self.module, self.last_changed)
        if not self.head:
            raise util.Abort(_('no revision found in module %s') %
                             self.module.encode(self.encoding))

    # First head in the list is the module's head
    self.heads = [self.head]
    if self.tags is not None:
        self.tags = '%s/%s' % (oldmodule , (self.tags or 'tags'))

    # Check if branches bring a few more heads to the list
    if branches:
        rpath = self.url.strip('/')
        branchnames = svn.client.ls(rpath + '/' + urllib.quote(branches),
                                    rev, False, self.ctx)
        for branch in branchnames.keys():
            module = '%s/%s/%s' % (oldmodule, branches, branch)
            if not isdir(module, self.last_changed):
                continue
            brevid = self.latest(module, self.last_changed)
            if not brevid:
                self.ui.note(_('ignoring empty branch %s\n') %
                             branch.encode(self.encoding))
                continue
            self.ui.note(_('found branch %s at %d\n') %
                         (branch, self.revnum(brevid)))
            self.heads.append(brevid)

    if self.startrev and self.heads:
        if len(self.heads) > 1:
            raise util.Abort(_('svn: start revision is not supported '
                               'with more than one branch'))
        revnum = self.revnum(self.heads[0])
        if revnum < self.startrev:
            raise util.Abort(_('svn: no revision found after start revision %d')
                             % self.startrev)

    return self.heads
320 320
def getfile(self, file, rev):
    """Return the content of file at rev, remembering its mode for a
    later getmode() call."""
    contents, flags = self._getfile(file, rev)
    key = (file, rev)
    self.modecache[key] = flags
    return contents
325 325
def getmode(self, file, rev):
    """Return the mode cached for file at rev by getfile()."""
    key = (file, rev)
    return self.modecache[key]
328 328
def getchanges(self, rev):
    """Return (files, copies) for rev, files being a sorted list of
    (path, rev) pairs.

    The result stored by getchangedfiles() is returned as is when it
    matches rev. Otherwise the mode cache is reset and the paths
    recorded for rev are consumed.
    """
    cached = self._changescache
    if cached and cached[0] == rev:
        return cached[1]
    self._changescache = None
    self.modecache = {}
    paths, parents = self.paths[rev]
    if not parents:
        # Perform a full checkout on roots
        module, revnum = self.revsplit(rev)[1:]
        listing = svn.client.ls(self.baseurl + urllib.quote(module),
                                optrev(revnum), True, self.ctx)
        files = [name for name, dirent in listing.iteritems()
                 if dirent.kind == svn.core.svn_node_file]
        copies = {}
    else:
        files, copies = self.expandpaths(rev, paths, parents)

    files.sort()
    files = [(name, rev) for name in files]

    # caller caches the result, so free it here to release memory
    del self.paths[rev]
    return (files, copies)
352 352
def getchangedfiles(self, rev, i):
    """Return the paths of the files changed in rev."""
    result = self.getchanges(rev)
    # Keep the result around, getchanges() hands it back for the same rev
    self._changescache = (rev, result)
    files = result[0]
    return [item[0] for item in files]
357 357
def getcommit(self, rev):
    """Return the commit object for rev, fetching the revision log of
    its module first if it is not known yet."""
    if rev not in self.commits:
        module, revnum = self.revsplit(rev)[1:]
        self.module = module
        self.reparent(module)
        # We assume that:
        # - requests for revisions after "stop" come from the
        # revision graph backward traversal. Cache all of them
        # down to stop, they will be used eventually.
        # - requests for revisions before "stop" come to get
        # isolated branches parents. Just fetch what is needed.
        stop = self.lastrevs.get(module, 0)
        if stop > revnum:
            stop = revnum + 1
        self._fetch_revisions(revnum, stop)
    # caller caches the result, so free it here to release memory
    return self.commits.pop(rev)
377 377
378 378 def gettags(self):
379 379 tags = {}
380 380 if self.tags is None:
381 381 return tags
382 382
383 383 # svn tags are just a convention, project branches left in a
384 384 # 'tags' directory. There is no other relationship than
385 385 # ancestry, which is expensive to discover and makes them hard
386 386 # to update incrementally. Worse, past revisions may be
387 387 # referenced by tags far away in the future, requiring a deep
388 388 # history traversal on every calculation. Current code
389 389 # performs a single backward traversal, tracking moves within
390 390 # the tags directory (tag renaming) and recording a new tag
391 391 # everytime a project is copied from outside the tags
392 392 # directory. It also lists deleted tags, this behaviour may
393 393 # change in the future.
394 394 pendings = []
395 395 tagspath = self.tags
396 396 start = svn.ra.get_latest_revnum(self.ra)
397 397 try:
398 398 for entry in self._getlog([self.tags], start, self.startrev):
399 399 origpaths, revnum, author, date, message = entry
400 400 copies = [(e.copyfrom_path, e.copyfrom_rev, p) for p, e
401 401 in origpaths.iteritems() if e.copyfrom_path]
402 402 copies.sort()
403 403 # Apply moves/copies from more specific to general
404 404 copies.reverse()
405 405
406 406 srctagspath = tagspath
407 407 if copies and copies[-1][2] == tagspath:
408 408 # Track tags directory moves
409 409 srctagspath = copies.pop()[0]
410 410
411 411 for source, sourcerev, dest in copies:
412 412 if not dest.startswith(tagspath + '/'):
413 413 continue
414 414 for tag in pendings:
415 415 if tag[0].startswith(dest):
416 416 tagpath = source + tag[0][len(dest):]
417 417 tag[:2] = [tagpath, sourcerev]
418 418 break
419 419 else:
420 420 pendings.append([source, sourcerev, dest.split('/')[-1]])
421 421
422 422 # Tell tag renamings from tag creations
423 423 remainings = []
424 424 for source, sourcerev, tagname in pendings:
425 425 if source.startswith(srctagspath):
426 426 remainings.append([source, sourcerev, tagname])
427 427 continue
428 428 # From revision may be fake, get one with changes
429 429 try:
430 430 tagid = self.latest(source, sourcerev)
431 431 if tagid:
432 432 tags[tagname] = tagid
433 433 except SvnPathNotFound:
434 434 # It happens when we are following directories we assumed
435 435 # were copied with their parents but were really created
436 436 # in the tag directory.
437 437 pass
438 438 pendings = remainings
439 439 tagspath = srctagspath
440 440
441 441 except SubversionException, (inst, num):
442 442 self.ui.note(_('no tags found at revision %d\n') % start)
443 443 return tags
444 444
def converted(self, rev, destrev):
    """Append a "destrev revnum" line to .svn/hg-shamap when the source
    is a working copy. Do nothing otherwise."""
    if not self.wc:
        return
    fp = self.convertfp
    if fp is None:
        # Opened on first use and kept open for the next calls
        mappath = os.path.join(self.wc, '.svn', 'hg-shamap')
        fp = self.convertfp = open(mappath, 'a')
    fp.write('%s %d\n' % (destrev, self.revnum(rev)))
    fp.flush()
453 453
454 454 # -- helper functions --
455 455
def revid(self, revnum, module=None):
    """Return the unicode identifier "svn:<uuid><module>@<revnum>",
    module defaulting to the current one."""
    module = module or self.module
    decoded = module.decode(self.encoding)
    return u"svn:%s%s@%s" % (self.uuid, decoded, revnum)
461 461
def revnum(self, rev):
    """Return the revision number ending the revision identifier rev."""
    number = rev[rev.rfind('@') + 1:]
    return int(number)
464 464
def revsplit(self, rev):
    """Split the revision identifier rev into (uuid, module, revnum).

    module is either empty or starts with a slash, revnum is an
    integer.
    """
    # str.rsplit() is not available in Python 2.3, use the strutil
    # replacement instead.
    url, revnum = strutil.rsplit(rev.encode(self.encoding), '@', 1)
    revnum = int(revnum)
    parts = url.split('/', 1)
    # Drop the leading "svn:"
    uuid = parts.pop(0)[4:]
    mod = ''
    if parts:
        mod = '/' + parts[0]
    return uuid, mod, revnum
474 474
def latest(self, path, stop=0):
    """Find the latest revid affecting path, up to stop. It may return
    a revision in a different module, since a branch may be moved without
    a change being reported. Return None if computed module does not
    belong to rootmodule subtree.

    Raise SvnPathNotFound if path cannot be found at stop. A false stop
    means the latest repository revision.
    """
    if not path.startswith(self.rootmodule):
        # Requests on foreign branches may be forbidden at server level
        self.ui.debug(_('ignoring foreign branch %r\n') % path)
        return None

    if not stop:
        stop = svn.ra.get_latest_revnum(self.ra)
    try:
        # path is relative to the repository root
        prevmodule = self.reparent('')
        dirent = svn.ra.stat(self.ra, path.strip('/'), stop)
        self.reparent(prevmodule)
    except SubversionException:
        dirent = None
    if not dirent:
        raise SvnPathNotFound(_('%s not found up to revision %d') % (path, stop))

    # stat() gives us the previous revision on this line of development, but
    # it might be in *another module*. Fetch the log and detect renames down
    # to the latest revision.
    created = dirent.created_rev
    stream = self._getlog([path], stop, created)
    try:
        for changes, revnum, author, date, message in stream:
            if revnum <= created:
                break

            for changed in changes:
                if path.startswith(changed) and changes[changed].copyfrom_path:
                    source = changes[changed].copyfrom_path + path[len(changed):]
                    self.ui.debug(_("branch renamed from %s to %s at %d\n") %
                                  (path, source, revnum))
                    path = source
                    break
    finally:
        stream.close()

    if not path.startswith(self.rootmodule):
        self.ui.debug(_('ignoring foreign branch %r\n') % path)
        return None
    return self.revid(created, path)
522 522
def get_blacklist(self):
    """Avoid certain revision numbers.
    It is not uncommon for two nearby revisions to cancel each other
    out, e.g. 'I copied trunk into a subdirectory of itself instead
    of making a branch'. The converted repository is significantly
    smaller if we ignore such revisions.

    Revision numbers are read, one per line, from "blacklist.txt" in
    the current directory and stored in self.blacklist. Lines starting
    with "#" and lines which are not integers are ignored. IOError is
    raised if the file cannot be read, self.blacklist being an empty
    set in this case."""
    self.blacklist = util.set()
    blacklist = self.blacklist
    fp = open("blacklist.txt", "r")
    # Do not leave the file open until garbage collection
    try:
        for line in fp:
            if line.startswith("#"):
                continue
            try:
                blacklist.add(int(line.strip()))
            except ValueError:
                pass # not an integer or a comment
    finally:
        fp.close()
538 538
def is_blacklisted(self, svn_rev):
    """Tell whether revision number svn_rev is in the blacklist."""
    skipped = self.blacklist
    return svn_rev in skipped
541 541
def reparent(self, module):
    """Reparent the svn transport and return the previous parent.

    Nothing is done, and module is returned, if the transport is
    already parented to module. An unset previous parent is returned
    as an empty string.
    """
    previous = self.prevmodule
    if previous == module:
        return module
    svnurl = self.baseurl + urllib.quote(module)
    self.ui.debug(_("reparent to %s\n") % svnurl)
    svn.ra.reparent(self.ra, svnurl)
    self.prevmodule = module
    if previous is None:
        return ''
    return previous
554 554
def expandpaths(self, rev, paths, parents):
    """Expand the changed paths of rev into files and copies.

    paths is a sequence of (path, entry) pairs, entry exposing the
    copyfrom_path, copyfrom_rev and action attributes. parents is the
    list of parent revision identifiers, only the first one is used.

    Return (files, copies): files is the list of changed or removed
    files, without duplicates, and copies maps copy destinations to
    their sources. Both are relative to the module of rev. The
    transport is reparented to that module if necessary.
    """
    entries = []
    copyfrom = {} # Map of entrypath, revision for finding source of deleted revisions.
    copies = {}

    new_module, revnum = self.revsplit(rev)[1:]
    if new_module != self.module:
        self.module = new_module
        self.reparent(self.module)

    for path, ent in paths:
        entrypath = self.getrelpath(path)
        entry = entrypath.decode(self.encoding)

        kind = self._checkpath(entrypath, revnum)
        if kind == svn.core.svn_node_file:
            # path is a file in rev: record it, and its copy source
            entries.append(self.recode(entry))
            if not ent.copyfrom_path or not parents:
                continue
            # Copy sources not in parent revisions cannot be represented,
            # ignore their origin for now
            pmodule, prevnum = self.revsplit(parents[0])[1:]
            if ent.copyfrom_rev < prevnum:
                continue
            copyfrom_path = self.getrelpath(ent.copyfrom_path, pmodule)
            if not copyfrom_path:
                continue
            self.ui.debug(_("copied to %s from %s@%s\n") %
                          (entrypath, copyfrom_path, ent.copyfrom_rev))
            copies[self.recode(entry)] = self.recode(copyfrom_path)
        elif kind == 0: # gone, but had better be a deleted *file*
            self.ui.debug(_("gone from %s\n") % ent.copyfrom_rev)

            # if a branch is created but entries are removed in the same
            # changeset, get the right fromrev
            # parents cannot be empty here, you cannot remove things from
            # a root revision.
            uuid, old_module, fromrev = self.revsplit(parents[0])

            basepath = old_module + "/" + self.getrelpath(path)
            entrypath = basepath

            def lookup_parts(p):
                # Return (prefix, entry) for the longest proper prefix
                # of p recorded in copyfrom, or None.
                rc = None
                parts = p.split("/")
                for i in range(len(parts)):
                    part = "/".join(parts[:i])
                    info = part, copyfrom.get(part, None)
                    if info[1] is not None:
                        self.ui.debug(_("found parent directory %s\n") % info[1])
                        rc = info
                return rc

            self.ui.debug(_("base, entry %s %s\n") % (basepath, entrypath))

            # froment is only used when frompath is not None
            frompath, froment = lookup_parts(entrypath) or (None, revnum - 1)

            # need to remove fragment from lookup_parts and replace with copyfrom_path
            if frompath is not None:
                self.ui.debug(_("munge-o-matic\n"))
                self.ui.debug(entrypath + '\n')
                self.ui.debug(entrypath[len(frompath):] + '\n')
                entrypath = froment.copyfrom_path + entrypath[len(frompath):]
                fromrev = froment.copyfrom_rev
                self.ui.debug(_("info: %s %s %s %s\n") % (frompath, froment, ent, entrypath))

            # We can avoid the reparent calls if the module has not changed
            # but it probably does not worth the pain.
            prevmodule = self.reparent('')
            fromkind = svn.ra.check_path(self.ra, entrypath.strip('/'), fromrev)
            self.reparent(prevmodule)

            if fromkind == svn.core.svn_node_file: # a deleted file
                entries.append(self.recode(entry))
            elif fromkind == svn.core.svn_node_dir:
                # print "Deleted/moved non-file:", revnum, path, ent
                # children = self._find_children(path, revnum - 1)
                # print "find children %s@%d from %d action %s" % (path, revnum, ent.copyfrom_rev, ent.action)
                # Sometimes this is tricky. For example: in
                # The Subversion Repository revision 6940 a dir
                # was copied and one of its files was deleted
                # from the new location in the same commit. This
                # code can't deal with that yet.
                if ent.action == 'C':
                    children = self._find_children(path, fromrev)
                else:
                    # List the children at the old location and move
                    # them under the new one
                    oroot = entrypath.strip('/')
                    nroot = path.strip('/')
                    children = self._find_children(oroot, fromrev)
                    children = [s.replace(oroot,nroot) for s in children]
                # Mark all [files, not directories] as deleted.
                for child in children:
                    # Can we move a child directory and its
                    # parent in the same commit? (probably can). Could
                    # cause problems if instead of revnum -1,
                    # we have to look in (copyfrom_path, revnum - 1)
                    entrypath = self.getrelpath("/" + child, module=old_module)
                    if entrypath:
                        entry = self.recode(entrypath.decode(self.encoding))
                        if entry in copies:
                            # deleted file within a copy
                            del copies[entry]
                        else:
                            entries.append(entry)
            else:
                self.ui.debug(_('unknown path in revision %d: %s\n') % \
                              (revnum, path))
        elif kind == svn.core.svn_node_dir:
            # Should probably synthesize normal file entries
            # and handle as above to clean up copy/rename handling.

            # If the directory just had a prop change,
            # then we shouldn't need to look for its children.
            if ent.action == 'M':
                continue

            # Also this could create duplicate entries. Not sure
            # whether this will matter. Maybe should make entries a set.
            # print "Changed directory", revnum, path, ent.action, ent.copyfrom_path, ent.copyfrom_rev
            # This will fail if a directory was copied
            # from another branch and then some of its files
            # were deleted in the same transaction.
            children = util.sort(self._find_children(path, revnum))
            for child in children:
                # Can we move a child directory and its
                # parent in the same commit? (probably can). Could
                # cause problems if instead of revnum -1,
                # we have to look in (copyfrom_path, revnum - 1)
                entrypath = self.getrelpath("/" + child)
                # print child, self.module, entrypath
                if entrypath:
                    # Need to filter out directories here...
                    kind = self._checkpath(entrypath, revnum)
                    if kind != svn.core.svn_node_dir:
                        entries.append(self.recode(entrypath))

            # Copies here (must copy all from source)
            # Probably not a real problem for us if
            # source does not exist
            if not ent.copyfrom_path or not parents:
                continue
            # Copy sources not in parent revisions cannot be represented,
            # ignore their origin for now
            pmodule, prevnum = self.revsplit(parents[0])[1:]
            if ent.copyfrom_rev < prevnum:
                continue
            copyfrompath = ent.copyfrom_path.decode(self.encoding)
            copyfrompath = self.getrelpath(copyfrompath, pmodule)
            if not copyfrompath:
                continue
            # Remembered for the entries removed below this directory,
            # see lookup_parts() above
            copyfrom[path] = ent
            self.ui.debug(_("mark %s came from %s:%d\n")
                          % (path, copyfrompath, ent.copyfrom_rev))
            children = self._find_children(ent.copyfrom_path, ent.copyfrom_rev)
            children.sort()
            for child in children:
                entrypath = self.getrelpath("/" + child, pmodule)
                if not entrypath:
                    continue
                entry = entrypath.decode(self.encoding)
                copytopath = path + entry[len(copyfrompath):]
                copytopath = self.getrelpath(copytopath)
                copies[self.recode(copytopath)] = self.recode(entry, pmodule)

    return (util.unique(entries), copies)
720 720
def _fetch_revisions(self, from_revnum, to_revnum):
    """Read the log of the current module between two revision numbers
    and fill self.commits and self.paths with what is found.

    The revisions are given in any order, the log is read from the
    highest one. Reading stops early at a branch root or below
    self.startrev. Blacklisted revisions and revisions without changed
    paths are skipped.

    Raises util.Abort if the log request fails with
    SVN_ERR_FS_NO_SUCH_REVISION, other SubversionException are
    propagated.
    """
    if from_revnum < to_revnum:
        from_revnum, to_revnum = to_revnum, from_revnum

    # Last commit parsed so far. As the log is read backward, it is
    # the child of the next one to be parsed.
    self.child_cset = None

    def parselogentry(orig_paths, revnum, author, date, message):
        """Return the parsed commit object or None, and True if
        the revision is a branch root.
        """
        self.ui.debug(_("parsing revision %d (%d changes)\n") %
                      (revnum, len(orig_paths)))

        branched = False
        rev = self.revid(revnum)
        # branch log might return entries for a parent we already have

        if (rev in self.commits or revnum < to_revnum):
            return None, branched

        parents = []
        # check whether this revision is the start of a branch or part
        # of a branch renaming
        orig_paths = util.sort(orig_paths.items())
        root_paths = [(p,e) for p,e in orig_paths if self.module.startswith(p)]
        if root_paths:
            # Last one of the sorted changed paths prefixing the module
            path, ent = root_paths[-1]
            if ent.copyfrom_path:
                branched = True
                newpath = ent.copyfrom_path + self.module[len(path):]
                # ent.copyfrom_rev may not be the actual last revision
                previd = self.latest(newpath, ent.copyfrom_rev)
                if previd is not None:
                    prevmodule, prevnum = self.revsplit(previd)[1:]
                    if prevnum >= self.startrev:
                        parents = [previd]
                        self.ui.note(_('found parent of branch %s at %d: %s\n') %
                                     (self.module, prevnum, prevmodule))
            else:
                self.ui.debug(_("no copyfrom path, don't know what to do.\n"))

        paths = []
        # filter out unrelated paths
        for path, ent in orig_paths:
            if self.getrelpath(path) is None:
                continue
            paths.append((path, ent))

        # Example SVN datetime. Includes microseconds.
        # ISO-8601 conformant
        # '2007-01-04T17:35:00.902377Z'
        date = util.parsedate(date[:19] + " UTC", ["%Y-%m-%dT%H:%M:%S"])

        log = message and self.recode(message) or ''
        author = author and self.recode(author) or ''
        try:
            # The branch is named after the last module component,
            # trunk being the unnamed one
            branch = self.module.split("/")[-1]
            if branch == 'trunk':
                branch = ''
        except IndexError:
            branch = None

        cset = commit(author=author,
                      date=util.datestr(date),
                      desc=log,
                      parents=parents,
                      branch=branch,
                      rev=rev.encode('utf-8'))

        self.commits[rev] = cset
        # The parents list is *shared* among self.paths and the
        # commit object. Both will be updated below.
        self.paths[rev] = (paths, cset.parents)
        # The previously parsed commit is newer: make this one its
        # parent unless it already has one
        if self.child_cset and not self.child_cset.parents:
            self.child_cset.parents[:] = [rev]
        self.child_cset = cset
        return cset, branched

    self.ui.note(_('fetching revision log for "%s" from %d to %d\n') %
                 (self.module, from_revnum, to_revnum))

    try:
        # Last commit parsed, that is the oldest of the sequence
        firstcset = None
        lastonbranch = False
        stream = self._getlog([self.module], from_revnum, to_revnum)
        try:
            for entry in stream:
                paths, revnum, author, date, message = entry
                if revnum < self.startrev:
                    lastonbranch = True
                    break
                if self.is_blacklisted(revnum):
                    self.ui.note(_('skipping blacklisted revision %d\n')
                                 % revnum)
                    continue
                if paths is None:
                    self.ui.debug(_('revision %d has no entries\n') % revnum)
                    continue
                cset, lastonbranch = parselogentry(paths, revnum, author,
                                                   date, message)
                if cset:
                    firstcset = cset
                if lastonbranch:
                    break
        finally:
            stream.close()

        if not lastonbranch and firstcset and not firstcset.parents:
            # The first revision of the sequence (the last fetched one)
            # has invalid parents if not a branch root. Find the parent
            # revision now, if any.
            try:
                firstrevnum = self.revnum(firstcset.rev)
                if firstrevnum > 1:
                    latest = self.latest(self.module, firstrevnum - 1)
                    if latest:
                        firstcset.parents.append(latest)
            except SvnPathNotFound:
                pass
    except SubversionException, (inst, num):
        if num == svn.core.SVN_ERR_FS_NO_SUCH_REVISION:
            raise util.Abort(_('svn: branch has no revision %s') % to_revnum)
        raise
844 844
def _getfile(self, file, rev):
    """Fetch the content and mode flag of file as of revision rev.

    Returns a (data, mode) pair.  mode is 'l' when the file carries the
    svn:special property, 'x' when it carries svn:executable, and ''
    otherwise.  For 'l' entries a leading "link " marker is removed from
    the data.

    Raises IOError when Subversion reports the path as not found; any
    other SubversionException propagates unchanged.
    """
    # TODO: ra.get_file transmits the whole file instead of diffs.
    mode = ''
    try:
        target_module, revnum = self.revsplit(rev)[1:]
        if target_module != self.module:
            # The revision lives in another module: move the RA session.
            self.module = target_module
            self.reparent(self.module)
        buf = StringIO()
        props = svn.ra.get_file(self.ra, file, revnum, buf)
        data = buf.getvalue()
        # ra.get_file() seems to keep a reference on the input buffer,
        # preventing collection. Release it explicitly.
        buf.close()
        if isinstance(props, list):
            props = props[-1]
        if "svn:executable" in props:
            mode = 'x'
        # svn:special takes precedence over svn:executable.
        if "svn:special" in props:
            mode = 'l'
    except SubversionException:
        err = sys.exc_info()[1]
        missing = (svn.core.SVN_ERR_FS_NOT_FOUND,
                   svn.core.SVN_ERR_RA_DAV_PATH_NOT_FOUND)
        if err.apr_err in missing: # File not found
            raise IOError()
        raise
    if mode == 'l':
        marker = "link "
        if data.startswith(marker):
            data = data[len(marker):]
    return data, mode
874 874
def _find_children(self, path, revnum):
    """Return the entries svn.client.ls reports under path at revnum.

    Each returned name is prefixed with path (stripped of surrounding
    slashes) and a '/'.
    """
    path = path.strip('/')
    pool = Pool()
    url = '/'.join([self.baseurl, urllib.quote(path)]).strip('/')
    # The third argument (True) is presumably the "recurse" flag of
    # svn.client.ls -- confirm against the bindings in use.
    listing = svn.client.ls(url, optrev(revnum), True, self.ctx, pool)
    children = []
    for name in listing.keys():
        children.append('%s/%s' % (path, name))
    return children
881 881
def getrelpath(self, path, module=None):
    """Return path relative to module, or None if it is outside of it.

    module defaults to self.module.  The module path itself maps to the
    empty string.  A path that merely shares a textual prefix with the
    module (without a '/' boundary) is treated as outside.
    """
    if module is None:
        module = self.module
    # Given the repository url of this wc, say
    #   "http://server/plone/CMFPlone/branches/Plone-2_0-branch"
    # extract the "entry" portion (a relative path) from what
    # svn log --xml says, ie
    #   "/CMFPlone/branches/Plone-2_0-branch/tests/PloneTestCase.py"
    # that is to say "tests/PloneTestCase.py"
    if path.startswith(module):
        tail = path.rstrip('/')[len(module):]
        if tail == '' or tail.startswith('/'):
            # Drop the separating slash; slicing '' simply yields ''.
            return tail[1:]

    # The path is outside our tracked tree...
    self.ui.debug(_('%r is not under %r, ignoring\n') % (path, module))
    return None
901 901
def _checkpath(self, path, revnum):
    """Return what svn.ra.check_path reports for path at revnum."""
    # ra.check_path does not like leading slashes very much, it leads
    # to PROPFIND subversion errors
    trimmed = path.strip('/')
    return svn.ra.check_path(self.ra, trimmed, revnum)
906 906
def _getlog(self, paths, start, end, limit=0, discover_changed_paths=True,
            strict_node_history=False):
    """Start 'hg debugsvnlog' in a child process and return its output
    wrapped in a logstream.

    The query (base URL, normalized paths, revision range and the three
    option values) is serialized with encodeargs and written to the
    child's stdin, which is then closed.
    """
    # Normalize path names, svn >= 1.5 only wants paths relative to
    # supplied URL
    normalized = []
    for entry in paths:
        if entry.startswith('/'):
            full = entry
        else:
            full = self.module + '/' + entry
        normalized.append(full.strip('/'))
    payload = encodeargs([self.baseurl, normalized, start, end, limit,
                          discover_changed_paths, strict_node_history])
    command = '%s debugsvnlog' % util.shellquote(util.hgexecutable())
    child_in, child_out = util.popen2(command, 'b')
    child_in.write(payload)
    child_in.close()
    return logstream(child_out)
925 925
# Shell script installed as the 'pre-revprop-change' hook of repositories
# created by svn_sink.  It exits 0 (allowing the change) only when svn:log
# is modified or when hg:convert-branch / hg:convert-rev is added; every
# other revision property change is rejected with exit status 1.
pre_revprop_change = '''#!/bin/sh

REPOS="$1"
REV="$2"
USER="$3"
PROPNAME="$4"
ACTION="$5"

if [ "$ACTION" = "M" -a "$PROPNAME" = "svn:log" ]; then exit 0; fi
if [ "$ACTION" = "A" -a "$PROPNAME" = "hg:convert-branch" ]; then exit 0; fi
if [ "$ACTION" = "A" -a "$PROPNAME" = "hg:convert-rev" ]; then exit 0; fi

echo "Changing prohibited revision property" >&2
exit 1
'''
941 941
class svn_sink(converter_sink, commandline):
    """Conversion sink that commits changesets through a Subversion
    working copy driven by the 'svn' command line client.

    File changes are applied to the working copy (self.wc); the pending
    svn operations are accumulated in self.delete, self.copies,
    self.setexec and self.delexec and flushed by putcommit().
    """

    # Extracts the new revision number from the output of 'svn commit'.
    commit_re = re.compile(r'Committed revision (\d+).', re.M)

    def prerun(self):
        """Change into the working copy directory, if one is set."""
        if self.wc:
            os.chdir(self.wc)

    def postrun(self):
        """Change back to the directory recorded when the sink was
        created, if a working copy is set."""
        if self.wc:
            os.chdir(self.cwd)

    def join(self, name):
        """Return the path of name inside the working copy's '.svn'."""
        return os.path.join(self.wc, '.svn', name)

    def revmapfile(self):
        """Return the path of the revision map file."""
        return self.join('hg-shamap')

    def authorfile(self):
        """Return the path of the author map file."""
        return self.join('hg-authormap')

    def __init__(self, ui, path):
        """Prepare a working copy for path.

        If path already is a working copy (it has '.svn/entries') it is
        updated and used directly.  Otherwise a '<basename>-wc' checkout
        is made in the current directory; when path's parent directory
        exists, path is treated as a local repository, created with
        'svnadmin create' if it is not one yet, and accessed through a
        file:// URL.  A freshly created repository gets the
        pre-revprop-change hook installed.
        """
        converter_sink.__init__(self, ui, path)
        commandline.__init__(self, ui, 'svn')
        self.delete = []
        self.setexec = []
        self.delexec = []
        self.copies = []
        self.wc = None
        self.cwd = os.getcwd()

        path = os.path.realpath(path)

        # Holds the repository path when we had to create it, else False.
        created = False
        if os.path.isfile(os.path.join(path, '.svn', 'entries')):
            self.wc = path
            self.run0('update')
        else:
            wcpath = os.path.join(os.getcwd(), os.path.basename(path) + '-wc')

            if os.path.isdir(os.path.dirname(path)):
                if not os.path.exists(os.path.join(path, 'db', 'fs-type')):
                    ui.status(_('initializing svn repo %r\n') %
                              os.path.basename(path))
                    commandline(ui, 'svnadmin').run0('create', path)
                    created = path
                path = util.normpath(path)
                if not path.startswith('/'):
                    path = '/' + path
                path = 'file://' + path

            ui.status(_('initializing svn wc %r\n') % os.path.basename(wcpath))
            self.run0('checkout', path, wcpath)

            self.wc = wcpath
        self.opener = util.opener(self.wc)
        self.wopener = util.opener(self.wc)
        self.childmap = mapfile(ui, self.join('hg-childmap'))
        # None when the working copy's filesystem has no usable exec bit.
        self.is_exec = util.checkexec(self.wc) and util.is_exec or None

        if created:
            hook = os.path.join(created, 'hooks', 'pre-revprop-change')
            fp = open(hook, 'w')
            try:
                fp.write(pre_revprop_change)
            finally:
                # Close the hook file even if the write fails.
                fp.close()
            util.set_flags(hook, False, True)

        xport = transport.SvnRaTransport(url=geturl(path))
        self.uuid = svn.ra.get_uuid(xport.ra)

    def wjoin(self, *names):
        """Join names onto the working copy directory."""
        return os.path.join(self.wc, *names)

    def putfile(self, filename, flags, data):
        """Write filename into the working copy.

        With 'l' in flags a symlink pointing at data is created.
        Otherwise data is written as a regular file, its exec bit is set
        according to 'x' in flags, and the file is queued in
        self.setexec / self.delexec when the svn:executable property has
        to change.
        """
        if 'l' in flags:
            self.wopener.symlink(data, filename)
        else:
            target = self.wjoin(filename)
            try:
                if os.path.islink(target):
                    # Remove the link inside the working copy.  The bare
                    # relative name would be resolved against the current
                    # directory, which need not be the working copy.
                    os.unlink(target)
            except OSError:
                pass
            fp = self.wopener(filename, 'w')
            try:
                fp.write(data)
            finally:
                fp.close()

            if self.is_exec:
                was_exec = self.is_exec(target)
            else:
                # On filesystems not supporting execute-bit, there is no way
                # to know if it is set but asking subversion. Setting it
                # systematically is just as expensive and much simpler.
                was_exec = 'x' not in flags

            util.set_flags(target, False, 'x' in flags)
            if was_exec:
                if 'x' not in flags:
                    self.delexec.append(filename)
            else:
                if 'x' in flags:
                    self.setexec.append(filename)

    def _copyfile(self, source, dest):
        """Record a copy from source to dest with 'svn copy'."""
        # SVN's copy command pukes if the destination file exists, but
        # our copyfile method expects to record a copy that has
        # already occurred. Cross the semantic gap.
        wdest = self.wjoin(dest)
        exists = os.path.exists(wdest)
        if exists:
            # Reserve a unique name next to the destination and move the
            # already written file out of the way.
            fd, tempname = tempfile.mkstemp(
                prefix='hg-copy-', dir=os.path.dirname(wdest))
            os.close(fd)
            os.unlink(tempname)
            os.rename(wdest, tempname)
        try:
            self.run0('copy', source, dest)
        finally:
            if exists:
                # Put our content back over whatever svn created.
                try:
                    os.unlink(wdest)
                except OSError:
                    pass
                os.rename(tempname, wdest)

    def dirs_of(self, files):
        """Return the set of directories related to files: every entry
        that is itself a directory in the working copy plus every parent
        directory prefix of each entry."""
        dirs = util.set()
        for f in files:
            if os.path.isdir(self.wjoin(f)):
                dirs.add(f)
            for i in strutil.rfindall(f, '/'):
                dirs.add(f[:i])
        return dirs

    def add_dirs(self, files):
        """'svn add' (non-recursively) the directories of files that have
        no '.svn/entries' yet and return them."""
        add_dirs = [d for d in util.sort(self.dirs_of(files))
                    if not os.path.exists(self.wjoin(d, '.svn', 'entries'))]
        if add_dirs:
            self.xargs(add_dirs, 'add', non_recursive=True, quiet=True)
        return add_dirs

    def add_files(self, files):
        """'svn add' files (when there are any) and return them."""
        if files:
            self.xargs(files, 'add', quiet=True)
        return files

    def tidy_dirs(self, names):
        """'svn delete' the directories of names that hold nothing but
        their '.svn' entry and return the list of deleted directories.

        Directories are visited in reverse sorted order, so nested
        directories come before their parents.
        """
        dirs = util.sort(self.dirs_of(names))
        dirs.reverse()
        deleted = []
        for d in dirs:
            # os.listdir() returns a list: compare against a list, since
            # a comparison with the bare string '.svn' can never be true.
            if os.listdir(self.wjoin(d)) == ['.svn']:
                self.run0('delete', d)
                deleted.append(d)
        return deleted

    def addchild(self, parent, child):
        """Remember child as the svn revision committed on top of parent."""
        self.childmap[parent] = child

    def revid(self, rev):
        """Return the converter revision identifier for svn revision rev."""
        return u"svn:%s@%s" % (self.uuid, rev)

    def putcommit(self, files, copies, parents, commit, source):
        """Apply one changeset to the working copy and commit it.

        files is a sequence of (name, version) pairs fetched from source;
        a file for which source.getfile raises IOError is scheduled for
        deletion.  copies maps destination names to copy sources.

        If one of parents already has an entry in the child map, that
        child's revision id is returned without committing.  Otherwise
        the pending adds, copies, deletes and property changes are
        flushed, 'svn commit' is run, and the new revision id is
        returned.  Raises util.Abort when the commit output does not
        contain a revision number.
        """
        # Apply changes to working copy
        for f, v in files:
            try:
                data = source.getfile(f, v)
            except IOError:
                self.delete.append(f)
            else:
                e = source.getmode(f, v)
                self.putfile(f, e, data)
                if f in copies:
                    self.copies.append([copies[f], f])
        files = [f[0] for f in files]

        for parent in parents:
            try:
                return self.revid(self.childmap[parent])
            except KeyError:
                pass
        entries = util.set(self.delete)
        files = util.frozenset(files)
        entries.update(self.add_dirs(files.difference(entries)))
        if self.copies:
            for s, d in self.copies:
                self._copyfile(s, d)
            self.copies = []
        if self.delete:
            self.xargs(self.delete, 'delete')
            self.delete = []
        entries.update(self.add_files(files.difference(entries)))
        entries.update(self.tidy_dirs(entries))
        if self.delexec:
            self.xargs(self.delexec, 'propdel', 'svn:executable')
            self.delexec = []
        if self.setexec:
            self.xargs(self.setexec, 'propset', 'svn:executable', '*')
            self.setexec = []

        fd, messagefile = tempfile.mkstemp(prefix='hg-convert-')
        try:
            # Writing the message happens inside the try block so the
            # temporary file is removed even when the write fails.
            fp = os.fdopen(fd, 'w')
            try:
                fp.write(commit.desc)
            finally:
                fp.close()
            output = self.run0('commit',
                               username=util.shortuser(commit.author),
                               file=messagefile,
                               encoding='utf-8')
            try:
                rev = self.commit_re.search(output).group(1)
            except AttributeError:
                # search() returned None: no revision number in the output.
                self.ui.warn(_('unexpected svn output:\n'))
                self.ui.warn(output)
                raise util.Abort(_('unable to cope with svn output'))
            if commit.rev:
                self.run('propset', 'hg:convert-rev', commit.rev,
                         revprop=True, revision=rev)
            if commit.branch and commit.branch != 'default':
                self.run('propset', 'hg:convert-branch', commit.branch,
                         revprop=True, revision=rev)
            for parent in parents:
                self.addchild(parent, rev)
            return self.revid(rev)
        finally:
            os.unlink(messagefile)

    def puttags(self, tags):
        """Tags are not supported by this sink yet; only warn."""
        self.ui.warn(_('XXX TAGS NOT IMPLEMENTED YET\n'))
General Comments 0
You need to be logged in to leave comments. Login now