##// END OF EJS Templates
convert/svn: fix _iterfiles() output in root dir case (issue2647)...
Patrick Mezard -
r13651:9777df92 stable
parent child Browse files
Show More
@@ -1,1172 +1,1174 b''
1 1 # Subversion 1.4/1.5 Python API backend
2 2 #
3 3 # Copyright(C) 2007 Daniel Holth et al
4 4
5 5 import os
6 6 import re
7 7 import sys
8 8 import cPickle as pickle
9 9 import tempfile
10 10 import urllib
11 11 import urllib2
12 12
13 13 from mercurial import strutil, util, encoding
14 14 from mercurial.i18n import _
15 15
16 16 # Subversion stuff. Works best with very recent Python SVN bindings
17 17 # e.g. SVN 1.5 or backports. Thanks to the bzr folks for enhancing
18 18 # these bindings.
19 19
20 20 from cStringIO import StringIO
21 21
22 22 from common import NoRepo, MissingTool, commit, encodeargs, decodeargs
23 23 from common import commandline, converter_source, converter_sink, mapfile
24 24
25 25 try:
26 26 from svn.core import SubversionException, Pool
27 27 import svn
28 28 import svn.client
29 29 import svn.core
30 30 import svn.ra
31 31 import svn.delta
32 32 import transport
33 33 import warnings
34 34 warnings.filterwarnings('ignore',
35 35 module='svn.core',
36 36 category=DeprecationWarning)
37 37
38 38 except ImportError:
39 39 svn = None
40 40
class SvnPathNotFound(Exception):
    """Raised when a path cannot be found in the Subversion repository."""
43 43
def geturl(path):
    """Return a repository URL for path.

    The svn client is asked first.  If it raises SubversionException
    and path is a local directory, a quoted file:// URL is built from
    its absolute path.  Anything else is returned unchanged.
    """
    try:
        canonical = svn.core.svn_path_canonicalize(path)
        return svn.client.url_from_path(canonical)
    except SubversionException:
        # The client could not map path to a URL, try by hand below.
        pass
    if not os.path.isdir(path):
        return path
    localpath = os.path.normpath(os.path.abspath(path))
    if os.name == 'nt':
        localpath = '/' + util.normpath(localpath)
    # Module URL is later compared with the repository URL returned
    # by svn API, which is UTF-8.
    localpath = encoding.tolocal(localpath)
    return 'file://%s' % urllib.quote(localpath)
58 58
def optrev(number):
    """Return an svn_opt_revision_t designating revision number."""
    revision = svn.core.svn_opt_revision_t()
    revision.kind = svn.core.svn_opt_revision_number
    revision.value.number = number
    return revision
64 64
class changedpath(object):
    """Plain copy of the copyfrom_path, copyfrom_rev and action
    attributes of a changed path entry.
    """
    def __init__(self, p):
        for name in ('copyfrom_path', 'copyfrom_rev', 'action'):
            setattr(self, name, getattr(p, name))
70 70
71 71 def get_log_child(fp, url, paths, start, end, limit=0, discover_changed_paths=True,
72 72 strict_node_history=False):
73 73 protocol = -1
74 74 def receiver(orig_paths, revnum, author, date, message, pool):
75 75 if orig_paths is not None:
76 76 for k, v in orig_paths.iteritems():
77 77 orig_paths[k] = changedpath(v)
78 78 pickle.dump((orig_paths, revnum, author, date, message),
79 79 fp, protocol)
80 80
81 81 try:
82 82 # Use an ra of our own so that our parent can consume
83 83 # our results without confusing the server.
84 84 t = transport.SvnRaTransport(url=url)
85 85 svn.ra.get_log(t.ra, paths, start, end, limit,
86 86 discover_changed_paths,
87 87 strict_node_history,
88 88 receiver)
89 89 except SubversionException, (inst, num):
90 90 pickle.dump(num, fp, protocol)
91 91 except IOError:
92 92 # Caller may interrupt the iteration
93 93 pickle.dump(None, fp, protocol)
94 94 else:
95 95 pickle.dump(None, fp, protocol)
96 96 fp.close()
97 97 # With large history, cleanup process goes crazy and suddenly
98 98 # consumes *huge* amount of memory. The output file being closed,
99 99 # there is no need for clean termination.
100 100 os._exit(0)
101 101
def debugsvnlog(ui, **opts):
    """Fetch SVN log in a subprocess and channel them back to parent to
    avoid memory collection issues.

    The encoded get_log_child() arguments are read from stdin and the
    pickled log entries are written to stdout, both set to binary mode.
    """
    for stdstream in (sys.stdin, sys.stdout):
        util.set_binary(stdstream)
    childargs = decodeargs(sys.stdin.read())
    get_log_child(sys.stdout, *childargs)
110 110
class logstream(object):
    """Interruptible revision log iterator."""
    def __init__(self, stdout):
        self._stdout = stdout

    def __iter__(self):
        """Yield the pickled log entries read from the stream.

        A None record ends the iteration.  A record which is not a
        five item entry is reported as a SubversionException, and a
        stream ending before the None record aborts.
        """
        while True:
            try:
                entry = pickle.load(self._stdout)
            except EOFError:
                raise util.Abort(_('Mercurial failed to run itself, check'
                                   ' hg executable is in PATH'))
            if entry is None:
                # End of log marker
                break
            try:
                orig_paths, revnum, author, date, message = entry
            except:
                raise SubversionException("child raised exception", entry)
            yield entry

    def close(self):
        """Close the underlying stream, once."""
        if self._stdout:
            self._stdout.close()
            self._stdout = None
135 135
136 136
137 137 # Check to see if the given path is a local Subversion repo. Verify this by
138 138 # looking for several svn-specific files and directories in the given
139 139 # directory.
def filecheck(ui, path, proto):
    """Return True if path contains the 'locks', 'hooks', 'format' and
    'db' entries.  ui and proto are not used.
    """
    markers = ('locks', 'hooks', 'format', 'db')
    missing = [name for name in markers
               if not os.path.exists(os.path.join(path, name))]
    return not missing
145 145
146 146 # Check to see if a given path is the root of an svn repo over http. We verify
147 147 # this by requesting a version-controlled URL we know can't exist and looking
148 148 # for the svn-specific "not found" XML.
149 149 def httpcheck(ui, path, proto):
150 150 try:
151 151 opener = urllib2.build_opener()
152 152 rsp = opener.open('%s://%s/!svn/ver/0/.svn' % (proto, path))
153 153 data = rsp.read()
154 154 except urllib2.HTTPError, inst:
155 155 if inst.code != 404:
156 156 # Except for 404 we cannot know for sure this is not an svn repo
157 157 ui.warn(_('svn: cannot probe remote repository, assume it could '
158 158 'be a subversion repository. Use --source-type if you '
159 159 'know better.\n'))
160 160 return True
161 161 data = inst.fp.read()
162 162 except:
163 163 # Could be urllib2.URLError if the URL is invalid or anything else.
164 164 return False
165 165 return '<m:human-readable errcode="160013">' in data
166 166
# Repository probe to run for each supported URL scheme
protomap = {
    'http': httpcheck,
    'https': httpcheck,
    'file': filecheck,
}
def issvnurl(ui, url):
    """Return True if url or one of its parents passes the probe
    registered in protomap for its scheme.

    A url without '://' is handled as a local path.  Unknown schemes
    never match.
    """
    try:
        proto, path = url.split('://', 1)
        if proto == 'file':
            path = urllib.url2pathname(path)
    except ValueError:
        # No scheme separator, this is a plain filesystem path
        proto = 'file'
        path = os.path.abspath(url)
    if proto == 'file':
        path = path.replace(os.sep, '/')
    probe = protomap.get(proto, lambda *args: False)
    # Walk up the path, dropping one trailing component at a time
    while '/' in path:
        if probe(ui, path, proto):
            return True
        path = path.rsplit('/', 1)[0]
    return False
187 187
188 188 # SVN conversion code stolen from bzr-svn and tailor
189 189 #
190 190 # Subversion looks like a versioned filesystem, branches structures
191 191 # are defined by conventions and not enforced by the tool. First,
192 192 # we define the potential branches (modules) as "trunk" and "branches"
193 193 # children directories. Revisions are then identified by their
194 194 # module and revision number (and a repository identifier).
195 195 #
196 196 # The revision graph is really a tree (or a forest). By default, a
197 197 # revision parent is the previous revision in the same module. If the
198 198 # module directory is copied/moved from another module then the
199 199 # revision is the module root and its parent the source revision in
200 200 # the parent module. A revision has at most one parent.
201 201 #
202 202 class svn_source(converter_source):
def __init__(self, ui, url, rev=None):
    """Open the Subversion repository at url and locate its head.

    url is a repository URL or a local path, optionally suffixed with
    '@REV'.  rev, when set, takes precedence over that suffix as the
    latest revision to consider.

    Raise NoRepo if url does not look like, or cannot be opened as, a
    Subversion repository, MissingTool if the Python bindings are
    missing or older than 1.4, and util.Abort on non-integer revisions
    or when no revision is found in the module.
    """
    super(svn_source, self).__init__(ui, url, rev=rev)

    # Cheap checks come first, issvnurl() may probe the network
    if not (url.startswith('svn://') or url.startswith('svn+ssh://') or
            (os.path.exists(url) and
             os.path.exists(os.path.join(url, '.svn'))) or
            issvnurl(ui, url)):
        raise NoRepo(_("%s does not look like a Subversion repository")
                     % url)
    if svn is None:
        raise MissingTool(_('Could not load Subversion python bindings'))

    try:
        bindings = svn.core.SVN_VER_MAJOR, svn.core.SVN_VER_MINOR
        if bindings < (1, 4):
            raise MissingTool(_('Subversion python bindings %d.%d found, '
                                '1.4 or later required') % bindings)
    except AttributeError:
        raise MissingTool(_('Subversion python bindings are too old, 1.4 '
                            'or later required'))

    self.lastrevs = {}

    latest = None
    try:
        # Support file://path@rev syntax. Useful e.g. to convert
        # deleted branches.
        atpos = url.rfind('@')
        if atpos >= 0:
            latest = int(url[atpos + 1:])
            url = url[:atpos]
    except ValueError:
        # The suffix is not a revision number, keep url as is
        pass
    self.url = geturl(url)
    self.encoding = 'UTF-8' # Subversion is always nominal UTF-8
    try:
        self.transport = transport.SvnRaTransport(url=self.url)
        self.ra = self.transport.ra
        self.ctx = self.transport.client
        self.baseurl = svn.ra.get_repos_root(self.ra)
        # Module is either empty or a repository path starting with
        # a slash and not ending with a slash.
        self.module = urllib.unquote(self.url[len(self.baseurl):])
        self.prevmodule = None
        self.rootmodule = self.module
        self.commits = {}
        self.paths = {}
        self.uuid = svn.ra.get_uuid(self.ra)
    except SubversionException:
        ui.traceback()
        raise NoRepo(_("%s does not look like a Subversion repository")
                     % self.url)

    if rev:
        try:
            latest = int(rev)
        except ValueError:
            raise util.Abort(_('svn: revision %s is not an integer') % rev)

    self.startrev = self.ui.config('convert', 'svn.startrev', default=0)
    try:
        self.startrev = int(self.startrev)
        if self.startrev < 0:
            self.startrev = 0
    except ValueError:
        raise util.Abort(_('svn: start revision %s is not an integer')
                         % self.startrev)

    self.head = self.latest(self.module, latest)
    if not self.head:
        raise util.Abort(_('no revision found in module %s')
                         % self.module)
    self.last_changed = self.revnum(self.head)

    self._changescache = None

    # Remember the working copy, if url is one
    self.wc = None
    if os.path.exists(os.path.join(url, '.svn/entries')):
        self.wc = url
    self.convertfp = None
284 284
def setrevmap(self, revmap):
    """Record in self.lastrevs the highest revision number found in
    revmap keys for each module.
    """
    highest = {}
    for revid in revmap:
        module, revnum = self.revsplit(revid)[1:]
        if module not in highest or revnum > highest[module]:
            highest[module] = revnum
    self.lastrevs = highest
293 293
def exists(self, path, optrev):
    """Return True if path, relative to self.url, can be listed at
    revision optrev, False if the listing raises SubversionException.
    """
    target = self.url.rstrip('/') + '/' + urllib.quote(path)
    try:
        svn.client.ls(target, optrev, False, self.ctx)
    except SubversionException:
        return False
    return True
301 301
def getheads(self):
    """Return the list of head revision ids to convert.

    The trunk, tags and branches locations come from the convert
    'svn.trunk', 'svn.tags' and 'svn.branches' settings, defaulting to
    the directories of the same names.  The first head is the module
    one, followed by one head per non-empty branch directory.  As side
    effects self.module, self.head, self.tags and self.heads are
    updated.
    """

    def isdir(path, revnum):
        return self._checkpath(path, revnum) == svn.core.svn_node_dir

    def getcfgpath(name, rev):
        configured = self.ui.config('convert', 'svn.' + name)
        if configured is not None and configured.strip() == '':
            # Explicitly disabled by an empty setting
            return None
        path = (configured or name).strip('/')
        if self.exists(path, rev):
            self.ui.note(_('found %s at %r\n') % (name, path))
            return path
        if self.module.endswith(path) and name == 'trunk':
            # we are converting from inside this directory
            return None
        if configured:
            raise util.Abort(_('expected %s to be at %r, but not found')
                             % (name, path))
        return None

    rev = optrev(self.last_changed)
    oldmodule = ''
    trunk = getcfgpath('trunk', rev)
    self.tags = getcfgpath('tags', rev)
    branches = getcfgpath('branches', rev)

    # If the project has a trunk or branches, we will extract heads
    # from them. We keep the project root otherwise.
    if trunk:
        oldmodule = self.module or ''
        self.module += '/' + trunk
        self.head = self.latest(self.module, self.last_changed)
        if not self.head:
            raise util.Abort(_('no revision found in module %s')
                             % self.module)

    # First head in the list is the module's head
    self.heads = [self.head]
    if self.tags is not None:
        self.tags = '%s/%s' % (oldmodule, (self.tags or 'tags'))

    # Check if branches bring a few more heads to the list
    if branches:
        rpath = self.url.strip('/')
        branchnames = svn.client.ls(rpath + '/' + urllib.quote(branches),
                                    rev, False, self.ctx)
        for branch in branchnames.keys():
            module = '%s/%s/%s' % (oldmodule, branches, branch)
            if not isdir(module, self.last_changed):
                continue
            brevid = self.latest(module, self.last_changed)
            if not brevid:
                self.ui.note(_('ignoring empty branch %s\n') % branch)
                continue
            self.ui.note(_('found branch %s at %d\n') %
                         (branch, self.revnum(brevid)))
            self.heads.append(brevid)

    if self.startrev and self.heads:
        if len(self.heads) > 1:
            raise util.Abort(_('svn: start revision is not supported '
                               'with more than one branch'))
        headnum = self.revnum(self.heads[0])
        if headnum < self.startrev:
            raise util.Abort(
                _('svn: no revision found after start revision %d')
                % self.startrev)

    return self.heads
373 373
def getchanges(self, rev):
    """Return (files, copies) for rev.

    files is a sorted list of (filename, rev) pairs and copies maps
    destinations to sources.  The answer cached by getchangedfiles()
    is returned when available.  self.removed is updated and the
    entry of rev in self.paths is dropped.
    """
    cached = self._changescache
    if cached and cached[0] == rev:
        return cached[1]
    self._changescache = None
    paths, parents = self.paths[rev]
    if parents:
        files, self.removed, copies = self.expandpaths(rev, paths, parents)
    else:
        # Perform a full checkout on roots
        module, revnum = self.revsplit(rev)[1:]
        entries = svn.client.ls(self.baseurl + urllib.quote(module),
                                optrev(revnum), True, self.ctx)
        files = [name for name, ent in entries.iteritems()
                 if ent.kind == svn.core.svn_node_file]
        copies = {}
        self.removed = set()

    files.sort()
    files = [(name, rev) for name in files]

    # caller caches the result, so free it here to release memory
    del self.paths[rev]
    return (files, copies)
397 397
def getchangedfiles(self, rev, i):
    """Return the names of the files changed in rev.

    The getchanges() result is kept in self._changescache for a later
    getchanges() call on the same revision.  i is not used.
    """
    changes = self.getchanges(rev)
    self._changescache = (rev, changes)
    files = changes[0]
    return [entry[0] for entry in files]
402 402
def getcommit(self, rev):
    """Return the commit object of rev, fetching its log if needed.

    The commit is removed from self.commits before being returned.
    """
    if rev not in self.commits:
        module, revnum = self.revsplit(rev)[1:]
        self.module = module
        self.reparent(module)
        # We assume that:
        # - requests for revisions after "stop" come from the
        # revision graph backward traversal. Cache all of them
        # down to stop, they will be used eventually.
        # - requests for revisions before "stop" come to get
        # isolated branches parents. Just fetch what is needed.
        stop = self.lastrevs.get(module, 0)
        if revnum < stop:
            stop = revnum + 1
        self._fetch_revisions(revnum, stop)
    # caller caches the result, so free it here to release memory
    return self.commits.pop(rev)
422 422
def gettags(self):
    """Return a dictionary mapping tag names to revision ids.

    It is empty when self.tags is None.
    """
    tags = {}
    if self.tags is None:
        return tags

    # svn tags are just a convention, project branches left in a
    # 'tags' directory. There is no other relationship than
    # ancestry, which is expensive to discover and makes them hard
    # to update incrementally. Worse, past revisions may be
    # referenced by tags far away in the future, requiring a deep
    # history traversal on every calculation. Current code
    # performs a single backward traversal, tracking moves within
    # the tags directory (tag renaming) and recording a new tag
    # every time a project is copied from outside the tags
    # directory. It also lists deleted tags, this behaviour may
    # change in the future.
    pendings = []
    tagspath = self.tags
    start = svn.ra.get_latest_revnum(self.ra)
    stream = self._getlog([self.tags], start, self.startrev)
    try:
        for entry in stream:
            origpaths, revnum, author, date, message = entry
            copies = [(ent.copyfrom_path, ent.copyfrom_rev, dest)
                      for dest, ent in origpaths.iteritems()
                      if ent.copyfrom_path]
            # Apply moves/copies from more specific to general
            copies.sort(reverse=True)

            srctagspath = tagspath
            if copies and copies[-1][2] == tagspath:
                # Track tags directory moves
                srctagspath = copies.pop()[0]

            for source, sourcerev, dest in copies:
                if not dest.startswith(tagspath + '/'):
                    continue
                for tag in pendings:
                    if tag[0].startswith(dest):
                        # A pending tag came from below this copy,
                        # follow it to the copy source
                        tagpath = source + tag[0][len(dest):]
                        tag[:2] = [tagpath, sourcerev]
                        break
                else:
                    pendings.append([source, sourcerev, dest])

            # Filter out tags with children coming from different
            # parts of the repository like:
            # /tags/tag.1 (from /trunk:10)
            # /tags/tag.1/foo (from /branches/foo:12)
            # Here /tags/tag.1 is discarded as well as its children.
            # It happens with tools like cvs2svn. Such tags cannot
            # be represented in mercurial.
            addeds = dict((dest, ent.copyfrom_path) for dest, ent
                          in origpaths.iteritems()
                          if ent.action == 'A' and ent.copyfrom_path)
            badroots = set()
            for destroot in addeds:
                for source, sourcerev, dest in pendings:
                    if (not dest.startswith(destroot + '/')
                        or source.startswith(addeds[destroot] + '/')):
                        continue
                    badroots.add(destroot)
                    break

            for badroot in badroots:
                pendings = [p for p in pendings if p[2] != badroot
                            and not p[2].startswith(badroot + '/')]

            # Tell tag renamings from tag creations
            remainings = []
            for source, sourcerev, dest in pendings:
                tagname = dest.split('/')[-1]
                if source.startswith(srctagspath):
                    remainings.append([source, sourcerev, tagname])
                    continue
                if tagname in tags:
                    # Keep the latest tag value
                    continue
                # From revision may be fake, get one with changes
                try:
                    tagid = self.latest(source, sourcerev)
                    if tagid and tagname not in tags:
                        tags[tagname] = tagid
                except SvnPathNotFound:
                    # It happens when we are following directories
                    # we assumed were copied with their parents
                    # but were really created in the tag
                    # directory.
                    pass
            pendings = remainings
            tagspath = srctagspath
    finally:
        stream.close()
    return tags
516 516
def converted(self, rev, destrev):
    """Append a "destrev revnum" line to the .svn/hg-shamap file of the
    working copy.  Do nothing if the source is not a working copy.
    """
    if self.wc:
        if self.convertfp is None:
            # The map file is opened on first use and kept open
            shamap = os.path.join(self.wc, '.svn', 'hg-shamap')
            self.convertfp = open(shamap, 'a')
        self.convertfp.write('%s %d\n' % (destrev, self.revnum(rev)))
        self.convertfp.flush()
525 525
def revid(self, revnum, module=None):
    """Return the revision id of revnum in module, self.module being
    used when module is empty or None.
    """
    if not module:
        module = self.module
    return 'svn:%s%s@%s' % (self.uuid, module, revnum)
528 528
def revnum(self, rev):
    """Return the revision number following the last '@' of rev."""
    number = rev.rsplit('@', 1)[-1]
    return int(number)
531 531
def revsplit(self, rev):
    """Split a revision id into a (uuid, module, revnum) tuple.

    module is empty or starts with a slash, revnum is an integer.
    """
    head, number = rev.rsplit('@', 1)
    number = int(number)
    pieces = head.split('/', 1)
    # Drop the four character 'svn:' prefix
    uuid = pieces[0][4:]
    if len(pieces) > 1:
        module = '/' + pieces[1]
    else:
        module = ''
    return uuid, module, number
541 541
def latest(self, path, stop=0):
    """Find the latest revid affecting path, up to stop. It may return
    a revision in a different module, since a branch may be moved without
    a change being reported. Return None if computed module does not
    belong to rootmodule subtree.

    stop defaults to the latest repository revision.  Raise
    SvnPathNotFound if path cannot be stat'ed at stop.
    """
    if not path.startswith(self.rootmodule):
        # Requests on foreign branches may be forbidden at server level
        self.ui.debug('ignoring foreign branch %r\n' % path)
        return None

    if not stop:
        stop = svn.ra.get_latest_revnum(self.ra)
    try:
        prevmodule = self.reparent('')
        dirent = svn.ra.stat(self.ra, path.strip('/'), stop)
        self.reparent(prevmodule)
    except SubversionException:
        dirent = None
    if not dirent:
        raise SvnPathNotFound(_('%s not found up to revision %d')
                              % (path, stop))

    # stat() gives us the previous revision on this line of
    # development, but it might be in *another module*. Fetch the
    # log and detect renames down to the latest revision.
    created = dirent.created_rev
    stream = self._getlog([path], stop, created)
    try:
        for entry in stream:
            paths, revnum, author, date, message = entry
            if revnum <= created:
                break

            for changed in paths:
                copyfrom = paths[changed].copyfrom_path
                if not path.startswith(changed) or not copyfrom:
                    continue
                newpath = copyfrom + path[len(changed):]
                self.ui.debug("branch renamed from %s to %s at %d\n" %
                              (path, newpath, revnum))
                path = newpath
                break
    finally:
        stream.close()

    if not path.startswith(self.rootmodule):
        self.ui.debug('ignoring foreign branch %r\n' % path)
        return None
    return self.revid(created, path)
590 590
def reparent(self, module):
    """Reparent the svn transport and return the previous parent.

    Nothing is done, and module itself is returned, when the transport
    is already parented to module.
    """
    previous = self.prevmodule
    if previous == module:
        return module
    svnurl = self.baseurl + urllib.quote(module)
    if previous is None:
        # Never reparented yet, report the repository root
        previous = ''
    self.ui.debug("reparent to %s\n" % svnurl)
    svn.ra.reparent(self.ra, svnurl)
    self.prevmodule = module
    return previous
603 603
def expandpaths(self, rev, paths, parents):
    """Expand the changed paths of rev into file level changes.

    paths is a list of (path, entry) pairs and parents the list of
    parent revision ids.  Return a (files, removed, copies) tuple
    where files lists the changed files, removed ones included,
    removed is the set of removed files and copies maps copy
    destinations to their sources.  The transport is reparented to
    the module of rev if necessary.
    """
    changed, removed = set(), set()
    copies = {}

    new_module, revnum = self.revsplit(rev)[1:]
    if new_module != self.module:
        self.module = new_module
        self.reparent(self.module)

    total = len(paths)
    for pos, (path, ent) in enumerate(paths):
        self.ui.progress(_('scanning paths'), pos, item=path,
                         total=total)
        entrypath = self.getrelpath(path)

        kind = self._checkpath(entrypath, revnum)
        if kind == svn.core.svn_node_file:
            changed.add(self.recode(entrypath))
            if not ent.copyfrom_path or not parents:
                continue
            # Copy sources not in parent revisions cannot be
            # represented, ignore their origin for now
            pmodule, prevnum = self.revsplit(parents[0])[1:]
            if ent.copyfrom_rev < prevnum:
                continue
            copyfrom_path = self.getrelpath(ent.copyfrom_path, pmodule)
            if not copyfrom_path:
                continue
            self.ui.debug("copied to %s from %s@%s\n" %
                          (entrypath, copyfrom_path, ent.copyfrom_rev))
            copies[self.recode(entrypath)] = self.recode(copyfrom_path)
        elif kind == 0: # gone, but had better be a deleted *file*
            self.ui.debug("gone from %s\n" % ent.copyfrom_rev)
            pmodule, prevnum = self.revsplit(parents[0])[1:]
            parentpath = pmodule + "/" + entrypath
            fromkind = self._checkpath(entrypath, prevnum, pmodule)

            if fromkind == svn.core.svn_node_file:
                removed.add(self.recode(entrypath))
            elif fromkind == svn.core.svn_node_dir:
                # A directory was removed: every file it held in the
                # parent revision is gone
                oroot = parentpath.strip('/')
                nroot = path.strip('/')
                for childpath in self._iterfiles(oroot, prevnum):
                    childpath = childpath.replace(oroot, nroot)
                    childpath = self.getrelpath("/" + childpath, pmodule)
                    if childpath:
                        removed.add(self.recode(childpath))
            else:
                self.ui.debug('unknown path in revision %d: %s\n' %
                              (revnum, path))
        elif kind == svn.core.svn_node_dir:
            if ent.action == 'M':
                # If the directory just had a prop change,
                # then we shouldn't need to look for its children.
                continue
            if ent.action == 'R' and parents:
                # If a directory is replacing a file, mark the previous
                # file as deleted
                pmodule, prevnum = self.revsplit(parents[0])[1:]
                pkind = self._checkpath(entrypath, prevnum, pmodule)
                if pkind == svn.core.svn_node_file:
                    removed.add(self.recode(entrypath))
                elif pkind == svn.core.svn_node_dir:
                    # We do not know what files were kept or removed,
                    # mark them all as changed.
                    for childpath in self._iterfiles(pmodule, prevnum):
                        childpath = self.getrelpath("/" + childpath)
                        if childpath:
                            changed.add(self.recode(childpath))

            for childpath in self._iterfiles(path, revnum):
                childpath = self.getrelpath("/" + childpath)
                if childpath:
                    changed.add(self.recode(childpath))

            # Handle directory copies
            if not ent.copyfrom_path or not parents:
                continue
            # Copy sources not in parent revisions cannot be
            # represented, ignore their origin for now
            pmodule, prevnum = self.revsplit(parents[0])[1:]
            if ent.copyfrom_rev < prevnum:
                continue
            copyfrompath = self.getrelpath(ent.copyfrom_path, pmodule)
            if not copyfrompath:
                continue
            self.ui.debug("mark %s came from %s:%d\n"
                          % (path, copyfrompath, ent.copyfrom_rev))
            children = self._iterfiles(ent.copyfrom_path, ent.copyfrom_rev)
            for childpath in children:
                childpath = self.getrelpath("/" + childpath, pmodule)
                if not childpath:
                    continue
                copytopath = path + childpath[len(copyfrompath):]
                copytopath = self.getrelpath(copytopath)
                copies[self.recode(copytopath)] = self.recode(childpath)

    self.ui.progress(_('scanning paths'), None)
    changed.update(removed)
    return (list(changed), removed, copies)
704 704
705 705 def _fetch_revisions(self, from_revnum, to_revnum):
706 706 if from_revnum < to_revnum:
707 707 from_revnum, to_revnum = to_revnum, from_revnum
708 708
709 709 self.child_cset = None
710 710
711 711 def parselogentry(orig_paths, revnum, author, date, message):
712 712 """Return the parsed commit object or None, and True if
713 713 the revision is a branch root.
714 714 """
715 715 self.ui.debug("parsing revision %d (%d changes)\n" %
716 716 (revnum, len(orig_paths)))
717 717
718 718 branched = False
719 719 rev = self.revid(revnum)
720 720 # branch log might return entries for a parent we already have
721 721
722 722 if rev in self.commits or revnum < to_revnum:
723 723 return None, branched
724 724
725 725 parents = []
726 726 # check whether this revision is the start of a branch or part
727 727 # of a branch renaming
728 728 orig_paths = sorted(orig_paths.iteritems())
729 729 root_paths = [(p, e) for p, e in orig_paths
730 730 if self.module.startswith(p)]
731 731 if root_paths:
732 732 path, ent = root_paths[-1]
733 733 if ent.copyfrom_path:
734 734 branched = True
735 735 newpath = ent.copyfrom_path + self.module[len(path):]
736 736 # ent.copyfrom_rev may not be the actual last revision
737 737 previd = self.latest(newpath, ent.copyfrom_rev)
738 738 if previd is not None:
739 739 prevmodule, prevnum = self.revsplit(previd)[1:]
740 740 if prevnum >= self.startrev:
741 741 parents = [previd]
742 742 self.ui.note(
743 743 _('found parent of branch %s at %d: %s\n') %
744 744 (self.module, prevnum, prevmodule))
745 745 else:
746 746 self.ui.debug("no copyfrom path, don't know what to do.\n")
747 747
748 748 paths = []
749 749 # filter out unrelated paths
750 750 for path, ent in orig_paths:
751 751 if self.getrelpath(path) is None:
752 752 continue
753 753 paths.append((path, ent))
754 754
755 755 # Example SVN datetime. Includes microseconds.
756 756 # ISO-8601 conformant
757 757 # '2007-01-04T17:35:00.902377Z'
758 758 date = util.parsedate(date[:19] + " UTC", ["%Y-%m-%dT%H:%M:%S"])
759 759
760 760 log = message and self.recode(message) or ''
761 761 author = author and self.recode(author) or ''
762 762 try:
763 763 branch = self.module.split("/")[-1]
764 764 trunkname = self.ui.config('convert', 'svn.trunk', 'trunk')
765 765 if branch == trunkname.strip('/'):
766 766 branch = ''
767 767 except IndexError:
768 768 branch = None
769 769
770 770 cset = commit(author=author,
771 771 date=util.datestr(date),
772 772 desc=log,
773 773 parents=parents,
774 774 branch=branch,
775 775 rev=rev)
776 776
777 777 self.commits[rev] = cset
778 778 # The parents list is *shared* among self.paths and the
779 779 # commit object. Both will be updated below.
780 780 self.paths[rev] = (paths, cset.parents)
781 781 if self.child_cset and not self.child_cset.parents:
782 782 self.child_cset.parents[:] = [rev]
783 783 self.child_cset = cset
784 784 return cset, branched
785 785
786 786 self.ui.note(_('fetching revision log for "%s" from %d to %d\n') %
787 787 (self.module, from_revnum, to_revnum))
788 788
789 789 try:
790 790 firstcset = None
791 791 lastonbranch = False
792 792 stream = self._getlog([self.module], from_revnum, to_revnum)
793 793 try:
794 794 for entry in stream:
795 795 paths, revnum, author, date, message = entry
796 796 if revnum < self.startrev:
797 797 lastonbranch = True
798 798 break
799 799 if not paths:
800 800 self.ui.debug('revision %d has no entries\n' % revnum)
801 801 # If we ever leave the loop on an empty
802 802 # revision, do not try to get a parent branch
803 803 lastonbranch = lastonbranch or revnum == 0
804 804 continue
805 805 cset, lastonbranch = parselogentry(paths, revnum, author,
806 806 date, message)
807 807 if cset:
808 808 firstcset = cset
809 809 if lastonbranch:
810 810 break
811 811 finally:
812 812 stream.close()
813 813
814 814 if not lastonbranch and firstcset and not firstcset.parents:
815 815 # The first revision of the sequence (the last fetched one)
816 816 # has invalid parents if not a branch root. Find the parent
817 817 # revision now, if any.
818 818 try:
819 819 firstrevnum = self.revnum(firstcset.rev)
820 820 if firstrevnum > 1:
821 821 latest = self.latest(self.module, firstrevnum - 1)
822 822 if latest:
823 823 firstcset.parents.append(latest)
824 824 except SvnPathNotFound:
825 825 pass
826 826 except SubversionException, (inst, num):
827 827 if num == svn.core.SVN_ERR_FS_NO_SUCH_REVISION:
828 828 raise util.Abort(_('svn: branch has no revision %s') % to_revnum)
829 829 raise
830 830
831 831 def getfile(self, file, rev):
832 832 # TODO: ra.get_file transmits the whole file instead of diffs.
833 833 if file in self.removed:
834 834 raise IOError()
835 835 mode = ''
836 836 try:
837 837 new_module, revnum = self.revsplit(rev)[1:]
838 838 if self.module != new_module:
839 839 self.module = new_module
840 840 self.reparent(self.module)
841 841 io = StringIO()
842 842 info = svn.ra.get_file(self.ra, file, revnum, io)
843 843 data = io.getvalue()
844 844 # ra.get_files() seems to keep a reference on the input buffer
845 845 # preventing collection. Release it explicitely.
846 846 io.close()
847 847 if isinstance(info, list):
848 848 info = info[-1]
849 849 mode = ("svn:executable" in info) and 'x' or ''
850 850 mode = ("svn:special" in info) and 'l' or mode
851 851 except SubversionException, e:
852 852 notfound = (svn.core.SVN_ERR_FS_NOT_FOUND,
853 853 svn.core.SVN_ERR_RA_DAV_PATH_NOT_FOUND)
854 854 if e.apr_err in notfound: # File not found
855 855 raise IOError()
856 856 raise
857 857 if mode == 'l':
858 858 link_prefix = "link "
859 859 if data.startswith(link_prefix):
860 860 data = data[len(link_prefix):]
861 861 return data, mode
862 862
def _iterfiles(self, path, revnum):
    """Enumerate all files in path at revnum, recursively.

    path is a repository path, surrounding slashes being ignored.
    Return a generator of file paths prefixed with the stripped path,
    without a leading slash.
    """
    path = path.strip('/')
    pool = Pool()
    rpath = '/'.join([self.baseurl, urllib.quote(path)]).strip('/')
    entries = svn.client.ls(rpath, optrev(revnum), True, self.ctx, pool)
    if path:
        # Only separate a non-empty prefix: when listing the repository
        # root an unconditional '/' would give every file a leading
        # slash (issue2647).
        path += '/'
    return ((path + p) for p, e in entries.iteritems()
            if e.kind == svn.core.svn_node_file)
871 873
872 874 def getrelpath(self, path, module=None):
873 875 if module is None:
874 876 module = self.module
875 877 # Given the repository url of this wc, say
876 878 # "http://server/plone/CMFPlone/branches/Plone-2_0-branch"
877 879 # extract the "entry" portion (a relative path) from what
878 880 # svn log --xml says, ie
879 881 # "/CMFPlone/branches/Plone-2_0-branch/tests/PloneTestCase.py"
880 882 # that is to say "tests/PloneTestCase.py"
881 883 if path.startswith(module):
882 884 relative = path.rstrip('/')[len(module):]
883 885 if relative.startswith('/'):
884 886 return relative[1:]
885 887 elif relative == '':
886 888 return relative
887 889
888 890 # The path is outside our tracked tree...
889 891 self.ui.debug('%r is not under %r, ignoring\n' % (path, module))
890 892 return None
891 893
892 894 def _checkpath(self, path, revnum, module=None):
893 895 if module is not None:
894 896 prevmodule = self.reparent('')
895 897 path = module + '/' + path
896 898 try:
897 899 # ra.check_path does not like leading slashes very much, it leads
898 900 # to PROPFIND subversion errors
899 901 return svn.ra.check_path(self.ra, path.strip('/'), revnum)
900 902 finally:
901 903 if module is not None:
902 904 self.reparent(prevmodule)
903 905
904 906 def _getlog(self, paths, start, end, limit=0, discover_changed_paths=True,
905 907 strict_node_history=False):
906 908 # Normalize path names, svn >= 1.5 only wants paths relative to
907 909 # supplied URL
908 910 relpaths = []
909 911 for p in paths:
910 912 if not p.startswith('/'):
911 913 p = self.module + '/' + p
912 914 relpaths.append(p.strip('/'))
913 915 args = [self.baseurl, relpaths, start, end, limit, discover_changed_paths,
914 916 strict_node_history]
915 917 arg = encodeargs(args)
916 918 hgexe = util.hgexecutable()
917 919 cmd = '%s debugsvnlog' % util.shellquote(hgexe)
918 920 stdin, stdout = util.popen2(util.quotecommand(cmd))
919 921 stdin.write(arg)
920 922 try:
921 923 stdin.close()
922 924 except IOError:
923 925 raise util.Abort(_('Mercurial failed to run itself, check'
924 926 ' hg executable is in PATH'))
925 927 return logstream(stdout)
926 928
# Shell script written by svn_sink.__init__ to hooks/pre-revprop-change
# of a repository it has just created. It accepts modification ("M") of
# svn:log and addition ("A") of the hg:convert-branch and hg:convert-rev
# revision properties, and rejects every other revision property change.
pre_revprop_change = '''#!/bin/sh

REPOS="$1"
REV="$2"
USER="$3"
PROPNAME="$4"
ACTION="$5"

if [ "$ACTION" = "M" -a "$PROPNAME" = "svn:log" ]; then exit 0; fi
if [ "$ACTION" = "A" -a "$PROPNAME" = "hg:convert-branch" ]; then exit 0; fi
if [ "$ACTION" = "A" -a "$PROPNAME" = "hg:convert-rev" ]; then exit 0; fi

echo "Changing prohibited revision property" >&2
exit 1
'''
942 944
class svn_sink(converter_sink, commandline):
    """Conversion sink writing changesets into a Subversion repository.

    Changes are applied to a working copy on disk and then recorded by
    running the 'svn' command line client (add, copy, delete, propset,
    commit) through the commandline helper base class.
    """
    # Extracts the new revision number from 'svn commit' output.
    commit_re = re.compile(r'Committed revision (\d+).', re.M)

    def prerun(self):
        # Move into the working copy, once one is known.
        # NOTE(review): presumably called by commandline before each
        # command is run -- confirm against common.commandline.
        if self.wc:
            os.chdir(self.wc)

    def postrun(self):
        # Return to the directory the sink was created from.
        if self.wc:
            os.chdir(self.cwd)

    def join(self, name):
        """Return the path of name inside the working copy '.svn' dir."""
        return os.path.join(self.wc, '.svn', name)

    def revmapfile(self):
        """Return the path of the revision map file."""
        return self.join('hg-shamap')

    def authorfile(self):
        """Return the path of the author map file."""
        return self.join('hg-authormap')

    def __init__(self, ui, path):
        """Set up the sink for the working copy or repository at path.

        If path is already a working copy (it has '.svn/entries') it is
        updated and used directly. Otherwise a working copy named
        '<basename>-wc' is checked out in the current directory; when
        path's parent directory exists locally and path is not yet a
        repository, one is created with 'svnadmin create' first and a
        pre-revprop-change hook is installed in it.

        Raises MissingTool if the Subversion python bindings could not
        be imported.
        """

        if svn is None:
            raise MissingTool(_('Could not load Subversion python bindings'))
        converter_sink.__init__(self, ui, path)
        commandline.__init__(self, ui, 'svn')
        # Pending operations, flushed to svn by putcommit().
        self.delete = []
        self.setexec = []
        self.delexec = []
        self.copies = []
        self.wc = None
        self.cwd = os.getcwd()

        path = os.path.realpath(path)

        created = False
        if os.path.isfile(os.path.join(path, '.svn', 'entries')):
            self.wc = path
            self.run0('update')
        else:
            wcpath = os.path.join(os.getcwd(), os.path.basename(path) + '-wc')

            if os.path.isdir(os.path.dirname(path)):
                if not os.path.exists(os.path.join(path, 'db', 'fs-type')):
                    ui.status(_('initializing svn repository %r\n') %
                              os.path.basename(path))
                    commandline(ui, 'svnadmin').run0('create', path)
                    created = path
                # Turn the local path into a file:// URL for checkout.
                path = util.normpath(path)
                if not path.startswith('/'):
                    path = '/' + path
                path = 'file://' + path

            ui.status(_('initializing svn working copy %r\n')
                      % os.path.basename(wcpath))
            self.run0('checkout', path, wcpath)

            self.wc = wcpath
        self.opener = util.opener(self.wc)
        self.wopener = util.opener(self.wc)
        self.childmap = mapfile(ui, self.join('hg-childmap'))
        # None when the working copy filesystem has no usable exec bit.
        self.is_exec = util.checkexec(self.wc) and util.is_exec or None

        if created:
            # Allow putcommit() to set revision properties on the
            # repository we have just created.
            hook = os.path.join(created, 'hooks', 'pre-revprop-change')
            fp = open(hook, 'w')
            try:
                fp.write(pre_revprop_change)
            finally:
                fp.close()
            util.set_flags(hook, False, True)

        xport = transport.SvnRaTransport(url=geturl(path))
        self.uuid = svn.ra.get_uuid(xport.ra)

    def wjoin(self, *names):
        """Join names onto the working copy root."""
        return os.path.join(self.wc, *names)

    def putfile(self, filename, flags, data):
        """Write data to filename in the working copy, honoring flags.

        With 'l' in flags a symlink pointing at data is created.
        Otherwise a regular file is written, its executable bit is set
        according to 'x' in flags, and the matching svn:executable
        property change is queued in self.setexec / self.delexec.
        """
        if 'l' in flags:
            self.wopener.symlink(data, filename)
        else:
            target = self.wjoin(filename)
            try:
                # Replace an existing symlink instead of writing through
                # it. The path must be resolved against the working
                # copy: the process is not necessarily running from
                # there, so the bare relative name would designate an
                # unrelated file.
                if os.path.islink(target):
                    os.unlink(target)
            except OSError:
                pass
            fp = self.wopener(filename, 'w')
            try:
                fp.write(data)
            finally:
                fp.close()

            if self.is_exec:
                was_exec = self.is_exec(target)
            else:
                # On filesystems not supporting execute-bit, there is no way
                # to know if it is set but asking subversion. Setting it
                # systematically is just as expensive and much simpler.
                was_exec = 'x' not in flags

            util.set_flags(target, False, 'x' in flags)
            if was_exec:
                if 'x' not in flags:
                    self.delexec.append(filename)
            else:
                if 'x' in flags:
                    self.setexec.append(filename)

    def _copyfile(self, source, dest):
        """Record with 'svn copy' that dest was copied from source."""
        # SVN's copy command pukes if the destination file exists, but
        # our copyfile method expects to record a copy that has
        # already occurred. Cross the semantic gap.
        wdest = self.wjoin(dest)
        exists = os.path.lexists(wdest)
        if exists:
            # Reserve a unique name next to dest, then move the already
            # written destination out of the way under that name.
            fd, tempname = tempfile.mkstemp(
                prefix='hg-copy-', dir=os.path.dirname(wdest))
            os.close(fd)
            os.unlink(tempname)
            os.rename(wdest, tempname)
        try:
            self.run0('copy', source, dest)
        finally:
            if exists:
                # Put the converted content back over whatever svn
                # created at the destination.
                try:
                    os.unlink(wdest)
                except OSError:
                    pass
                os.rename(tempname, wdest)

    def dirs_of(self, files):
        """Return the set of directories involved by files.

        This contains every entry of files which is itself a directory
        in the working copy, plus all parent directories of each entry.
        """
        dirs = set()
        for f in files:
            if os.path.isdir(self.wjoin(f)):
                dirs.add(f)
            for i in strutil.rfindall(f, '/'):
                dirs.add(f[:i])
        return dirs

    def add_dirs(self, files):
        """'svn add' the directories of files lacking '.svn/entries'.

        Returns the list of directories which were added.
        """
        add_dirs = [d for d in sorted(self.dirs_of(files))
                    if not os.path.exists(self.wjoin(d, '.svn', 'entries'))]
        if add_dirs:
            self.xargs(add_dirs, 'add', non_recursive=True, quiet=True)
        return add_dirs

    def add_files(self, files):
        """'svn add' files (if any) and return them."""
        if files:
            self.xargs(files, 'add', quiet=True)
        return files

    def tidy_dirs(self, names):
        """'svn delete' directories of names left with only '.svn' inside.

        Directories are visited deepest first. Returns the list of
        deleted directories.
        """
        deleted = []
        for d in sorted(self.dirs_of(names), reverse=True):
            wd = self.wjoin(d)
            # os.listdir() returns a list: it has to be compared with a
            # list, comparing it with the bare string never matches.
            if os.listdir(wd) == ['.svn']:
                self.run0('delete', d)
                deleted.append(d)
        return deleted

    def addchild(self, parent, child):
        """Remember that parent was converted into svn revision child."""
        self.childmap[parent] = child

    def revid(self, rev):
        """Return the 'svn:<uuid>@<rev>' identifier of revision rev."""
        return u"svn:%s@%s" % (self.uuid, rev)

    def putcommit(self, files, copies, parents, commit, source, revmap):
        """Apply a changeset to the working copy and commit it.

        files is a sequence of (name, version) pairs whose content is
        fetched with source.getfile(); a file for which getfile() raises
        IOError is scheduled for deletion. copies maps destination names
        to their copy source.

        Returns the revid of the new revision. If one of parents already
        has a child recorded in the child map, the revid of that child
        is returned without committing. If svn reports no committed
        revision and files is empty, parents[0] is returned.

        Raises util.Abort when the commit output cannot be understood.
        """
        # Apply changes to working copy
        for f, v in files:
            try:
                data, mode = source.getfile(f, v)
            except IOError:
                self.delete.append(f)
            else:
                self.putfile(f, mode, data)
                if f in copies:
                    self.copies.append([copies[f], f])
        files = [f[0] for f in files]

        for parent in parents:
            try:
                return self.revid(self.childmap[parent])
            except KeyError:
                pass
        # entries accumulates every path touched by an svn operation.
        entries = set(self.delete)
        files = frozenset(files)
        entries.update(self.add_dirs(files.difference(entries)))
        if self.copies:
            for s, d in self.copies:
                self._copyfile(s, d)
            self.copies = []
        if self.delete:
            self.xargs(self.delete, 'delete')
            self.delete = []
        entries.update(self.add_files(files.difference(entries)))
        entries.update(self.tidy_dirs(entries))
        if self.delexec:
            self.xargs(self.delexec, 'propdel', 'svn:executable')
            self.delexec = []
        if self.setexec:
            self.xargs(self.setexec, 'propset', 'svn:executable', '*')
            self.setexec = []

        # The commit message is handed to svn through a temporary file.
        fd, messagefile = tempfile.mkstemp(prefix='hg-convert-')
        fp = os.fdopen(fd, 'w')
        fp.write(commit.desc)
        fp.close()
        try:
            output = self.run0('commit',
                               username=util.shortuser(commit.author),
                               file=messagefile,
                               encoding='utf-8')
            try:
                rev = self.commit_re.search(output).group(1)
            except AttributeError:
                # No "Committed revision N." line in the output.
                if not files:
                    return parents[0]
                self.ui.warn(_('unexpected svn output:\n'))
                self.ui.warn(output)
                raise util.Abort(_('unable to cope with svn output'))
            if commit.rev:
                self.run('propset', 'hg:convert-rev', commit.rev,
                         revprop=True, revision=rev)
            if commit.branch and commit.branch != 'default':
                self.run('propset', 'hg:convert-branch', commit.branch,
                         revprop=True, revision=rev)
            for parent in parents:
                self.addchild(parent, rev)
            return self.revid(rev)
        finally:
            os.unlink(messagefile)

    def puttags(self, tags):
        """Warn that tags cannot be written; return (None, None)."""
        self.ui.warn(_('writing Subversion tags is not yet implemented\n'))
        return None, None
General Comments 0
You need to be logged in to leave comments. Login now