##// END OF EJS Templates
convert/svn: clarify svn_source.latest() stop arg default value...
Patrick Mezard -
r16464:0e1329d9 stable
parent child Browse files
Show More
@@ -1,1198 +1,1200 b''
1 1 # Subversion 1.4/1.5 Python API backend
2 2 #
3 3 # Copyright(C) 2007 Daniel Holth et al
4 4
5 5 import os
6 6 import re
7 7 import sys
8 8 import cPickle as pickle
9 9 import tempfile
10 10 import urllib
11 11 import urllib2
12 12
13 13 from mercurial import strutil, scmutil, util, encoding
14 14 from mercurial.i18n import _
15 15
16 16 # Subversion stuff. Works best with very recent Python SVN bindings
17 17 # e.g. SVN 1.5 or backports. Thanks to the bzr folks for enhancing
18 18 # these bindings.
19 19
20 20 from cStringIO import StringIO
21 21
22 22 from common import NoRepo, MissingTool, commit, encodeargs, decodeargs
23 23 from common import commandline, converter_source, converter_sink, mapfile
24 24
25 25 try:
26 26 from svn.core import SubversionException, Pool
27 27 import svn
28 28 import svn.client
29 29 import svn.core
30 30 import svn.ra
31 31 import svn.delta
32 32 import transport
33 33 import warnings
34 34 warnings.filterwarnings('ignore',
35 35 module='svn.core',
36 36 category=DeprecationWarning)
37 37
38 38 except ImportError:
39 39 svn = None
40 40
class SvnPathNotFound(Exception):
    """Signal that a path could not be found in the Subversion repository."""
43 43
def revsplit(rev):
    """Split a revision identifier into (uuid, module path, revnum).

    The identifier is cut at its last '@' to obtain the revision number.
    The remaining text is cut at its first '/': the first four characters
    of the leading part are dropped to give the uuid, and the rest
    (including the slash) is the module path, or '' when there is none.
    """
    location, number = rev.rsplit('@', 1)
    slash = location.find('/')
    if slash < 0:
        head, module = location, ''
    else:
        head, module = location[:slash], location[slash:]
    return head[4:], module, int(number)
52 52
53 53 def quote(s):
54 54 # As of svn 1.7, many svn calls expect "canonical" paths. In
55 55 # theory, we should call svn.core.*canonicalize() on all paths
56 56 # before passing them to the API. Instead, we assume the base url
57 57 # is canonical and copy the behaviour of svn URL encoding function
58 58 # so we can extend it safely with new components. The "safe"
59 59 # characters were taken from the "svn_uri__char_validity" table in
60 60 # libsvn_subr/path.c.
61 61 return urllib.quote(s, "!$&'()*+,-./:=@_~")
62 62
def geturl(path):
    """Return a canonical Subversion URL for path.

    svn.client.url_from_path() is tried first. When it raises
    SubversionException and path is an existing directory, a file:// URL
    is built from its absolute path; otherwise path itself is
    canonicalized and returned.
    """
    canonicalize = svn.core.svn_path_canonicalize
    try:
        return svn.client.url_from_path(canonicalize(path))
    except SubversionException:
        # svn.client.url_from_path() fails with local repositories
        pass
    if not os.path.isdir(path):
        return canonicalize(path)
    localpath = os.path.normpath(os.path.abspath(path))
    if os.name == 'nt':
        localpath = '/' + util.normpath(localpath)
    # Module URL is later compared with the repository URL returned
    # by svn API, which is UTF-8.
    localpath = encoding.tolocal(localpath)
    return canonicalize('file://%s' % quote(localpath))
78 78
def optrev(number):
    """Wrap a revision number in an svn_opt_revision_t of kind "number"."""
    revision = svn.core.svn_opt_revision_t()
    revision.kind = svn.core.svn_opt_revision_number
    revision.value.number = number
    return revision
84 84
class changedpath(object):
    """Plain copy of the fields read from a changed-path log entry.

    Only copyfrom_path, copyfrom_rev and action are kept.
    """
    def __init__(self, p):
        for field in ('copyfrom_path', 'copyfrom_rev', 'action'):
            setattr(self, field, getattr(p, field))
90 90
91 91 def get_log_child(fp, url, paths, start, end, limit=0, discover_changed_paths=True,
92 92 strict_node_history=False):
93 93 protocol = -1
94 94 def receiver(orig_paths, revnum, author, date, message, pool):
95 95 if orig_paths is not None:
96 96 for k, v in orig_paths.iteritems():
97 97 orig_paths[k] = changedpath(v)
98 98 pickle.dump((orig_paths, revnum, author, date, message),
99 99 fp, protocol)
100 100
101 101 try:
102 102 # Use an ra of our own so that our parent can consume
103 103 # our results without confusing the server.
104 104 t = transport.SvnRaTransport(url=url)
105 105 svn.ra.get_log(t.ra, paths, start, end, limit,
106 106 discover_changed_paths,
107 107 strict_node_history,
108 108 receiver)
109 109 except IOError:
110 110 # Caller may interrupt the iteration
111 111 pickle.dump(None, fp, protocol)
112 112 except Exception, inst:
113 113 pickle.dump(str(inst), fp, protocol)
114 114 else:
115 115 pickle.dump(None, fp, protocol)
116 116 fp.close()
117 117 # With large history, cleanup process goes crazy and suddenly
118 118 # consumes *huge* amount of memory. The output file being closed,
119 119 # there is no need for clean termination.
120 120 os._exit(0)
121 121
def debugsvnlog(ui, **opts):
    """Fetch SVN log in a subprocess and channel them back to parent to
    avoid memory collection issues.

    The encoded arguments are read from stdin and the pickled log is
    written to stdout; both streams are switched to binary mode first.
    """
    for stream in (sys.stdin, sys.stdout):
        util.setbinary(stream)
    encoded = sys.stdin.read()
    get_log_child(sys.stdout, *decodeargs(encoded))
130 130
class logstream(object):
    """Interruptible revision log iterator.

    Reads the pickle stream written by get_log_child(): a sequence of
    (orig_paths, revnum, author, date, message) tuples, terminated either
    by None (normal end) or by a string describing an error.
    """
    def __init__(self, stdout):
        self._stdout = stdout

    def __iter__(self):
        """Yield log entry tuples until the None terminator is read.

        Raises util.Abort if the stream ends before any terminator, or
        if anything other than a 5-item tuple or None is received.
        """
        while True:
            try:
                entry = pickle.load(self._stdout)
            except EOFError:
                raise util.Abort(_('Mercurial failed to run itself, check'
                                   ' hg executable is in PATH'))
            if entry is None:
                break
            # Check the shape explicitly instead of relying on a failed
            # tuple unpacking: an error string that happens to be five
            # characters long would unpack without complaint and be
            # handed to the caller as if it were a log entry.
            if not isinstance(entry, tuple) or len(entry) != 5:
                raise util.Abort(_("log stream exception '%s'") % (entry,))
            yield entry

    def close(self):
        """Close the underlying stream; calling it again is a no-op."""
        if self._stdout:
            self._stdout.close()
            self._stdout = None
155 155
156 156
157 157 # Check to see if the given path is a local Subversion repo. Verify this by
158 158 # looking for several svn-specific files and directories in the given
159 159 # directory.
def filecheck(ui, path, proto):
    """Return True if path contains 'locks', 'hooks', 'format' and 'db'."""
    expected = ('locks', 'hooks', 'format', 'db')
    missing = [name for name in expected
               if not os.path.exists(os.path.join(path, name))]
    return not missing
165 165
166 166 # Check to see if a given path is the root of an svn repo over http. We verify
167 167 # this by requesting a version-controlled URL we know can't exist and looking
168 168 # for the svn-specific "not found" XML.
169 169 def httpcheck(ui, path, proto):
170 170 try:
171 171 opener = urllib2.build_opener()
172 172 rsp = opener.open('%s://%s/!svn/ver/0/.svn' % (proto, path))
173 173 data = rsp.read()
174 174 except urllib2.HTTPError, inst:
175 175 if inst.code != 404:
176 176 # Except for 404 we cannot know for sure this is not an svn repo
177 177 ui.warn(_('svn: cannot probe remote repository, assume it could '
178 178 'be a subversion repository. Use --source-type if you '
179 179 'know better.\n'))
180 180 return True
181 181 data = inst.fp.read()
182 182 except:
183 183 # Could be urllib2.URLError if the URL is invalid or anything else.
184 184 return False
185 185 return '<m:human-readable errcode="160013">' in data
186 186
# Probe function to use for each supported URL scheme.
protomap = {
    'http': httpcheck,
    'https': httpcheck,
    'file': filecheck,
}

def issvnurl(ui, url):
    """Return True if url or one of its parent paths passes its probe.

    A url without '://' is treated as a local path under the 'file'
    scheme. The probe registered in protomap for the scheme is applied
    to the path and then to each shorter prefix obtained by dropping the
    last '/'-separated component. Schemes without a probe yield False.
    """
    try:
        proto, path = url.split('://', 1)
        if proto == 'file':
            path = urllib.url2pathname(path)
    except ValueError:
        proto, path = 'file', os.path.abspath(url)
    if proto == 'file':
        path = util.pconvert(path)
    check = protomap.get(proto)
    if check is None:
        return False
    candidate = path
    while '/' in candidate:
        if check(ui, candidate, proto):
            return True
        candidate = candidate[:candidate.rindex('/')]
    return False
207 207
208 208 # SVN conversion code stolen from bzr-svn and tailor
209 209 #
210 210 # Subversion looks like a versioned filesystem, branches structures
211 211 # are defined by conventions and not enforced by the tool. First,
212 212 # we define the potential branches (modules) as "trunk" and "branches"
213 213 # children directories. Revisions are then identified by their
214 214 # module and revision number (and a repository identifier).
215 215 #
216 216 # The revision graph is really a tree (or a forest). By default, a
217 217 # revision parent is the previous revision in the same module. If the
218 218 # module directory is copied/moved from another module then the
219 219 # revision is the module root and its parent the source revision in
220 220 # the parent module. A revision has at most one parent.
221 221 #
222 222 class svn_source(converter_source):
def __init__(self, ui, url, rev=None):
    """Open url as a Subversion conversion source.

    Raises NoRepo when url does not look like a Subversion repository or
    cannot be opened, MissingTool when the Python bindings are missing
    or older than 1.4, and util.Abort when rev or the configured start
    revision is not an integer or no revision is found in the module.
    """
    super(svn_source, self).__init__(ui, url, rev=rev)

    # Cheap checks first; issvnurl() is only consulted as a last resort.
    remote = url.startswith('svn://') or url.startswith('svn+ssh://')
    if not remote:
        workingcopy = (os.path.exists(url) and
                       os.path.exists(os.path.join(url, '.svn')))
        if not workingcopy and not issvnurl(ui, url):
            raise NoRepo(_("%s does not look like a Subversion repository")
                         % url)
    if svn is None:
        raise MissingTool(_('Could not load Subversion python bindings'))

    try:
        bindings = svn.core.SVN_VER_MAJOR, svn.core.SVN_VER_MINOR
    except AttributeError:
        raise MissingTool(_('Subversion python bindings are too old, 1.4 '
                            'or later required'))
    if bindings < (1, 4):
        raise MissingTool(_('Subversion python bindings %d.%d found, '
                            '1.4 or later required') % bindings)

    self.lastrevs = {}

    # Support file://path@rev syntax. Useful e.g. to convert
    # deleted branches. A non-numeric suffix leaves url untouched.
    latest = None
    at = url.rfind('@')
    if at >= 0:
        try:
            latest = int(url[at + 1:])
        except ValueError:
            pass
        else:
            url = url[:at]
    self.url = geturl(url)
    self.encoding = 'UTF-8' # Subversion is always nominal UTF-8
    try:
        self.transport = transport.SvnRaTransport(url=self.url)
        self.ra = self.transport.ra
        self.ctx = self.transport.client
        self.baseurl = svn.ra.get_repos_root(self.ra)
        # Module is either empty or a repository path starting with
        # a slash and not ending with a slash.
        self.module = urllib.unquote(self.url[len(self.baseurl):])
        self.prevmodule = None
        self.rootmodule = self.module
        self.commits = {}
        self.paths = {}
        self.uuid = svn.ra.get_uuid(self.ra)
    except SubversionException:
        ui.traceback()
        raise NoRepo(_("%s does not look like a Subversion repository")
                     % self.url)

    # An explicit rev argument overrides any @rev found in the url.
    if rev:
        try:
            latest = int(rev)
        except ValueError:
            raise util.Abort(_('svn: revision %s is not an integer') % rev)

    trunkname = self.ui.config('convert', 'svn.trunk', 'trunk')
    self.trunkname = trunkname.strip('/')
    self.startrev = self.ui.config('convert', 'svn.startrev', default=0)
    try:
        # Negative start revisions are clamped to 0.
        self.startrev = max(int(self.startrev), 0)
    except ValueError:
        raise util.Abort(_('svn: start revision %s is not an integer')
                         % self.startrev)

    try:
        self.head = self.latest(self.module, latest)
    except SvnPathNotFound:
        self.head = None
    if not self.head:
        raise util.Abort(_('no revision found in module %s')
                         % self.module)
    self.last_changed = self.revnum(self.head)

    self._changescache = None

    # Remember the working copy location only when url is one.
    self.wc = None
    if os.path.exists(os.path.join(url, '.svn/entries')):
        self.wc = url
    self.convertfp = None
308 308
def setrevmap(self, revmap):
    """Record, per module, the highest revision number found in revmap.

    The result replaces self.lastrevs.
    """
    newest = {}
    for revid in revmap.iterkeys():
        module, revnum = revsplit(revid)[1:]
        if module not in newest or revnum > newest[module]:
            newest[module] = revnum
    self.lastrevs = newest
317 317
def exists(self, path, optrev):
    """Return True if listing path under self.url at optrev succeeds.

    A SubversionException from svn.client.ls() is reported as False.
    """
    target = self.url.rstrip('/') + '/' + quote(path)
    try:
        svn.client.ls(target, optrev, False, self.ctx)
    except SubversionException:
        return False
    return True
325 325
def getheads(self):
    """Compute and return the list of head revision ids.

    Looks for the configured (or default) trunk, tags and branches
    directories at the last changed revision. When a trunk is found,
    self.module and self.head are moved into it. Each directory found
    under the branches directory contributes one more head. self.tags is
    set to the tags directory path, or None. Raises util.Abort on a
    configured-but-missing directory, an empty module, or an unsupported
    start revision setup.
    """

    def isdir(path, revnum):
        return self._checkpath(path, revnum) == svn.core.svn_node_dir

    def getcfgpath(name, rev):
        # Return the layout directory for name, or None when it is
        # disabled (blank setting) or absent.
        cfgpath = self.ui.config('convert', 'svn.' + name)
        if cfgpath is not None and cfgpath.strip() == '':
            return None
        path = (cfgpath or name).strip('/')
        if self.exists(path, rev):
            self.ui.note(_('found %s at %r\n') % (name, path))
            return path
        # we may be converting from inside the trunk directory itself
        insidetrunk = name == 'trunk' and self.module.endswith(path)
        if cfgpath and not insidetrunk:
            raise util.Abort(_('expected %s to be at %r, but not found')
                             % (name, path))
        return None

    rev = optrev(self.last_changed)
    oldmodule = ''
    trunk = getcfgpath('trunk', rev)
    self.tags = getcfgpath('tags', rev)
    branches = getcfgpath('branches', rev)

    # If the project has a trunk or branches, we will extract heads
    # from them. We keep the project root otherwise.
    if trunk:
        oldmodule = self.module or ''
        self.module += '/' + trunk
        self.head = self.latest(self.module, self.last_changed)
        if not self.head:
            raise util.Abort(_('no revision found in module %s')
                             % self.module)

    # First head in the list is the module's head
    self.heads = [self.head]
    if self.tags is not None:
        self.tags = '%s/%s' % (oldmodule, self.tags or 'tags')

    # Check if branches bring a few more heads to the list
    if branches:
        listing = svn.client.ls(self.url.strip('/') + '/' + quote(branches),
                                rev, False, self.ctx)
        for branch in listing.keys():
            module = '%s/%s/%s' % (oldmodule, branches, branch)
            if not isdir(module, self.last_changed):
                continue
            brevid = self.latest(module, self.last_changed)
            if brevid:
                self.ui.note(_('found branch %s at %d\n') %
                             (branch, self.revnum(brevid)))
                self.heads.append(brevid)
            else:
                self.ui.note(_('ignoring empty branch %s\n') % branch)

    if self.startrev and self.heads:
        if len(self.heads) > 1:
            raise util.Abort(_('svn: start revision is not supported '
                               'with more than one branch'))
        headrev = self.revnum(self.heads[0])
        if headrev < self.startrev:
            raise util.Abort(
                _('svn: no revision found after start revision %d')
                % self.startrev)

    return self.heads
397 397
def getchanges(self, rev):
    """Return (files, copies) for rev, where files is a sorted list of
    (path, rev) pairs.

    A result cached by getchangedfiles() for the same rev is returned
    as is. Otherwise the change set is expanded from self.paths[rev]
    (or, for a revision without parents, listed recursively from the
    repository), self.removed is updated and self.paths[rev] is dropped.
    """
    cached = self._changescache
    if cached and cached[0] == rev:
        return cached[1]
    self._changescache = None
    paths, parents = self.paths[rev]
    if parents:
        files, self.removed, copies = self.expandpaths(rev, paths, parents)
    else:
        # Perform a full checkout on roots
        module, revnum = revsplit(rev)[1:]
        entries = svn.client.ls(self.baseurl + quote(module),
                                optrev(revnum), True, self.ctx)
        files = [name for name, entry in entries.iteritems()
                 if entry.kind == svn.core.svn_node_file]
        copies = {}
        self.removed = set()

    files = [(name, rev) for name in sorted(files)]

    # caller caches the result, so free it here to release memory
    del self.paths[rev]
    return (files, copies)
421 421
def getchangedfiles(self, rev, i):
    """Return the names of the files changed in rev.

    The full getchanges() result is kept in self._changescache so a
    following getchanges(rev) call can reuse it. The i argument is not
    used.
    """
    changes = self.getchanges(rev)
    self._changescache = (rev, changes)
    names = []
    for item in changes[0]:
        names.append(item[0])
    return names
426 426
def getcommit(self, rev):
    """Return the commit object for rev, fetching the log if needed.

    The entry is removed from self.commits before being returned.
    Raises util.Abort if the revision cannot be found after fetching.
    """
    if rev not in self.commits:
        module, revnum = revsplit(rev)[1:]
        self.module = module
        self.reparent(module)
        # We assume that:
        # - requests for revisions after "stop" come from the
        # revision graph backward traversal. Cache all of them
        # down to stop, they will be used eventually.
        # - requests for revisions before "stop" come to get
        # isolated branches parents. Just fetch what is needed.
        stop = self.lastrevs.get(module, 0)
        if revnum < stop:
            stop = revnum + 1
        self._fetch_revisions(revnum, stop)
        if rev not in self.commits:
            raise util.Abort(_('svn: revision %s not found') % revnum)
    # caller caches the result, so free it here to release memory
    return self.commits.pop(rev)
448 448
def gettags(self):
    """Return a dict mapping tag names to revision ids.

    Returns an empty dict when self.tags is None. Otherwise the log of
    the tags directory is walked backward from the latest repository
    revision down to self.startrev.
    """
    tags = {}
    if self.tags is None:
        return tags

    # svn tags are just a convention, project branches left in a
    # 'tags' directory. There is no other relationship than
    # ancestry, which is expensive to discover and makes them hard
    # to update incrementally. Worse, past revisions may be
    # referenced by tags far away in the future, requiring a deep
    # history traversal on every calculation. Current code
    # performs a single backward traversal, tracking moves within
    # the tags directory (tag renaming) and recording a new tag
    # every time a project is copied from outside the tags
    # directory. It also lists deleted tags, this behaviour may
    # change in the future.
    pendings = []
    tagspath = self.tags
    start = svn.ra.get_latest_revnum(self.ra)
    stream = self._getlog([self.tags], start, self.startrev)
    try:
        for entry in stream:
            origpaths, revnum, author, date, message = entry
            if not origpaths:
                # The log receiver may deliver no changed paths at all
                # (None) for a revision; treat that as "nothing
                # changed" instead of failing on None.iteritems().
                origpaths = {}
            copies = [(e.copyfrom_path, e.copyfrom_rev, p) for p, e
                      in origpaths.iteritems() if e.copyfrom_path]
            # Apply moves/copies from more specific to general
            copies.sort(reverse=True)

            srctagspath = tagspath
            if copies and copies[-1][2] == tagspath:
                # Track tags directory moves
                srctagspath = copies.pop()[0]

            for source, sourcerev, dest in copies:
                if not dest.startswith(tagspath + '/'):
                    continue
                for tag in pendings:
                    if tag[0].startswith(dest):
                        # A pending tag lived under dest: follow it
                        # back to where dest was copied from.
                        tagpath = source + tag[0][len(dest):]
                        tag[:2] = [tagpath, sourcerev]
                        break
                else:
                    pendings.append([source, sourcerev, dest])

            # Filter out tags with children coming from different
            # parts of the repository like:
            # /tags/tag.1 (from /trunk:10)
            # /tags/tag.1/foo (from /branches/foo:12)
            # Here /tags/tag.1 is discarded as well as its children.
            # It happens with tools like cvs2svn. Such tags cannot
            # be represented in mercurial.
            addeds = dict((p, e.copyfrom_path) for p, e
                          in origpaths.iteritems()
                          if e.action == 'A' and e.copyfrom_path)
            badroots = set()
            for destroot in addeds:
                for source, sourcerev, dest in pendings:
                    if (not dest.startswith(destroot + '/')
                        or source.startswith(addeds[destroot] + '/')):
                        continue
                    badroots.add(destroot)
                    break

            for badroot in badroots:
                pendings = [p for p in pendings if p[2] != badroot
                            and not p[2].startswith(badroot + '/')]

            # Tell tag renamings from tag creations
            renamings = []
            for source, sourcerev, dest in pendings:
                tagname = dest.split('/')[-1]
                if source.startswith(srctagspath):
                    renamings.append([source, sourcerev, tagname])
                    continue
                if tagname in tags:
                    # Keep the latest tag value
                    continue
                # From revision may be fake, get one with changes
                try:
                    tagid = self.latest(source, sourcerev)
                    if tagid and tagname not in tags:
                        tags[tagname] = tagid
                except SvnPathNotFound:
                    # It happens when we are following directories
                    # we assumed were copied with their parents
                    # but were really created in the tag
                    # directory.
                    pass
            pendings = renamings
            tagspath = srctagspath
    finally:
        stream.close()
    return tags
542 542
def converted(self, rev, destrev):
    """Append "<destrev> <revnum>" to the working copy's hg-shamap file.

    Does nothing when the source is not a working copy (self.wc unset).
    The file '.svn/hg-shamap' is opened in append mode on first use and
    kept open in self.convertfp; each line is flushed immediately.
    """
    if not self.wc:
        return
    fp = self.convertfp
    if fp is None:
        shamap = os.path.join(self.wc, '.svn', 'hg-shamap')
        fp = self.convertfp = open(shamap, 'a')
    fp.write('%s %d\n' % (destrev, self.revnum(rev)))
    fp.flush()
551 551
def revid(self, revnum, module=None):
    """Build the 'svn:<uuid><module>@<revnum>' identifier.

    self.module is used when module is None or empty.
    """
    if not module:
        module = self.module
    return 'svn:%s%s@%s' % (self.uuid, module, revnum)
554 554
def revnum(self, rev):
    """Return the integer following the last '@' of a revision id."""
    suffix = rev.rsplit('@', 1)[-1]
    return int(suffix)
557 557
def latest(self, path, stop=None):
    """Find the latest revid affecting path, up to stop revision
    number. If stop is None, default to repository latest
    revision. It may return a revision in a different module,
    since a branch may be moved without a change being
    reported. Return None if computed module does not belong to
    rootmodule subtree.

    Raises SvnPathNotFound when path cannot be stat'ed at stop.
    """
    if not path.startswith(self.rootmodule):
        # Requests on foreign branches may be forbidden at server level
        self.ui.debug('ignoring foreign branch %r\n' % path)
        return None

    if stop is None:
        stop = svn.ra.get_latest_revnum(self.ra)
    try:
        # stat() takes a path relative to the session root, so move the
        # transport to the repository root for the call.
        prevmodule = self.reparent('')
        dirent = svn.ra.stat(self.ra, path.strip('/'), stop)
        self.reparent(prevmodule)
    except SubversionException:
        dirent = None
    if not dirent:
        raise SvnPathNotFound(_('%s not found up to revision %d')
                              % (path, stop))

    # stat() gives us the previous revision on this line of
    # development, but it might be in *another module*. Fetch the
    # log and detect renames down to the latest revision.
    stream = self._getlog([path], stop, dirent.created_rev)
    try:
        for entry in stream:
            paths, revnum, author, date, message = entry
            if revnum <= dirent.created_rev:
                break

            for p in paths:
                if not path.startswith(p) or not paths[p].copyfrom_path:
                    continue
                newpath = paths[p].copyfrom_path + path[len(p):]
                self.ui.debug("branch renamed from %s to %s at %d\n" %
                              (path, newpath, revnum))
                path = newpath
                break
    finally:
        stream.close()

    if not path.startswith(self.rootmodule):
        self.ui.debug('ignoring foreign branch %r\n' % path)
        return None
    return self.revid(dirent.created_rev, path)
606 608
def reparent(self, module):
    """Reparent the svn transport and return the previous parent.

    When the transport is already at module, nothing is done and module
    is returned. An unset previous parent is reported as ''.
    """
    previous = self.prevmodule
    if previous == module:
        return module
    svnurl = self.baseurl + quote(module)
    if previous is None:
        previous = ''
    self.ui.debug("reparent to %s\n" % svnurl)
    svn.ra.reparent(self.ra, svnurl)
    self.prevmodule = module
    return previous
619 621
def expandpaths(self, rev, paths, parents):
    """Expand the changed paths of rev into file-level changes.

    paths is a sequence of (path, entry) pairs. Returns a tuple
    (changed, removed, copies): the list of changed files (removed
    files included), the set of removed files, and a dict mapping copy
    destinations to their sources. The transport is reparented to the
    module of rev when it differs from self.module.
    """
    changed, removed = set(), set()
    copies = {}
    recode = self.recode
    filekind = svn.core.svn_node_file
    dirkind = svn.core.svn_node_dir

    new_module, revnum = revsplit(rev)[1:]
    if new_module != self.module:
        self.module = new_module
        self.reparent(self.module)

    for i, (path, ent) in enumerate(paths):
        self.ui.progress(_('scanning paths'), i, item=path,
                         total=len(paths))
        entrypath = self.getrelpath(path)

        kind = self._checkpath(entrypath, revnum)
        if kind == filekind:
            changed.add(recode(entrypath))
            if not ent.copyfrom_path or not parents:
                continue
            # Copy sources not in parent revisions cannot be
            # represented, ignore their origin for now
            pmodule, prevnum = revsplit(parents[0])[1:]
            if ent.copyfrom_rev < prevnum:
                continue
            copyfrom_path = self.getrelpath(ent.copyfrom_path, pmodule)
            if not copyfrom_path:
                continue
            self.ui.debug("copied to %s from %s@%s\n" %
                          (entrypath, copyfrom_path, ent.copyfrom_rev))
            copies[recode(entrypath)] = recode(copyfrom_path)
        elif kind == 0: # gone, but had better be a deleted *file*
            self.ui.debug("gone from %s\n" % ent.copyfrom_rev)
            pmodule, prevnum = revsplit(parents[0])[1:]
            parentpath = pmodule + "/" + entrypath
            fromkind = self._checkpath(entrypath, prevnum, pmodule)

            if fromkind == filekind:
                removed.add(recode(entrypath))
            elif fromkind == dirkind:
                # A directory went away: every file it held in the
                # parent revision is reported as removed.
                oroot = parentpath.strip('/')
                nroot = path.strip('/')
                for oldchild in self._iterfiles(oroot, prevnum):
                    newchild = oldchild.replace(oroot, nroot)
                    newchild = self.getrelpath("/" + newchild, pmodule)
                    if newchild:
                        removed.add(recode(newchild))
            else:
                self.ui.debug('unknown path in revision %d: %s\n' %
                              (revnum, path))
        elif kind == dirkind:
            if ent.action == 'M':
                # If the directory just had a prop change,
                # then we shouldn't need to look for its children.
                continue
            if ent.action == 'R' and parents:
                # If a directory is replacing a file, mark the previous
                # file as deleted
                pmodule, prevnum = revsplit(parents[0])[1:]
                pkind = self._checkpath(entrypath, prevnum, pmodule)
                if pkind == filekind:
                    removed.add(recode(entrypath))
                elif pkind == dirkind:
                    # We do not know what files were kept or removed,
                    # mark them all as changed.
                    for child in self._iterfiles(pmodule, prevnum):
                        child = self.getrelpath("/" + child)
                        if child:
                            changed.add(recode(child))

            for child in self._iterfiles(path, revnum):
                child = self.getrelpath("/" + child)
                if child:
                    changed.add(recode(child))

            # Handle directory copies
            if not ent.copyfrom_path or not parents:
                continue
            # Copy sources not in parent revisions cannot be
            # represented, ignore their origin for now
            pmodule, prevnum = revsplit(parents[0])[1:]
            if ent.copyfrom_rev < prevnum:
                continue
            copyfrompath = self.getrelpath(ent.copyfrom_path, pmodule)
            if not copyfrompath:
                continue
            self.ui.debug("mark %s came from %s:%d\n"
                          % (path, copyfrompath, ent.copyfrom_rev))
            sources = self._iterfiles(ent.copyfrom_path, ent.copyfrom_rev)
            for child in sources:
                child = self.getrelpath("/" + child, pmodule)
                if not child:
                    continue
                copytopath = path + child[len(copyfrompath):]
                copytopath = self.getrelpath(copytopath)
                copies[recode(copytopath)] = recode(child)

    self.ui.progress(_('scanning paths'), None)
    changed.update(removed)
    return (list(changed), removed, copies)
720 722
721 723 def _fetch_revisions(self, from_revnum, to_revnum):
722 724 if from_revnum < to_revnum:
723 725 from_revnum, to_revnum = to_revnum, from_revnum
724 726
725 727 self.child_cset = None
726 728
727 729 def parselogentry(orig_paths, revnum, author, date, message):
728 730 """Return the parsed commit object or None, and True if
729 731 the revision is a branch root.
730 732 """
731 733 self.ui.debug("parsing revision %d (%d changes)\n" %
732 734 (revnum, len(orig_paths)))
733 735
734 736 branched = False
735 737 rev = self.revid(revnum)
736 738 # branch log might return entries for a parent we already have
737 739
738 740 if rev in self.commits or revnum < to_revnum:
739 741 return None, branched
740 742
741 743 parents = []
742 744 # check whether this revision is the start of a branch or part
743 745 # of a branch renaming
744 746 orig_paths = sorted(orig_paths.iteritems())
745 747 root_paths = [(p, e) for p, e in orig_paths
746 748 if self.module.startswith(p)]
747 749 if root_paths:
748 750 path, ent = root_paths[-1]
749 751 if ent.copyfrom_path:
750 752 branched = True
751 753 newpath = ent.copyfrom_path + self.module[len(path):]
752 754 # ent.copyfrom_rev may not be the actual last revision
753 755 previd = self.latest(newpath, ent.copyfrom_rev)
754 756 if previd is not None:
755 757 prevmodule, prevnum = revsplit(previd)[1:]
756 758 if prevnum >= self.startrev:
757 759 parents = [previd]
758 760 self.ui.note(
759 761 _('found parent of branch %s at %d: %s\n') %
760 762 (self.module, prevnum, prevmodule))
761 763 else:
762 764 self.ui.debug("no copyfrom path, don't know what to do.\n")
763 765
764 766 paths = []
765 767 # filter out unrelated paths
766 768 for path, ent in orig_paths:
767 769 if self.getrelpath(path) is None:
768 770 continue
769 771 paths.append((path, ent))
770 772
771 773 # Example SVN datetime. Includes microseconds.
772 774 # ISO-8601 conformant
773 775 # '2007-01-04T17:35:00.902377Z'
774 776 date = util.parsedate(date[:19] + " UTC", ["%Y-%m-%dT%H:%M:%S"])
775 777
776 778 log = message and self.recode(message) or ''
777 779 author = author and self.recode(author) or ''
778 780 try:
779 781 branch = self.module.split("/")[-1]
780 782 if branch == self.trunkname:
781 783 branch = None
782 784 except IndexError:
783 785 branch = None
784 786
785 787 cset = commit(author=author,
786 788 date=util.datestr(date),
787 789 desc=log,
788 790 parents=parents,
789 791 branch=branch,
790 792 rev=rev)
791 793
792 794 self.commits[rev] = cset
793 795 # The parents list is *shared* among self.paths and the
794 796 # commit object. Both will be updated below.
795 797 self.paths[rev] = (paths, cset.parents)
796 798 if self.child_cset and not self.child_cset.parents:
797 799 self.child_cset.parents[:] = [rev]
798 800 self.child_cset = cset
799 801 return cset, branched
800 802
801 803 self.ui.note(_('fetching revision log for "%s" from %d to %d\n') %
802 804 (self.module, from_revnum, to_revnum))
803 805
804 806 try:
805 807 firstcset = None
806 808 lastonbranch = False
807 809 stream = self._getlog([self.module], from_revnum, to_revnum)
808 810 try:
809 811 for entry in stream:
810 812 paths, revnum, author, date, message = entry
811 813 if revnum < self.startrev:
812 814 lastonbranch = True
813 815 break
814 816 if not paths:
815 817 self.ui.debug('revision %d has no entries\n' % revnum)
816 818 # If we ever leave the loop on an empty
817 819 # revision, do not try to get a parent branch
818 820 lastonbranch = lastonbranch or revnum == 0
819 821 continue
820 822 cset, lastonbranch = parselogentry(paths, revnum, author,
821 823 date, message)
822 824 if cset:
823 825 firstcset = cset
824 826 if lastonbranch:
825 827 break
826 828 finally:
827 829 stream.close()
828 830
829 831 if not lastonbranch and firstcset and not firstcset.parents:
830 832 # The first revision of the sequence (the last fetched one)
831 833 # has invalid parents if not a branch root. Find the parent
832 834 # revision now, if any.
833 835 try:
834 836 firstrevnum = self.revnum(firstcset.rev)
835 837 if firstrevnum > 1:
836 838 latest = self.latest(self.module, firstrevnum - 1)
837 839 if latest:
838 840 firstcset.parents.append(latest)
839 841 except SvnPathNotFound:
840 842 pass
841 843 except SubversionException, (inst, num):
842 844 if num == svn.core.SVN_ERR_FS_NO_SUCH_REVISION:
843 845 raise util.Abort(_('svn: branch has no revision %s') % to_revnum)
844 846 raise
845 847
846 848 def getfile(self, file, rev):
847 849 # TODO: ra.get_file transmits the whole file instead of diffs.
848 850 if file in self.removed:
849 851 raise IOError()
850 852 mode = ''
851 853 try:
852 854 new_module, revnum = revsplit(rev)[1:]
853 855 if self.module != new_module:
854 856 self.module = new_module
855 857 self.reparent(self.module)
856 858 io = StringIO()
857 859 info = svn.ra.get_file(self.ra, file, revnum, io)
858 860 data = io.getvalue()
859 861 # ra.get_files() seems to keep a reference on the input buffer
860 862 # preventing collection. Release it explicitely.
861 863 io.close()
862 864 if isinstance(info, list):
863 865 info = info[-1]
864 866 mode = ("svn:executable" in info) and 'x' or ''
865 867 mode = ("svn:special" in info) and 'l' or mode
866 868 except SubversionException, e:
867 869 notfound = (svn.core.SVN_ERR_FS_NOT_FOUND,
868 870 svn.core.SVN_ERR_RA_DAV_PATH_NOT_FOUND)
869 871 if e.apr_err in notfound: # File not found
870 872 raise IOError()
871 873 raise
872 874 if mode == 'l':
873 875 link_prefix = "link "
874 876 if data.startswith(link_prefix):
875 877 data = data[len(link_prefix):]
876 878 return data, mode
877 879
def _iterfiles(self, path, revnum):
    """Enumerate all files in path at revnum, recursively.

    Returns a generator of paths, each prefixed with the stripped
    input path, for the entries svn.client.ls reports as files. The
    listing itself is fetched before this method returns.
    """
    relpath = path.strip('/')
    pool = Pool()
    url = (self.baseurl + '/' + quote(relpath)).strip('/')
    listing = svn.client.ls(url, optrev(revnum), True, self.ctx, pool)
    prefix = ''
    if relpath:
        prefix = relpath + '/'
    return (prefix + name for name, entry in listing.iteritems()
            if entry.kind == svn.core.svn_node_file)
888 890
889 891 def getrelpath(self, path, module=None):
890 892 if module is None:
891 893 module = self.module
892 894 # Given the repository url of this wc, say
893 895 # "http://server/plone/CMFPlone/branches/Plone-2_0-branch"
894 896 # extract the "entry" portion (a relative path) from what
895 897 # svn log --xml says, ie
896 898 # "/CMFPlone/branches/Plone-2_0-branch/tests/PloneTestCase.py"
897 899 # that is to say "tests/PloneTestCase.py"
898 900 if path.startswith(module):
899 901 relative = path.rstrip('/')[len(module):]
900 902 if relative.startswith('/'):
901 903 return relative[1:]
902 904 elif relative == '':
903 905 return relative
904 906
905 907 # The path is outside our tracked tree...
906 908 self.ui.debug('%r is not under %r, ignoring\n' % (path, module))
907 909 return None
908 910
def _checkpath(self, path, revnum, module=None):
    """Return svn.ra.check_path() for path at revnum.

    Without module, path is checked as given against the current RA
    session. With module, the session is reparented to '' for the
    duration of the call, module + '/' + path is checked, and the
    value returned by that reparent() call is passed back to
    reparent() afterwards, even if the check raises.
    """
    rebased = module is not None
    if rebased:
        prevmodule = self.reparent('')
        path = module + '/' + path
    try:
        # ra.check_path does not like leading slashes very much, it leads
        # to PROPFIND subversion errors
        return svn.ra.check_path(self.ra, path.strip('/'), revnum)
    finally:
        if rebased:
            self.reparent(prevmodule)
920 922
def _getlog(self, paths, start, end, limit=0, discover_changed_paths=True,
            strict_node_history=False):
    """Start an "hg debugsvnlog" child process and return its log stream.

    The request (base URL, normalized paths, revision range, limit and
    the two flags) is encoded with encodeargs() and written to the
    child's stdin; the child's stdout is returned wrapped in a
    logstream.

    Raises util.Abort if closing the child's stdin fails with IOError.
    """
    def normalize(p):
        # Normalize path names, svn >= 1.5 only wants paths relative to
        # supplied URL
        if not p.startswith('/'):
            p = self.module + '/' + p
        return p.strip('/')

    relpaths = [normalize(p) for p in paths]
    payload = encodeargs([self.baseurl, relpaths, start, end, limit,
                          discover_changed_paths, strict_node_history])
    cmd = '%s debugsvnlog' % util.shellquote(util.hgexecutable())
    stdin, stdout = util.popen2(util.quotecommand(cmd))
    stdin.write(payload)
    try:
        stdin.close()
    except IOError:
        raise util.Abort(_('Mercurial failed to run itself, check'
                           ' hg executable is in PATH'))
    return logstream(stdout)
943 945
# Shell script written by svn_sink.__init__ to hooks/pre-revprop-change of
# every repository it creates. It accepts modification of svn:log and
# addition of the hg:convert-branch and hg:convert-rev revision properties,
# and rejects any other revision property change.
pre_revprop_change = '''#!/bin/sh

REPOS="$1"
REV="$2"
USER="$3"
PROPNAME="$4"
ACTION="$5"

if [ "$ACTION" = "M" -a "$PROPNAME" = "svn:log" ]; then exit 0; fi
if [ "$ACTION" = "A" -a "$PROPNAME" = "hg:convert-branch" ]; then exit 0; fi
if [ "$ACTION" = "A" -a "$PROPNAME" = "hg:convert-rev" ]; then exit 0; fi

echo "Changing prohibited revision property" >&2
exit 1
'''
959 961
class svn_sink(converter_sink, commandline):
    """Conversion sink writing changesets to a Subversion working copy.

    All Subversion operations are performed by running the "svn"
    command line client (and "svnadmin" to create a repository).
    Conversion bookkeeping files are stored in the working copy's
    top-level .svn directory.
    """

    commit_re = re.compile(r'Committed revision (\d+).', re.M)
    uuid_re = re.compile(r'Repository UUID:\s*(\S+)', re.M)

    def prerun(self):
        """Change the current directory to the working copy, if any."""
        # NOTE(review): presumably called by commandline before each
        # command is run - confirm against common.commandline.
        if self.wc:
            os.chdir(self.wc)

    def postrun(self):
        """Restore the directory that was current at construction time."""
        if self.wc:
            os.chdir(self.cwd)

    def join(self, name):
        """Return the path of name inside the working copy .svn directory."""
        return os.path.join(self.wc, '.svn', name)

    def revmapfile(self):
        """Return the path of the revision map file."""
        return self.join('hg-shamap')

    def authorfile(self):
        """Return the path of the author map file."""
        return self.join('hg-authormap')

    def __init__(self, ui, path):
        """Prepare a working copy for path and read the repository UUID.

        If path already is a working copy (it contains .svn/entries),
        it is updated and used as is. Otherwise a working copy named
        "<basename of path>-wc" is checked out in the current
        directory. In that case, when the parent directory of path
        exists, path is addressed through a file:// URL and a
        repository is first created there with svnadmin if
        path/db/fs-type is missing; a newly created repository also
        gets the pre-revprop-change hook installed.
        """

        converter_sink.__init__(self, ui, path)
        commandline.__init__(self, ui, 'svn')
        # Pending working copy operations, flushed by putcommit()
        self.delete = []
        self.setexec = []
        self.delexec = []
        self.copies = []
        self.wc = None
        self.cwd = os.getcwd()

        path = os.path.realpath(path)

        created = False
        if os.path.isfile(os.path.join(path, '.svn', 'entries')):
            self.wc = path
            self.run0('update')
        else:
            wcpath = os.path.join(os.getcwd(), os.path.basename(path) + '-wc')

            if os.path.isdir(os.path.dirname(path)):
                if not os.path.exists(os.path.join(path, 'db', 'fs-type')):
                    ui.status(_('initializing svn repository %r\n') %
                              os.path.basename(path))
                    commandline(ui, 'svnadmin').run0('create', path)
                    created = path
                path = util.normpath(path)
                if not path.startswith('/'):
                    path = '/' + path
                path = 'file://' + path

            ui.status(_('initializing svn working copy %r\n')
                      % os.path.basename(wcpath))
            self.run0('checkout', path, wcpath)

            self.wc = wcpath
        self.opener = scmutil.opener(self.wc)
        self.wopener = scmutil.opener(self.wc)
        self.childmap = mapfile(ui, self.join('hg-childmap'))
        # None when the working copy filesystem has no usable exec bit
        self.is_exec = util.checkexec(self.wc) and util.isexec or None

        if created:
            hook = os.path.join(created, 'hooks', 'pre-revprop-change')
            fp = open(hook, 'w')
            try:
                fp.write(pre_revprop_change)
            finally:
                # Do not leak the handle if the write fails
                fp.close()
            util.setflags(hook, False, True)

        output = self.run0('info')
        self.uuid = self.uuid_re.search(output).group(1).strip()

    def wjoin(self, *names):
        """Return names joined below the working copy root."""
        return os.path.join(self.wc, *names)

    def putfile(self, filename, flags, data):
        """Write filename in the working copy with the given flags.

        With 'l' in flags a symlink pointing to data is created.
        Otherwise data is written as a regular file and the file is
        queued in self.setexec or self.delexec when its svn:executable
        property has to be set or removed at the next commit.
        """
        if 'l' in flags:
            self.wopener.symlink(data, filename)
        else:
            try:
                target = self.wjoin(filename)
                if os.path.islink(target):
                    # Remove the link that was tested, not a path
                    # resolved against the current directory.
                    os.unlink(target)
            except OSError:
                pass
            self.wopener.write(filename, data)

            if self.is_exec:
                was_exec = self.is_exec(self.wjoin(filename))
            else:
                # On filesystems not supporting execute-bit, there is no way
                # to know if it is set but asking subversion. Setting it
                # systematically is just as expensive and much simpler.
                was_exec = 'x' not in flags

            util.setflags(self.wjoin(filename), False, 'x' in flags)
            if was_exec:
                if 'x' not in flags:
                    self.delexec.append(filename)
            else:
                if 'x' in flags:
                    self.setexec.append(filename)

    def _copyfile(self, source, dest):
        """Record with "svn copy" a copy whose destination may exist."""
        # SVN's copy command pukes if the destination file exists, but
        # our copyfile method expects to record a copy that has
        # already occurred. Cross the semantic gap.
        wdest = self.wjoin(dest)
        exists = os.path.lexists(wdest)
        if exists:
            # Reserve a unique name next to the destination, then move
            # the existing destination out of the way under that name.
            fd, tempname = tempfile.mkstemp(
                prefix='hg-copy-', dir=os.path.dirname(wdest))
            os.close(fd)
            os.unlink(tempname)
            os.rename(wdest, tempname)
        try:
            self.run0('copy', source, dest)
        finally:
            if exists:
                # Put the already converted content back in place
                try:
                    os.unlink(wdest)
                except OSError:
                    pass
                os.rename(tempname, wdest)

    def dirs_of(self, files):
        """Return the set of directories related to files.

        It contains every ancestor directory of each entry, plus the
        entry itself when it is a directory in the working copy.
        """
        dirs = set()
        for f in files:
            if os.path.isdir(self.wjoin(f)):
                dirs.add(f)
            for i in strutil.rfindall(f, '/'):
                dirs.add(f[:i])
        return dirs

    def add_dirs(self, files):
        """Run "svn add" on the directories of files lacking .svn/entries.

        Returns the list of directories added.
        """
        add_dirs = [d for d in sorted(self.dirs_of(files))
                    if not os.path.exists(self.wjoin(d, '.svn', 'entries'))]
        if add_dirs:
            self.xargs(add_dirs, 'add', non_recursive=True, quiet=True)
        return add_dirs

    def add_files(self, files):
        """Run "svn add" on files, if any, and return files."""
        if files:
            self.xargs(files, 'add', quiet=True)
        return files

    def tidy_dirs(self, names):
        """Run "svn delete" on directories of names left with only .svn.

        Directories are visited in reverse sorted order, so nested
        directories come before their parents. Returns the list of
        deleted directories.
        """
        deleted = []
        for d in sorted(self.dirs_of(names), reverse=True):
            wd = self.wjoin(d)
            # os.listdir() returns a list: comparing it with the bare
            # string '.svn' could never be true.
            if os.listdir(wd) == ['.svn']:
                self.run0('delete', d)
                deleted.append(d)
        return deleted

    def addchild(self, parent, child):
        """Record child as the revision converted on top of parent."""
        self.childmap[parent] = child

    def revid(self, rev):
        """Return the converter identifier of Subversion revision rev."""
        return u"svn:%s@%s" % (self.uuid, rev)

    def putcommit(self, files, copies, parents, commit, source, revmap):
        """Commit one changeset and return its revision identifier.

        If one of parents already has an entry in the child map, the
        identifier of that recorded child is returned and nothing is
        committed. Otherwise files are fetched from source and written
        to the working copy, pending adds, copies, deletes and
        svn:executable changes are applied, and "svn commit" is run
        with commit.desc as message.

        When svn reports no committed revision, parents[0] is returned
        if files is empty; otherwise util.Abort is raised.
        """
        for parent in parents:
            try:
                return self.revid(self.childmap[parent])
            except KeyError:
                pass

        # Apply changes to working copy
        for f, v in files:
            try:
                data, mode = source.getfile(f, v)
            except IOError:
                # The source has no such file: schedule its removal
                self.delete.append(f)
            else:
                self.putfile(f, mode, data)
                if f in copies:
                    self.copies.append([copies[f], f])
        files = [f[0] for f in files]

        entries = set(self.delete)
        files = frozenset(files)
        entries.update(self.add_dirs(files.difference(entries)))
        if self.copies:
            for s, d in self.copies:
                self._copyfile(s, d)
            self.copies = []
        if self.delete:
            self.xargs(self.delete, 'delete')
            self.delete = []
        entries.update(self.add_files(files.difference(entries)))
        entries.update(self.tidy_dirs(entries))
        if self.delexec:
            self.xargs(self.delexec, 'propdel', 'svn:executable')
            self.delexec = []
        if self.setexec:
            self.xargs(self.setexec, 'propset', 'svn:executable', '*')
            self.setexec = []

        fd, messagefile = tempfile.mkstemp(prefix='hg-convert-')
        try:
            # Write the message inside the try block so the temporary
            # file is removed even if writing it fails.
            fp = os.fdopen(fd, 'w')
            try:
                fp.write(commit.desc)
            finally:
                fp.close()
            output = self.run0('commit',
                               username=util.shortuser(commit.author),
                               file=messagefile,
                               encoding='utf-8')
            try:
                rev = self.commit_re.search(output).group(1)
            except AttributeError:
                # No "Committed revision" line in svn output
                if not files:
                    return parents[0]
                self.ui.warn(_('unexpected svn output:\n'))
                self.ui.warn(output)
                raise util.Abort(_('unable to cope with svn output'))
            if commit.rev:
                self.run('propset', 'hg:convert-rev', commit.rev,
                         revprop=True, revision=rev)
            if commit.branch and commit.branch != 'default':
                self.run('propset', 'hg:convert-branch', commit.branch,
                         revprop=True, revision=rev)
            for parent in parents:
                self.addchild(parent, rev)
            return self.revid(rev)
        finally:
            os.unlink(messagefile)

    def puttags(self, tags):
        """Warn that tags cannot be written and return (None, None)."""
        self.ui.warn(_('writing Subversion tags is not yet implemented\n'))
        return None, None

    def hascommit(self, rev):
        """Return True if rev is in the child map, else raise util.Abort."""
        # This is not correct as one can convert to an existing subversion
        # repository and childmap would not list all revisions. Too bad.
        if rev in self.childmap:
            return True
        raise util.Abort(_('splice map revision %s not found in subversion '
                           'child map (revision lookups are not implemented)')
                         % rev)
General Comments 0
You need to be logged in to leave comments. Login now