##// END OF EJS Templates
convert/svn: list files explicitely, stop checking their type...
Patrick Mezard -
r11132:321b8b2a default
parent child Browse files
Show More
@@ -1,1174 +1,1169 b''
1 1 # Subversion 1.4/1.5 Python API backend
2 2 #
3 3 # Copyright(C) 2007 Daniel Holth et al
4 4
5 5 import os
6 6 import re
7 7 import sys
8 8 import cPickle as pickle
9 9 import tempfile
10 10 import urllib
11 11 import urllib2
12 12
13 13 from mercurial import strutil, util, encoding
14 14 from mercurial.i18n import _
15 15
16 16 # Subversion stuff. Works best with very recent Python SVN bindings
17 17 # e.g. SVN 1.5 or backports. Thanks to the bzr folks for enhancing
18 18 # these bindings.
19 19
20 20 from cStringIO import StringIO
21 21
22 22 from common import NoRepo, MissingTool, commit, encodeargs, decodeargs
23 23 from common import commandline, converter_source, converter_sink, mapfile
24 24
25 25 try:
26 26 from svn.core import SubversionException, Pool
27 27 import svn
28 28 import svn.client
29 29 import svn.core
30 30 import svn.ra
31 31 import svn.delta
32 32 import transport
33 33 import warnings
34 34 warnings.filterwarnings('ignore',
35 35 module='svn.core',
36 36 category=DeprecationWarning)
37 37
38 38 except ImportError:
39 39 pass
40 40
class SvnPathNotFound(Exception):
    """Raised when a path cannot be found in the Subversion repository."""
43 43
def geturl(path):
    """Return a URL for path.

    The Subversion client is asked first. If it raises
    SubversionException and path is an existing directory, a file://
    URL is built from its absolute path. Anything else is returned
    unchanged.
    """
    try:
        canonical = svn.core.svn_path_canonicalize(path)
        return svn.client.url_from_path(canonical)
    except SubversionException:
        # svn could not resolve it; fall back to the filesystem checks
        pass
    if not os.path.isdir(path):
        return path
    localpath = os.path.normpath(os.path.abspath(path))
    if os.name == 'nt':
        localpath = '/' + util.normpath(localpath)
    # Module URL is later compared with the repository URL returned
    # by svn API, which is UTF-8.
    localpath = encoding.tolocal(localpath)
    return 'file://%s' % urllib.quote(localpath)
58 58
def optrev(number):
    """Build an svn_opt_revision_t designating revision `number`."""
    revision = svn.core.svn_opt_revision_t()
    revision.kind = svn.core.svn_opt_revision_number
    revision.value.number = number
    return revision
64 64
class changedpath(object):
    """Plain copy of the copyfrom_path, copyfrom_rev and action
    attributes of a changed-path object."""

    def __init__(self, p):
        for name in ('copyfrom_path', 'copyfrom_rev', 'action'):
            setattr(self, name, getattr(p, name))
70 70
71 71 def get_log_child(fp, url, paths, start, end, limit=0, discover_changed_paths=True,
72 72 strict_node_history=False):
73 73 protocol = -1
74 74 def receiver(orig_paths, revnum, author, date, message, pool):
75 75 if orig_paths is not None:
76 76 for k, v in orig_paths.iteritems():
77 77 orig_paths[k] = changedpath(v)
78 78 pickle.dump((orig_paths, revnum, author, date, message),
79 79 fp, protocol)
80 80
81 81 try:
82 82 # Use an ra of our own so that our parent can consume
83 83 # our results without confusing the server.
84 84 t = transport.SvnRaTransport(url=url)
85 85 svn.ra.get_log(t.ra, paths, start, end, limit,
86 86 discover_changed_paths,
87 87 strict_node_history,
88 88 receiver)
89 89 except SubversionException, (inst, num):
90 90 pickle.dump(num, fp, protocol)
91 91 except IOError:
92 92 # Caller may interrupt the iteration
93 93 pickle.dump(None, fp, protocol)
94 94 else:
95 95 pickle.dump(None, fp, protocol)
96 96 fp.close()
97 97 # With large history, cleanup process goes crazy and suddenly
98 98 # consumes *huge* amount of memory. The output file being closed,
99 99 # there is no need for clean termination.
100 100 os._exit(0)
101 101
def debugsvnlog(ui, **opts):
    """Fetch SVN log in a subprocess and channel them back to parent to
    avoid memory collection issues.

    The encoded arguments are read from stdin and the log is written to
    stdout; both streams are switched to binary mode first.
    """
    for stream in (sys.stdin, sys.stdout):
        util.set_binary(stream)
    encoded = sys.stdin.read()
    get_log_child(sys.stdout, *decodeargs(encoded))
110 110
class logstream(object):
    """Interruptible revision log iterator.

    Wraps a file-like object carrying a sequence of pickles: log entries
    as 5-tuples, ended by None. Any other non-tuple value is treated as
    an error reported by the child.
    """
    def __init__(self, stdout):
        self._stdout = stdout

    def __iter__(self):
        """Yield (orig_paths, revnum, author, date, message) tuples.

        Raises util.Abort if the stream ends before its terminator and
        SubversionException if the child sent an error value.
        """
        while True:
            try:
                # NOTE(review): unpickling is only safe because the stream
                # is expected to come from our own child process.
                entry = pickle.load(self._stdout)
            except EOFError:
                raise util.Abort(_('Mercurial failed to run itself, check'
                                   ' hg executable is in PATH'))
            if entry is None:
                # Regular end-of-stream marker
                break
            try:
                orig_paths, revnum, author, date, message = entry
            except (TypeError, ValueError):
                # Not a 5-tuple: the child forwarded an error value. Only
                # unpacking failures are caught, so interrupts and
                # unrelated errors are no longer swallowed here.
                raise SubversionException("child raised exception", entry)
            yield entry

    def close(self):
        """Close the underlying stream; safe to call more than once."""
        if self._stdout:
            self._stdout.close()
            self._stdout = None
135 135
136 136
137 137 # Check to see if the given path is a local Subversion repo. Verify this by
138 138 # looking for several svn-specific files and directories in the given
139 139 # directory.
def filecheck(ui, path, proto):
    """Return True if path contains the 'locks', 'hooks', 'format' and
    'db' entries expected of a local Subversion repository."""
    expected = ('locks', 'hooks', 'format', 'db')
    missing = [name for name in expected
               if not os.path.exists(os.path.join(path, name))]
    return not missing
145 145
146 146 # Check to see if a given path is the root of an svn repo over http. We verify
147 147 # this by requesting a version-controlled URL we know can't exist and looking
148 148 # for the svn-specific "not found" XML.
149 149 def httpcheck(ui, path, proto):
150 150 try:
151 151 opener = urllib2.build_opener()
152 152 rsp = opener.open('%s://%s/!svn/ver/0/.svn' % (proto, path))
153 153 data = rsp.read()
154 154 except urllib2.HTTPError, inst:
155 155 if inst.code != 404:
156 156 # Except for 404 we cannot know for sure this is not an svn repo
157 157 ui.warn(_('svn: cannot probe remote repository, assume it could '
158 158 'be a subversion repository. Use --source-type if you '
159 159 'know better.\n'))
160 160 return True
161 161 data = inst.fp.read()
162 162 except:
163 163 # Could be urllib2.URLError if the URL is invalid or anything else.
164 164 return False
165 165 return '<m:human-readable errcode="160013">' in data
166 166
# Maps a URL scheme to the function probing it for a Subversion repository.
protomap = {
    'http': httpcheck,
    'https': httpcheck,
    'file': filecheck,
}

def issvnurl(ui, url):
    """Return True if url, or any of its parent paths, passes the probe
    registered for its scheme in protomap.

    A url without a '://' separator is treated as a local path. Schemes
    without a registered probe never match.
    """
    try:
        proto, path = url.split('://', 1)
        if proto == 'file':
            path = urllib.url2pathname(path)
    except ValueError:
        proto, path = 'file', os.path.abspath(url)
    if proto == 'file':
        path = path.replace(os.sep, '/')
    if proto in protomap:
        check = protomap[proto]
    else:
        check = lambda *args: False
    # Walk up the path one component at a time
    while True:
        if '/' not in path:
            return False
        if check(ui, path, proto):
            return True
        path = path.rsplit('/', 1)[0]
187 187
188 188 # SVN conversion code stolen from bzr-svn and tailor
189 189 #
190 190 # Subversion looks like a versioned filesystem, branches structures
191 191 # are defined by conventions and not enforced by the tool. First,
192 192 # we define the potential branches (modules) as "trunk" and "branches"
193 193 # children directories. Revisions are then identified by their
194 194 # module and revision number (and a repository identifier).
195 195 #
196 196 # The revision graph is really a tree (or a forest). By default, a
197 197 # revision parent is the previous revision in the same module. If the
198 198 # module directory is copied/moved from another module then the
199 199 # revision is the module root and its parent the source revision in
200 200 # the parent module. A revision has at most one parent.
201 201 #
202 202 class svn_source(converter_source):
def __init__(self, ui, url, rev=None):
    """Open url as a Subversion conversion source.

    Raises NoRepo when url does not look like a Subversion repository
    or the transport cannot be opened, MissingTool when the Python
    bindings are missing or older than 1.4, and util.Abort when rev or
    the convert.svn.startrev setting is not an integer or no revision
    is found in the module.
    """
    super(svn_source, self).__init__(ui, url, rev=rev)

    if not (url.startswith('svn://') or url.startswith('svn+ssh://') or
            (os.path.exists(url) and
             os.path.exists(os.path.join(url, '.svn'))) or
            issvnurl(ui, url)):
        raise NoRepo(_("%s does not look like a Subversion repository")
                     % url)

    # The bindings are imported in a try block at module level; the name
    # is undefined when that import failed.
    try:
        SubversionException
    except NameError:
        raise MissingTool(_('Subversion python bindings could not be loaded'))

    try:
        version = svn.core.SVN_VER_MAJOR, svn.core.SVN_VER_MINOR
        if version < (1, 4):
            raise MissingTool(_('Subversion python bindings %d.%d found, '
                                '1.4 or later required') % version)
    except AttributeError:
        raise MissingTool(_('Subversion python bindings are too old, 1.4 '
                            'or later required'))

    self.lastrevs = {}

    latest = None
    try:
        # Support file://path@rev syntax. Useful e.g. to convert
        # deleted branches.
        at = url.rfind('@')
        if at >= 0:
            latest = int(url[at + 1:])
            url = url[:at]
    except ValueError:
        # What follows '@' is not a revision number: keep url untouched
        pass
    self.url = geturl(url)
    self.encoding = 'UTF-8' # Subversion is always nominal UTF-8
    try:
        self.transport = transport.SvnRaTransport(url=self.url)
        self.ra = self.transport.ra
        self.ctx = self.transport.client
        self.baseurl = svn.ra.get_repos_root(self.ra)
        # Module is either empty or a repository path starting with
        # a slash and not ending with a slash.
        self.module = urllib.unquote(self.url[len(self.baseurl):])
        self.prevmodule = None
        self.rootmodule = self.module
        self.commits = {}
        self.paths = {}
        self.uuid = svn.ra.get_uuid(self.ra)
    except SubversionException:
        ui.traceback()
        raise NoRepo(_("%s does not look like a Subversion repository")
                     % self.url)

    # An explicit rev argument overrides a revision given as url@rev
    if rev:
        try:
            latest = int(rev)
        except ValueError:
            raise util.Abort(_('svn: revision %s is not an integer') % rev)

    self.startrev = self.ui.config('convert', 'svn.startrev', default=0)
    try:
        self.startrev = int(self.startrev)
        if self.startrev < 0:
            self.startrev = 0
    except ValueError:
        raise util.Abort(_('svn: start revision %s is not an integer')
                         % self.startrev)

    self.head = self.latest(self.module, latest)
    if not self.head:
        raise util.Abort(_('no revision found in module %s')
                         % self.module)
    self.last_changed = self.revnum(self.head)

    self._changescache = None

    # Remember the working copy, if url is one; converted() uses it
    if os.path.exists(os.path.join(url, '.svn/entries')):
        self.wc = url
    else:
        self.wc = None
    self.convertfp = None
287 287
def setrevmap(self, revmap):
    """Record in self.lastrevs, for each module, the highest revision
    number found among the keys of revmap."""
    newest = {}
    for revid in revmap.iterkeys():
        module, revnum = self.revsplit(revid)[1:]
        if module not in newest or revnum > newest[module]:
            newest[module] = revnum
    self.lastrevs = newest
296 296
def exists(self, path, optrev):
    """Return True if listing path (relative to self.url) at optrev
    succeeds, False if it raises SubversionException."""
    target = self.url.rstrip('/') + '/' + urllib.quote(path)
    try:
        svn.client.ls(target, optrev, False, self.ctx)
    except SubversionException:
        return False
    return True
304 304
def getheads(self):
    """Return the list of head revision ids, the module's head first.

    Looks for the trunk, tags and branches directories (their names may
    be overridden with the convert.svn.* settings) and adds one head per
    non-empty branch directory. Updates self.module, self.head,
    self.tags and self.heads. Raises util.Abort when a configured
    directory is missing, when the module has no revision, or when a
    start revision is combined with several heads or lies after the head.
    """

    def isdir(path, revnum):
        kind = self._checkpath(path, revnum)
        return kind == svn.core.svn_node_dir

    def getcfgpath(name, rev):
        # Return the path to use for `name`, or None when it is disabled
        # (setting present but blank) or absent from the repository.
        cfgpath = self.ui.config('convert', 'svn.' + name)
        if cfgpath is not None and cfgpath.strip() == '':
            return None
        path = (cfgpath or name).strip('/')
        if not self.exists(path, rev):
            # Only an explicitly configured path is required to exist
            if cfgpath:
                raise util.Abort(_('expected %s to be at %r, but not found')
                             % (name, path))
            return None
        self.ui.note(_('found %s at %r\n') % (name, path))
        return path

    rev = optrev(self.last_changed)
    oldmodule = ''
    trunk = getcfgpath('trunk', rev)
    self.tags = getcfgpath('tags', rev)
    branches = getcfgpath('branches', rev)

    # If the project has a trunk or branches, we will extract heads
    # from them. We keep the project root otherwise.
    if trunk:
        oldmodule = self.module or ''
        self.module += '/' + trunk
        self.head = self.latest(self.module, self.last_changed)
        if not self.head:
            raise util.Abort(_('no revision found in module %s')
                             % self.module)

    # First head in the list is the module's head
    self.heads = [self.head]
    if self.tags is not None:
        self.tags = '%s/%s' % (oldmodule , (self.tags or 'tags'))

    # Check if branches bring a few more heads to the list
    if branches:
        rpath = self.url.strip('/')
        branchnames = svn.client.ls(rpath + '/' + urllib.quote(branches),
                                    rev, False, self.ctx)
        for branch in branchnames.keys():
            module = '%s/%s/%s' % (oldmodule, branches, branch)
            if not isdir(module, self.last_changed):
                continue
            brevid = self.latest(module, self.last_changed)
            if not brevid:
                self.ui.note(_('ignoring empty branch %s\n') % branch)
                continue
            self.ui.note(_('found branch %s at %d\n') %
                         (branch, self.revnum(brevid)))
            self.heads.append(brevid)

    if self.startrev and self.heads:
        if len(self.heads) > 1:
            raise util.Abort(_('svn: start revision is not supported '
                               'with more than one branch'))
        revnum = self.revnum(self.heads[0])
        if revnum < self.startrev:
            raise util.Abort(
                _('svn: no revision found after start revision %d')
                             % self.startrev)

    return self.heads
373 373
def getfile(self, file, rev):
    """Return the content of file at rev, caching its mode for getmode()."""
    content, flags = self._getfile(file, rev)
    key = (file, rev)
    self.modecache[key] = flags
    return content
378 378
def getmode(self, file, rev):
    """Return the mode cached by getfile() for (file, rev)."""
    key = (file, rev)
    return self.modecache[key]
381 381
def getchanges(self, rev):
    """Return (files, copies) for rev.

    files is a sorted list of (path, rev) pairs. A revision without
    parents is expanded to every file listed recursively under its
    module. Resets self.modecache, sets self.removed and drops the
    self.paths entry for rev.
    """
    cached = self._changescache
    if cached and cached[0] == rev:
        return cached[1]
    self._changescache = None
    self.modecache = {}
    paths, parents = self.paths[rev]
    if not parents:
        # Perform a full checkout on roots
        module, revnum = self.revsplit(rev)[1:]
        entries = svn.client.ls(self.baseurl + urllib.quote(module),
                                optrev(revnum), True, self.ctx)
        names = []
        for name, entry in entries.iteritems():
            if entry.kind == svn.core.svn_node_file:
                names.append(name)
        copies = {}
        self.removed = set()
    else:
        names, removed, copies = self.expandpaths(rev, paths, parents)
        self.removed = removed

    files = [(name, rev) for name in sorted(names)]

    # caller caches the result, so free it here to release memory
    del self.paths[rev]
    return (files, copies)
406 406
def getchangedfiles(self, rev, i):
    """Return the names of the files changed in rev, keeping the full
    getchanges() result in self._changescache for the next call."""
    result = self.getchanges(rev)
    self._changescache = (rev, result)
    filelist = result[0]
    return [entry[0] for entry in filelist]
411 411
def getcommit(self, rev):
    """Return the commit object for rev, fetching revisions on demand.

    The entry is removed from self.commits before returning.
    """
    if rev not in self.commits:
        module, revnum = self.revsplit(rev)[1:]
        self.module = module
        self.reparent(module)
        # We assume that:
        # - requests for revisions after "stop" come from the
        # revision graph backward traversal. Cache all of them
        # down to stop, they will be used eventually.
        # - requests for revisions before "stop" come to get
        # isolated branches parents. Just fetch what is needed.
        stop = self.lastrevs.get(module, 0)
        if stop > revnum:
            stop = revnum + 1
        self._fetch_revisions(revnum, stop)
    # caller caches the result, so free it here to release memory
    return self.commits.pop(rev)
431 431
def gettags(self):
    """Return a dict mapping tag names to revision ids.

    The dict is empty when self.tags is None. A SubversionException
    raised while reading the log is reported with ui.note and the tags
    collected so far are returned.
    """
    tags = {}
    if self.tags is None:
        return tags

    # svn tags are just a convention, project branches left in a
    # 'tags' directory. There is no other relationship than
    # ancestry, which is expensive to discover and makes them hard
    # to update incrementally. Worse, past revisions may be
    # referenced by tags far away in the future, requiring a deep
    # history traversal on every calculation. Current code
    # performs a single backward traversal, tracking moves within
    # the tags directory (tag renaming) and recording a new tag
    # every time a project is copied from outside the tags
    # directory. It also lists deleted tags, this behaviour may
    # change in the future.
    #
    # Each pending entry is a [source path, source revision, destination]
    # list that is updated in place while walking back in history.
    pendings = []
    tagspath = self.tags
    start = svn.ra.get_latest_revnum(self.ra)
    try:
        for entry in self._getlog([self.tags], start, self.startrev):
            origpaths, revnum, author, date, message = entry
            copies = [(e.copyfrom_path, e.copyfrom_rev, p) for p, e
                      in origpaths.iteritems() if e.copyfrom_path]
            # Apply moves/copies from more specific to general
            copies.sort(reverse=True)

            srctagspath = tagspath
            if copies and copies[-1][2] == tagspath:
                # Track tags directory moves
                srctagspath = copies.pop()[0]

            for source, sourcerev, dest in copies:
                if not dest.startswith(tagspath + '/'):
                    continue
                for tag in pendings:
                    if tag[0].startswith(dest):
                        # A pending tag came from this copy destination:
                        # follow it back to the copy source.
                        tagpath = source + tag[0][len(dest):]
                        tag[:2] = [tagpath, sourcerev]
                        break
                else:
                    pendings.append([source, sourcerev, dest])

            # Filter out tags with children coming from different
            # parts of the repository like:
            # /tags/tag.1 (from /trunk:10)
            # /tags/tag.1/foo (from /branches/foo:12)
            # Here /tags/tag.1 is discarded as well as its children.
            # It happens with tools like cvs2svn. Such tags cannot
            # be represented in mercurial.
            addeds = dict((p, e.copyfrom_path) for p, e
                          in origpaths.iteritems()
                          if e.action == 'A' and e.copyfrom_path)
            badroots = set()
            for destroot in addeds:
                for source, sourcerev, dest in pendings:
                    if (not dest.startswith(destroot + '/')
                        or source.startswith(addeds[destroot] + '/')):
                        continue
                    badroots.add(destroot)
                    break

            for badroot in badroots:
                pendings = [p for p in pendings if p[2] != badroot
                            and not p[2].startswith(badroot + '/')]

            # Tell tag renamings from tag creations
            remainings = []
            for source, sourcerev, dest in pendings:
                tagname = dest.split('/')[-1]
                if source.startswith(srctagspath):
                    # Copied from within the tags directory: a renaming,
                    # keep following it in older revisions.
                    remainings.append([source, sourcerev, tagname])
                    continue
                if tagname in tags:
                    # Keep the latest tag value
                    continue
                # From revision may be fake, get one with changes
                try:
                    tagid = self.latest(source, sourcerev)
                    if tagid and tagname not in tags:
                        tags[tagname] = tagid
                except SvnPathNotFound:
                    # It happens when we are following directories
                    # we assumed were copied with their parents
                    # but were really created in the tag
                    # directory.
                    pass
            pendings = remainings
            tagspath = srctagspath

    except SubversionException:
        self.ui.note(_('no tags found at revision %d\n') % start)
    return tags
525 525
def converted(self, rev, destrev):
    """Append a "<destrev> <svn revision number>" line to the
    .svn/hg-shamap file of the working copy, if there is one.

    The file is opened on first use and kept open in self.convertfp.
    """
    if not self.wc:
        return
    fp = self.convertfp
    if fp is None:
        shamap = os.path.join(self.wc, '.svn', 'hg-shamap')
        fp = self.convertfp = open(shamap, 'a')
    fp.write('%s %d\n' % (destrev, self.revnum(rev)))
    fp.flush()
534 534
def revid(self, revnum, module=None):
    """Build the 'svn:<uuid><module>@<revnum>' identifier, using
    self.module when module is empty or None."""
    if not module:
        module = self.module
    return 'svn:%s%s@%s' % (self.uuid, module, revnum)
537 537
def revnum(self, rev):
    """Return the integer following the last '@' of rev."""
    tail = rev[rev.rfind('@') + 1:]
    return int(tail)
540 540
def revsplit(self, rev):
    """Split a revision id into (uuid, module, revnum).

    module is '' when the id has no path part; otherwise it starts
    with a slash.
    """
    location, number = rev.rsplit('@', 1)
    number = int(number)
    slash = location.find('/')
    if slash < 0:
        # No module part; the first four characters are the 'svn:' prefix
        return location[4:], '', number
    return location[4:slash], location[slash:], number
550 550
def latest(self, path, stop=0):
    """Find the latest revid affecting path, up to stop. It may return
    a revision in a different module, since a branch may be moved without
    a change being reported. Return None if computed module does not
    belong to rootmodule subtree.

    When stop is 0 the latest repository revision is used. Raises
    SvnPathNotFound when path cannot be stat'ed at stop.
    """
    if not path.startswith(self.rootmodule):
        # Requests on foreign branches may be forbidden at server level
        self.ui.debug('ignoring foreign branch %r\n' % path)
        return None

    if not stop:
        stop = svn.ra.get_latest_revnum(self.ra)
    try:
        prevmodule = self.reparent('')
        try:
            dirent = svn.ra.stat(self.ra, path.strip('/'), stop)
        finally:
            # Restore the previous parent even when stat() fails, so
            # later requests relative to the module are not sent
            # against the repository root.
            self.reparent(prevmodule)
    except SubversionException:
        dirent = None
    if not dirent:
        raise SvnPathNotFound(_('%s not found up to revision %d')
                              % (path, stop))

    # stat() gives us the previous revision on this line of
    # development, but it might be in *another module*. Fetch the
    # log and detect renames down to the latest revision.
    stream = self._getlog([path], stop, dirent.created_rev)
    try:
        for entry in stream:
            paths, revnum, author, date, message = entry
            if revnum <= dirent.created_rev:
                break

            for p in paths:
                if not path.startswith(p) or not paths[p].copyfrom_path:
                    continue
                newpath = paths[p].copyfrom_path + path[len(p):]
                self.ui.debug("branch renamed from %s to %s at %d\n" %
                              (path, newpath, revnum))
                path = newpath
                break
    finally:
        stream.close()

    if not path.startswith(self.rootmodule):
        self.ui.debug('ignoring foreign branch %r\n' % path)
        return None
    return self.revid(dirent.created_rev, path)
599 599
def reparent(self, module):
    """Reparent the svn transport and return the previous parent.

    Nothing is done when module is already the current parent. The
    previous parent is reported as '' when none was recorded yet.
    """
    previous = self.prevmodule
    if previous == module:
        return module
    svnurl = self.baseurl + urllib.quote(module)
    self.ui.debug("reparent to %s\n" % svnurl)
    svn.ra.reparent(self.ra, svnurl)
    self.prevmodule = module
    if previous is None:
        return ''
    return previous
612 612
def expandpaths(self, rev, paths, parents):
    """Expand the changed paths of rev into file-level changes.

    paths is a sequence of (path, entry) pairs. Directories are
    expanded to the files they contain. Returns (changed, removed,
    copies): changed is a list of file names that includes the removed
    ones, removed is a set, and copies maps destinations to sources.
    All names are relative to the module and passed through
    self.recode().
    """
    changed, removed = set(), set()
    copies = {}

    new_module, revnum = self.revsplit(rev)[1:]
    if new_module != self.module:
        self.module = new_module
        self.reparent(self.module)

    for path, ent in paths:
        entrypath = self.getrelpath(path)

        kind = self._checkpath(entrypath, revnum)
        if kind == svn.core.svn_node_file:
            changed.add(self.recode(entrypath))
            if not ent.copyfrom_path or not parents:
                continue
            # Copy sources not in parent revisions cannot be
            # represented, ignore their origin for now
            pmodule, prevnum = self.revsplit(parents[0])[1:]
            if ent.copyfrom_rev < prevnum:
                continue
            copyfrom_path = self.getrelpath(ent.copyfrom_path, pmodule)
            if not copyfrom_path:
                continue
            self.ui.debug("copied to %s from %s@%s\n" %
                          (entrypath, copyfrom_path, ent.copyfrom_rev))
            copies[self.recode(entrypath)] = self.recode(copyfrom_path)
        elif kind == 0: # gone, but had better be a deleted *file*
            self.ui.debug("gone from %s\n" % ent.copyfrom_rev)
            pmodule, prevnum = self.revsplit(parents[0])[1:]
            parentpath = pmodule + "/" + entrypath
            fromkind = self._checkpath(entrypath, prevnum, pmodule)

            if fromkind == svn.core.svn_node_file:
                removed.add(self.recode(entrypath))
            elif fromkind == svn.core.svn_node_dir:
                oroot = parentpath.strip('/')
                nroot = path.strip('/')
                children = self._listfiles(oroot, prevnum)
                for childpath in children:
                    # Every listed file starts with oroot. Swap only that
                    # leading prefix: str.replace() would also rewrite
                    # later occurrences of oroot inside the path.
                    childpath = nroot + childpath[len(oroot):]
                    childpath = self.getrelpath("/" + childpath, pmodule)
                    if childpath:
                        removed.add(self.recode(childpath))
            else:
                self.ui.debug('unknown path in revision %d: %s\n' % \
                              (revnum, path))
        elif kind == svn.core.svn_node_dir:
            if ent.action == 'M':
                # If the directory just had a prop change,
                # then we shouldn't need to look for its children.
                continue
            elif ent.action == 'R' and parents:
                # If a directory is replacing a file, mark the previous
                # file as deleted
                pmodule, prevnum = self.revsplit(parents[0])[1:]
                pkind = self._checkpath(entrypath, prevnum, pmodule)
                if pkind == svn.core.svn_node_file:
                    removed.add(self.recode(entrypath))

            children = sorted(self._listfiles(path, revnum))
            for childpath in children:
                childpath = self.getrelpath("/" + childpath)
                if childpath:
                    changed.add(self.recode(childpath))

            # Handle directory copies
            if not ent.copyfrom_path or not parents:
                continue
            # Copy sources not in parent revisions cannot be
            # represented, ignore their origin for now
            pmodule, prevnum = self.revsplit(parents[0])[1:]
            if ent.copyfrom_rev < prevnum:
                continue
            copyfrompath = self.getrelpath(ent.copyfrom_path, pmodule)
            if not copyfrompath:
                continue
            self.ui.debug("mark %s came from %s:%d\n"
                          % (path, copyfrompath, ent.copyfrom_rev))
            children = self._listfiles(ent.copyfrom_path, ent.copyfrom_rev)
            children.sort()
            for childpath in children:
                childpath = self.getrelpath("/" + childpath, pmodule)
                if not childpath:
                    continue
                copytopath = path + childpath[len(copyfrompath):]
                copytopath = self.getrelpath(copytopath)
                copies[self.recode(copytopath)] = self.recode(childpath)

    changed.update(removed)
    return (list(changed), removed, copies)
712 705
def _fetch_revisions(self, from_revnum, to_revnum):
    """Read the log of self.module between the two revision numbers,
    newest first, and fill self.commits and self.paths.

    The bounds are swapped when given in ascending order. Raises
    util.Abort when svn reports that the revision does not exist.
    """
    if from_revnum < to_revnum:
        from_revnum, to_revnum = to_revnum, from_revnum

    # Commit parsed in the previous (more recent) log entry
    self.child_cset = None

    def parselogentry(orig_paths, revnum, author, date, message):
        """Return the parsed commit object or None, and True if
        the revision is a branch root.
        """
        self.ui.debug("parsing revision %d (%d changes)\n" %
                      (revnum, len(orig_paths)))

        branched = False
        rev = self.revid(revnum)
        # branch log might return entries for a parent we already have

        if rev in self.commits or revnum < to_revnum:
            return None, branched

        parents = []
        # check whether this revision is the start of a branch or part
        # of a branch renaming
        orig_paths = sorted(orig_paths.iteritems())
        root_paths = [(p, e) for p, e in orig_paths
                      if self.module.startswith(p)]
        if root_paths:
            # Paths are sorted, so the last match is the longest prefix
            path, ent = root_paths[-1]
            if ent.copyfrom_path:
                branched = True
                newpath = ent.copyfrom_path + self.module[len(path):]
                # ent.copyfrom_rev may not be the actual last revision
                previd = self.latest(newpath, ent.copyfrom_rev)
                if previd is not None:
                    prevmodule, prevnum = self.revsplit(previd)[1:]
                    if prevnum >= self.startrev:
                        parents = [previd]
                        self.ui.note(
                            _('found parent of branch %s at %d: %s\n') %
                            (self.module, prevnum, prevmodule))
            else:
                self.ui.debug("no copyfrom path, don't know what to do.\n")

        paths = []
        # filter out unrelated paths
        for path, ent in orig_paths:
            if self.getrelpath(path) is None:
                continue
            paths.append((path, ent))

        # Example SVN datetime. Includes microseconds.
        # ISO-8601 conformant
        # '2007-01-04T17:35:00.902377Z'
        date = util.parsedate(date[:19] + " UTC", ["%Y-%m-%dT%H:%M:%S"])

        log = message and self.recode(message) or ''
        author = author and self.recode(author) or ''
        try:
            branch = self.module.split("/")[-1]
            if branch == 'trunk':
                branch = ''
        except IndexError:
            branch = None

        cset = commit(author=author,
                      date=util.datestr(date),
                      desc=log,
                      parents=parents,
                      branch=branch,
                      rev=rev)

        self.commits[rev] = cset
        # The parents list is *shared* among self.paths and the
        # commit object. Both will be updated below.
        self.paths[rev] = (paths, cset.parents)
        # Entries arrive newest first: this revision becomes the parent
        # of the previously parsed one when that one has none yet.
        if self.child_cset and not self.child_cset.parents:
            self.child_cset.parents[:] = [rev]
        self.child_cset = cset
        return cset, branched

    self.ui.note(_('fetching revision log for "%s" from %d to %d\n') %
                 (self.module, from_revnum, to_revnum))

    try:
        firstcset = None
        lastonbranch = False
        stream = self._getlog([self.module], from_revnum, to_revnum)
        try:
            for entry in stream:
                paths, revnum, author, date, message = entry
                if revnum < self.startrev:
                    lastonbranch = True
                    break
                if not paths:
                    self.ui.debug('revision %d has no entries\n' % revnum)
                    # If we ever leave the loop on an empty
                    # revision, do not try to get a parent branch
                    lastonbranch = lastonbranch or revnum == 0
                    continue
                cset, lastonbranch = parselogentry(paths, revnum, author,
                                                   date, message)
                if cset:
                    firstcset = cset
                if lastonbranch:
                    break
        finally:
            stream.close()

        if not lastonbranch and firstcset and not firstcset.parents:
            # The first revision of the sequence (the last fetched one)
            # has invalid parents if not a branch root. Find the parent
            # revision now, if any.
            try:
                firstrevnum = self.revnum(firstcset.rev)
                if firstrevnum > 1:
                    latest = self.latest(self.module, firstrevnum - 1)
                    if latest:
                        firstcset.parents.append(latest)
            except SvnPathNotFound:
                pass
    except SubversionException, (inst, num):
        if num == svn.core.SVN_ERR_FS_NO_SUCH_REVISION:
            raise util.Abort(_('svn: branch has no revision %s') % to_revnum)
        raise
837 830
def _getfile(self, file, rev):
    """Return ``(data, mode)`` for ``file`` at revision ``rev``.

    ``mode`` is 'l' when the file properties contain svn:special, 'x'
    when they contain svn:executable, and '' otherwise. For 'l' entries
    a leading "link " marker is stripped from the returned data.

    Raises IOError when ``file`` is listed in ``self.removed`` or when
    Subversion reports that the path was not found. Any other
    SubversionException is re-raised unchanged.
    """
    # TODO: ra.get_file transmits the whole file instead of diffs.
    if file in self.removed:
        raise IOError()
    mode = ''
    try:
        new_module, revnum = self.revsplit(rev)[1:]
        if self.module != new_module:
            # The revision belongs to another module: switch to it
            # before fetching.
            self.module = new_module
            self.reparent(self.module)
        io = StringIO()
        try:
            info = svn.ra.get_file(self.ra, file, revnum, io)
            data = io.getvalue()
        finally:
            # ra.get_file() seems to keep a reference on the input buffer
            # preventing collection. Release it explicitly, including
            # when the fetch itself fails.
            io.close()
        # NOTE(review): some bindings apparently return a list here; the
        # properties mapping is taken to be its last item -- confirm.
        if isinstance(info, list):
            info = info[-1]
        mode = ("svn:executable" in info) and 'x' or ''
        mode = ("svn:special" in info) and 'l' or mode
    except SubversionException:
        # Fetch the active exception through sys.exc_info() so this
        # handler parses on every Python version.
        e = sys.exc_info()[1]
        notfound = (svn.core.SVN_ERR_FS_NOT_FOUND,
                    svn.core.SVN_ERR_RA_DAV_PATH_NOT_FOUND)
        if e.apr_err in notfound: # File not found
            raise IOError()
        raise
    if mode == 'l':
        link_prefix = "link "
        if data.startswith(link_prefix):
            data = data[len(link_prefix):]
    return data, mode
869 862
def _listfiles(self, path, revnum):
    """List all files in path at revnum, recursively.

    Only entries whose kind is svn_node_file are returned; each name is
    prefixed with ``path`` (stripped of leading and trailing slashes).
    """
    base = path.strip('/')
    pool = Pool()
    url = '/'.join([self.baseurl, urllib.quote(base)]).strip('/')
    # Third argument asks svn.client.ls for a recursive listing.
    listing = svn.client.ls(url, optrev(revnum), True, self.ctx, pool)
    found = []
    for name, dirent in listing.iteritems():
        if dirent.kind == svn.core.svn_node_file:
            found.append(base + '/' + name)
    return found
876 871
def getrelpath(self, path, module=None):
    """Return ``path`` relative to ``module`` (default: ``self.module``).

    Given a module such as "/CMFPlone/branches/Plone-2_0-branch" and a
    path reported by the svn log such as
    "/CMFPlone/branches/Plone-2_0-branch/tests/PloneTestCase.py", the
    result is "tests/PloneTestCase.py". The module itself maps to ''.

    Returns None, after a debug message, when the path is not located
    under the module.
    """
    if module is None:
        module = self.module
    if path.startswith(module):
        # Drop trailing slashes first, then the module prefix.
        tail = path.rstrip('/')[len(module):]
        if tail == '':
            return tail
        if tail.startswith('/'):
            return tail[1:]
        # Otherwise only a name prefix matched (sibling path): fall
        # through and report the path as outside the module.

    # The path is outside our tracked tree...
    self.ui.debug('%r is not under %r, ignoring\n' % (path, module))
    return None
896 891
def _checkpath(self, path, revnum, module=None):
    """Return svn.ra.check_path()'s result for ``path`` at ``revnum``.

    When ``module`` is given, the session is reparented to '' for the
    duration of the check, ``path`` is looked up below ``module``, and
    the previous location is restored afterwards, even on error.
    """
    switched = module is not None
    if switched:
        prevmodule = self.reparent('')
        path = '/'.join([module, path])
    try:
        # ra.check_path does not like leading slashes very much, it leads
        # to PROPFIND subversion errors
        target = path.strip('/')
        return svn.ra.check_path(self.ra, target, revnum)
    finally:
        if switched:
            self.reparent(prevmodule)
908 903
def _getlog(self, paths, start, end, limit=0, discover_changed_paths=True,
            strict_node_history=False):
    """Start an ``hg debugsvnlog`` child process and return its log stream.

    The encoded arguments (base URL, normalized paths, revision range
    and options) are written to the child's stdin; the child's stdout is
    wrapped in a logstream and returned.

    Raises util.Abort when closing the child's stdin fails with IOError.
    """
    def normalize(p):
        # Paths without a leading slash are taken relative to the module.
        if not p.startswith('/'):
            p = self.module + '/' + p
        return p.strip('/')

    # Normalize path names, svn >= 1.5 only wants paths relative to
    # supplied URL
    relpaths = [normalize(p) for p in paths]
    payload = encodeargs([self.baseurl, relpaths, start, end, limit,
                          discover_changed_paths, strict_node_history])
    cmd = '%s debugsvnlog' % util.shellquote(util.hgexecutable())
    stdin, stdout = util.popen2(cmd)
    stdin.write(payload)
    try:
        stdin.close()
    except IOError:
        raise util.Abort(_('Mercurial failed to run itself, check'
                           ' hg executable is in PATH'))
    return logstream(stdout)
931 926
# Shell script installed as the pre-revprop-change hook of repositories
# created by svn_sink. It accepts modifications of svn:log and additions
# of the hg:convert-branch / hg:convert-rev revision properties, and
# rejects every other revision property change.
pre_revprop_change = '''#!/bin/sh

REPOS="$1"
REV="$2"
USER="$3"
PROPNAME="$4"
ACTION="$5"

if [ "$ACTION" = "M" -a "$PROPNAME" = "svn:log" ]; then exit 0; fi
if [ "$ACTION" = "A" -a "$PROPNAME" = "hg:convert-branch" ]; then exit 0; fi
if [ "$ACTION" = "A" -a "$PROPNAME" = "hg:convert-rev" ]; then exit 0; fi

echo "Changing prohibited revision property" >&2
exit 1
'''
947 942
class svn_sink(converter_sink, commandline):
    """Conversion sink that records changesets in a Subversion working copy.

    File contents are written into the working copy (``self.wc``) and the
    resulting adds, deletes, copies and property changes are recorded by
    running ``svn`` through the ``commandline`` helpers (``run``, ``run0``,
    ``xargs``).
    """

    # Extracts the new revision number from the output of "svn commit".
    commit_re = re.compile(r'Committed revision (\d+).', re.M)

    def prerun(self):
        """Change into the working copy, once it is known."""
        # NOTE(review): presumably called by commandline before each
        # command -- confirm against common.commandline.
        if self.wc:
            os.chdir(self.wc)

    def postrun(self):
        """Change back to the directory the sink was created in."""
        if self.wc:
            os.chdir(self.cwd)

    def join(self, name):
        """Return the path of ``name`` inside the working copy's .svn."""
        return os.path.join(self.wc, '.svn', name)

    def revmapfile(self):
        """Return the path of the revision map file (hg-shamap)."""
        return self.join('hg-shamap')

    def authorfile(self):
        """Return the path of the author map file (hg-authormap)."""
        return self.join('hg-authormap')

    def __init__(self, ui, path):
        """Prepare the working copy used to write to ``path``.

        If ``path`` is already a working copy (it has .svn/entries) it is
        updated and used directly. Otherwise a working copy named
        "<basename>-wc" is checked out in the current directory; when
        ``path`` is a local location without a repository (no db/fs-type),
        the repository is created first with ``svnadmin create`` and gets
        the pre-revprop-change hook installed.
        """
        converter_sink.__init__(self, ui, path)
        commandline.__init__(self, ui, 'svn')
        # Pending operations, flushed by putcommit().
        self.delete = []
        self.setexec = []
        self.delexec = []
        self.copies = []
        self.wc = None
        self.cwd = os.getcwd()

        path = os.path.realpath(path)

        created = False
        if os.path.isfile(os.path.join(path, '.svn', 'entries')):
            self.wc = path
            self.run0('update')
        else:
            wcpath = os.path.join(os.getcwd(), os.path.basename(path) + '-wc')

            if os.path.isdir(os.path.dirname(path)):
                if not os.path.exists(os.path.join(path, 'db', 'fs-type')):
                    ui.status(_('initializing svn repository %r\n') %
                              os.path.basename(path))
                    commandline(ui, 'svnadmin').run0('create', path)
                    created = path
                # Turn the local path into a file:// URL for checkout.
                path = util.normpath(path)
                if not path.startswith('/'):
                    path = '/' + path
                path = 'file://' + path

            ui.status(_('initializing svn working copy %r\n')
                      % os.path.basename(wcpath))
            self.run0('checkout', path, wcpath)

            self.wc = wcpath
        self.opener = util.opener(self.wc)
        self.wopener = util.opener(self.wc)
        self.childmap = mapfile(ui, self.join('hg-childmap'))
        # None when the working copy filesystem has no usable exec bit.
        self.is_exec = util.checkexec(self.wc) and util.is_exec or None

        if created:
            # Let the conversion set revision properties on the new
            # repository.
            hook = os.path.join(created, 'hooks', 'pre-revprop-change')
            fp = open(hook, 'w')
            try:
                fp.write(pre_revprop_change)
            finally:
                fp.close()
            util.set_flags(hook, False, True)

        xport = transport.SvnRaTransport(url=geturl(path))
        self.uuid = svn.ra.get_uuid(xport.ra)

    def wjoin(self, *names):
        """Join ``names`` below the working copy root."""
        return os.path.join(self.wc, *names)

    def putfile(self, filename, flags, data):
        """Write ``data`` to ``filename`` in the working copy.

        With 'l' in ``flags`` a symlink pointing at ``data`` is created.
        Otherwise a regular file is written, its exec bit is set from
        'x' in ``flags``, and the file is queued in ``self.setexec`` or
        ``self.delexec`` when its svn:executable property must change.
        """
        if 'l' in flags:
            self.wopener.symlink(data, filename)
        else:
            wpath = self.wjoin(filename)
            try:
                # Replace a former symlink instead of writing through it.
                # The unlink must target the working copy path: a bare
                # relative filename would resolve against the current
                # directory, not the working copy.
                if os.path.islink(wpath):
                    os.unlink(wpath)
            except OSError:
                pass
            fp = self.wopener(filename, 'w')
            try:
                fp.write(data)
            finally:
                fp.close()

            if self.is_exec:
                was_exec = self.is_exec(wpath)
            else:
                # On filesystems not supporting execute-bit, there is no way
                # to know if it is set but asking subversion. Setting it
                # systematically is just as expensive and much simpler.
                was_exec = 'x' not in flags

            util.set_flags(wpath, False, 'x' in flags)
            if was_exec:
                if 'x' not in flags:
                    self.delexec.append(filename)
            else:
                if 'x' in flags:
                    self.setexec.append(filename)

    def _copyfile(self, source, dest):
        """Record a copy from ``source`` to ``dest`` with ``svn copy``."""
        # SVN's copy command pukes if the destination file exists, but
        # our copyfile method expects to record a copy that has
        # already occurred.  Cross the semantic gap.
        wdest = self.wjoin(dest)
        exists = os.path.exists(wdest)
        if exists:
            # Move the existing destination aside under a unique name in
            # the same directory; it is moved back after the copy.
            fd, tempname = tempfile.mkstemp(
                prefix='hg-copy-', dir=os.path.dirname(wdest))
            os.close(fd)
            os.unlink(tempname)
            os.rename(wdest, tempname)
        try:
            self.run0('copy', source, dest)
        finally:
            if exists:
                try:
                    os.unlink(wdest)
                except OSError:
                    pass
                os.rename(tempname, wdest)

    def dirs_of(self, files):
        """Return the set of directories related to ``files``.

        This contains every ancestor directory of each name, plus the
        name itself when it is a directory in the working copy.
        """
        dirs = set()
        for f in files:
            if os.path.isdir(self.wjoin(f)):
                dirs.add(f)
            for i in strutil.rfindall(f, '/'):
                dirs.add(f[:i])
        return dirs

    def add_dirs(self, files):
        """``svn add`` the directories of ``files`` lacking .svn/entries.

        Returns the list of directories that were added.
        """
        add_dirs = [d for d in sorted(self.dirs_of(files))
                    if not os.path.exists(self.wjoin(d, '.svn', 'entries'))]
        if add_dirs:
            self.xargs(add_dirs, 'add', non_recursive=True, quiet=True)
        return add_dirs

    def add_files(self, files):
        """``svn add`` ``files`` (when not empty) and return them."""
        if files:
            self.xargs(files, 'add', quiet=True)
        return files

    def tidy_dirs(self, names):
        """``svn delete`` directories of ``names`` holding only '.svn'.

        Directories are visited in reverse sorted order, so nested
        directories come before their parents. Returns the list of
        deleted directories.
        """
        deleted = []
        for d in sorted(self.dirs_of(names), reverse=True):
            wd = self.wjoin(d)
            # os.listdir() returns a list, so it has to be compared with
            # a list; comparing it with the string '.svn' never matched.
            if os.listdir(wd) == ['.svn']:
                self.run0('delete', d)
                deleted.append(d)
        return deleted

    def addchild(self, parent, child):
        """Remember ``child`` as the svn revision committed for ``parent``."""
        self.childmap[parent] = child

    def revid(self, rev):
        """Return the "svn:<uuid>@<rev>" identifier of revision ``rev``."""
        return u"svn:%s@%s" % (self.uuid, rev)

    def putcommit(self, files, copies, parents, commit, source, revmap):
        """Apply a changeset to the working copy and commit it.

        ``files`` is an iterable of (name, version) pairs; a file for
        which ``source.getfile`` raises IOError is scheduled for
        deletion. Returns the identifier of the committed revision. If a
        parent already has a recorded child in ``self.childmap``, that
        child's identifier is returned and nothing is committed. If svn
        reports no committed revision and the changeset has no files,
        the first parent is returned.

        Raises util.Abort when the svn output cannot be understood.
        """
        # Apply changes to working copy
        for f, v in files:
            try:
                data = source.getfile(f, v)
            except IOError:
                self.delete.append(f)
            else:
                e = source.getmode(f, v)
                self.putfile(f, e, data)
                if f in copies:
                    self.copies.append([copies[f], f])
        files = [f[0] for f in files]

        for parent in parents:
            try:
                return self.revid(self.childmap[parent])
            except KeyError:
                pass
        entries = set(self.delete)
        files = frozenset(files)
        entries.update(self.add_dirs(files.difference(entries)))
        if self.copies:
            for s, d in self.copies:
                self._copyfile(s, d)
            self.copies = []
        if self.delete:
            self.xargs(self.delete, 'delete')
            self.delete = []
        entries.update(self.add_files(files.difference(entries)))
        entries.update(self.tidy_dirs(entries))
        if self.delexec:
            self.xargs(self.delexec, 'propdel', 'svn:executable')
            self.delexec = []
        if self.setexec:
            self.xargs(self.setexec, 'propset', 'svn:executable', '*')
            self.setexec = []

        fd, messagefile = tempfile.mkstemp(prefix='hg-convert-')
        try:
            # Writing the message happens inside the try block so the
            # temporary file is removed even if the write fails.
            fp = os.fdopen(fd, 'w')
            try:
                fp.write(commit.desc)
            finally:
                fp.close()
            output = self.run0('commit',
                               username=util.shortuser(commit.author),
                               file=messagefile,
                               encoding='utf-8')
            try:
                rev = self.commit_re.search(output).group(1)
            except AttributeError:
                # No "Committed revision" line in the output.
                if not files:
                    return parents[0]
                self.ui.warn(_('unexpected svn output:\n'))
                self.ui.warn(output)
                raise util.Abort(_('unable to cope with svn output'))
            if commit.rev:
                self.run('propset', 'hg:convert-rev', commit.rev,
                         revprop=True, revision=rev)
            if commit.branch and commit.branch != 'default':
                self.run('propset', 'hg:convert-branch', commit.branch,
                         revprop=True, revision=rev)
            for parent in parents:
                self.addchild(parent, rev)
            return self.revid(rev)
        finally:
            os.unlink(messagefile)

    def puttags(self, tags):
        """Tags are not supported by this sink; only emit a warning."""
        self.ui.warn(_('XXX TAGS NOT IMPLEMENTED YET\n'))
General Comments 0
You need to be logged in to leave comments. Login now