##// END OF EJS Templates
convert/svn: remove useless sort
Patrick Mezard -
r11133:d7b6aab6 default
parent child Browse files
Show More
@@ -1,1169 +1,1167
1 1 # Subversion 1.4/1.5 Python API backend
2 2 #
3 3 # Copyright(C) 2007 Daniel Holth et al
4 4
5 5 import os
6 6 import re
7 7 import sys
8 8 import cPickle as pickle
9 9 import tempfile
10 10 import urllib
11 11 import urllib2
12 12
13 13 from mercurial import strutil, util, encoding
14 14 from mercurial.i18n import _
15 15
16 16 # Subversion stuff. Works best with very recent Python SVN bindings
17 17 # e.g. SVN 1.5 or backports. Thanks to the bzr folks for enhancing
18 18 # these bindings.
19 19
20 20 from cStringIO import StringIO
21 21
22 22 from common import NoRepo, MissingTool, commit, encodeargs, decodeargs
23 23 from common import commandline, converter_source, converter_sink, mapfile
24 24
25 25 try:
26 26 from svn.core import SubversionException, Pool
27 27 import svn
28 28 import svn.client
29 29 import svn.core
30 30 import svn.ra
31 31 import svn.delta
32 32 import transport
33 33 import warnings
34 34 warnings.filterwarnings('ignore',
35 35 module='svn.core',
36 36 category=DeprecationWarning)
37 37
38 38 except ImportError:
39 39 pass
40 40
class SvnPathNotFound(Exception):
    """Raised when a path cannot be found in the Subversion repository."""
43 43
def geturl(path):
    """Return the Subversion URL matching path.

    The svn client is asked first.  When it cannot resolve path and path
    is an existing directory, a quoted file:// URL is built from its
    absolute location.  Anything else is returned unchanged.
    """
    try:
        canonical = svn.core.svn_path_canonicalize(path)
        return svn.client.url_from_path(canonical)
    except SubversionException:
        pass
    if not os.path.isdir(path):
        return path
    localpath = os.path.normpath(os.path.abspath(path))
    if os.name == 'nt':
        localpath = '/' + util.normpath(localpath)
    # Module URL is later compared with the repository URL returned
    # by svn API, which is UTF-8.
    localpath = encoding.tolocal(localpath)
    return 'file://%s' % urllib.quote(localpath)
58 58
def optrev(number):
    """Return an svn_opt_revision_t designating revision ``number``."""
    revision = svn.core.svn_opt_revision_t()
    revision.kind = svn.core.svn_opt_revision_number
    revision.value.number = number
    return revision
64 64
class changedpath(object):
    """Plain copy of the copyfrom_path, copyfrom_rev and action of a
    changed-path entry.

    get_log_child() substitutes these for the objects handed to its log
    receiver before pickling them.
    """
    def __init__(self, p):
        for name in ('copyfrom_path', 'copyfrom_rev', 'action'):
            setattr(self, name, getattr(p, name))
70 70
71 71 def get_log_child(fp, url, paths, start, end, limit=0, discover_changed_paths=True,
72 72 strict_node_history=False):
73 73 protocol = -1
74 74 def receiver(orig_paths, revnum, author, date, message, pool):
75 75 if orig_paths is not None:
76 76 for k, v in orig_paths.iteritems():
77 77 orig_paths[k] = changedpath(v)
78 78 pickle.dump((orig_paths, revnum, author, date, message),
79 79 fp, protocol)
80 80
81 81 try:
82 82 # Use an ra of our own so that our parent can consume
83 83 # our results without confusing the server.
84 84 t = transport.SvnRaTransport(url=url)
85 85 svn.ra.get_log(t.ra, paths, start, end, limit,
86 86 discover_changed_paths,
87 87 strict_node_history,
88 88 receiver)
89 89 except SubversionException, (inst, num):
90 90 pickle.dump(num, fp, protocol)
91 91 except IOError:
92 92 # Caller may interrupt the iteration
93 93 pickle.dump(None, fp, protocol)
94 94 else:
95 95 pickle.dump(None, fp, protocol)
96 96 fp.close()
97 97 # With large history, cleanup process goes crazy and suddenly
98 98 # consumes *huge* amount of memory. The output file being closed,
99 99 # there is no need for clean termination.
100 100 os._exit(0)
101 101
def debugsvnlog(ui, **opts):
    """Fetch SVN log in a subprocess and channel them back to parent to
    avoid memory collection issues.

    The arguments for get_log_child() are decoded from stdin and the
    pickled log entries are written to stdout; both streams are switched
    to binary mode first.
    """
    for stream in (sys.stdin, sys.stdout):
        util.set_binary(stream)
    logargs = decodeargs(sys.stdin.read())
    get_log_child(sys.stdout, *logargs)
110 110
class logstream(object):
    """Interruptible revision log iterator.

    Reads the pickled stream written by get_log_child(): a sequence of
    (orig_paths, revnum, author, date, message) tuples terminated by
    either None (normal end) or another value describing a failure in
    the child.
    """
    def __init__(self, stdout):
        # File-like object the pickled entries are read from
        self._stdout = stdout

    def __iter__(self):
        """Yield log entries until the None terminator is read.

        Raises util.Abort when the stream ends without a terminator and
        SubversionException when the child reported an error.
        """
        while True:
            try:
                # NOTE(review): presumably the stream only ever comes from
                # our own child process; pickle must not be fed untrusted
                # data.
                entry = pickle.load(self._stdout)
            except EOFError:
                raise util.Abort(_('Mercurial failed to run itself, check'
                                   ' hg executable is in PATH'))
            try:
                orig_paths, revnum, author, date, message = entry
            except (TypeError, ValueError):
                # Not a log entry: only unpacking failures are expected
                # here, anything else (e.g. an interrupt) must propagate.
                if entry is None:
                    break
                raise SubversionException("child raised exception", entry)
            yield entry

    def close(self):
        """Close the underlying stream; further calls are no-ops."""
        if self._stdout:
            self._stdout.close()
            self._stdout = None
135 135
136 136
137 137 # Check to see if the given path is a local Subversion repo. Verify this by
138 138 # looking for several svn-specific files and directories in the given
139 139 # directory.
def filecheck(ui, path, proto):
    """Return True if path contains the 'locks', 'hooks', 'format' and
    'db' entries, False as soon as one of them is missing.

    ui and proto are accepted for signature compatibility with the other
    checkers and are not used.
    """
    required = ('locks', 'hooks', 'format', 'db')
    for name in required:
        if os.path.exists(os.path.join(path, name)):
            continue
        return False
    return True
145 145
146 146 # Check to see if a given path is the root of an svn repo over http. We verify
147 147 # this by requesting a version-controlled URL we know can't exist and looking
148 148 # for the svn-specific "not found" XML.
149 149 def httpcheck(ui, path, proto):
150 150 try:
151 151 opener = urllib2.build_opener()
152 152 rsp = opener.open('%s://%s/!svn/ver/0/.svn' % (proto, path))
153 153 data = rsp.read()
154 154 except urllib2.HTTPError, inst:
155 155 if inst.code != 404:
156 156 # Except for 404 we cannot know for sure this is not an svn repo
157 157 ui.warn(_('svn: cannot probe remote repository, assume it could '
158 158 'be a subversion repository. Use --source-type if you '
159 159 'know better.\n'))
160 160 return True
161 161 data = inst.fp.read()
162 162 except:
163 163 # Could be urllib2.URLError if the URL is invalid or anything else.
164 164 return False
165 165 return '<m:human-readable errcode="160013">' in data
166 166
# Checker to use for each URL scheme issvnurl() knows how to probe
protomap = {
    'http': httpcheck,
    'https': httpcheck,
    'file': filecheck,
}

def issvnurl(ui, url):
    """Return True if url or one of its parents looks like an svn repo.

    A url without a scheme is treated as a local path.  The checker
    registered for the scheme is tried on the path and then on each of
    its ancestors, as long as the remaining path contains a '/'.
    Schemes without a checker always yield False.
    """
    def nocheck(*args):
        return False

    try:
        proto, path = url.split('://', 1)
        if proto == 'file':
            path = urllib.url2pathname(path)
    except ValueError:
        proto = 'file'
        path = os.path.abspath(url)
    if proto == 'file':
        path = path.replace(os.sep, '/')
    check = protomap.get(proto, nocheck)
    while '/' in path:
        if check(ui, path, proto):
            return True
        # Drop the last path component and retry on the parent
        path = path[:path.rindex('/')]
    return False
187 187
188 188 # SVN conversion code stolen from bzr-svn and tailor
189 189 #
190 190 # Subversion looks like a versioned filesystem, branches structures
191 191 # are defined by conventions and not enforced by the tool. First,
192 192 # we define the potential branches (modules) as "trunk" and "branches"
193 193 # children directories. Revisions are then identified by their
194 194 # module and revision number (and a repository identifier).
195 195 #
196 196 # The revision graph is really a tree (or a forest). By default, a
197 197 # revision parent is the previous revision in the same module. If the
198 198 # module directory is copied/moved from another module then the
199 199 # revision is the module root and its parent the source revision in
200 200 # the parent module. A revision has at most one parent.
201 201 #
202 202 class svn_source(converter_source):
def __init__(self, ui, url, rev=None):
    """Open url as a Subversion conversion source.

    Raises NoRepo when url does not look like, or cannot be opened as, a
    Subversion repository, MissingTool when the python bindings are
    missing or older than 1.4, and util.Abort when rev or the
    convert.svn.startrev setting is not an integer or when no revision
    is found in the module.  A trailing '@rev' on url selects the
    revision to start from; an explicit rev overrides it.
    """
    super(svn_source, self).__init__(ui, url, rev=rev)

    isremote = url.startswith('svn://') or url.startswith('svn+ssh://')
    if not isremote:
        isworkingcopy = (os.path.exists(url) and
                         os.path.exists(os.path.join(url, '.svn')))
        if not isworkingcopy and not issvnurl(ui, url):
            raise NoRepo(_("%s does not look like a Subversion repository")
                         % url)

    # The name only exists if the bindings import succeeded
    try:
        SubversionException
    except NameError:
        raise MissingTool(_('Subversion python bindings could not be loaded'))

    try:
        version = svn.core.SVN_VER_MAJOR, svn.core.SVN_VER_MINOR
        if version < (1, 4):
            raise MissingTool(_('Subversion python bindings %d.%d found, '
                                '1.4 or later required') % version)
    except AttributeError:
        raise MissingTool(_('Subversion python bindings are too old, 1.4 '
                            'or later required'))

    self.lastrevs = {}

    latest = None
    # Support file://path@rev syntax. Useful e.g. to convert
    # deleted branches.
    at = url.rfind('@')
    if at >= 0:
        try:
            latest = int(url[at + 1:])
        except ValueError:
            pass
        else:
            url = url[:at]
    self.url = geturl(url)
    self.encoding = 'UTF-8' # Subversion is always nominal UTF-8
    try:
        self.transport = transport.SvnRaTransport(url=self.url)
        self.ra = self.transport.ra
        self.ctx = self.transport.client
        self.baseurl = svn.ra.get_repos_root(self.ra)
        # Module is either empty or a repository path starting with
        # a slash and not ending with a slash.
        self.module = urllib.unquote(self.url[len(self.baseurl):])
        self.prevmodule = None
        self.rootmodule = self.module
        self.commits = {}
        self.paths = {}
        self.uuid = svn.ra.get_uuid(self.ra)
    except SubversionException:
        ui.traceback()
        raise NoRepo(_("%s does not look like a Subversion repository")
                     % self.url)

    if rev:
        try:
            latest = int(rev)
        except ValueError:
            raise util.Abort(_('svn: revision %s is not an integer') % rev)

    self.startrev = self.ui.config('convert', 'svn.startrev', default=0)
    try:
        # Negative start revisions are clamped to 0
        self.startrev = max(int(self.startrev), 0)
    except ValueError:
        raise util.Abort(_('svn: start revision %s is not an integer')
                         % self.startrev)

    self.head = self.latest(self.module, latest)
    if not self.head:
        raise util.Abort(_('no revision found in module %s')
                         % self.module)
    self.last_changed = self.revnum(self.head)

    self._changescache = None

    # Remember the working copy, if url is one, for converted()
    self.wc = None
    if os.path.exists(os.path.join(url, '.svn/entries')):
        self.wc = url
    self.convertfp = None
287 287
def setrevmap(self, revmap):
    """Record in self.lastrevs the highest revision number found in
    revmap for each module."""
    highest = {}
    for revid in revmap.iterkeys():
        uuid, module, revnum = self.revsplit(revid)
        if module not in highest or revnum > highest[module]:
            highest[module] = revnum
    self.lastrevs = highest
296 296
def exists(self, path, optrev):
    """Return True if listing path under the source URL at revision
    optrev succeeds, False if it raises a SubversionException."""
    target = self.url.rstrip('/') + '/' + urllib.quote(path)
    try:
        svn.client.ls(target, optrev, False, self.ctx)
    except SubversionException:
        return False
    return True
304 304
def getheads(self):
    """Return the list of head revision ids to convert.

    The trunk, tags and branches locations come from the
    convert.svn.trunk/tags/branches settings, defaulting to directories
    of the same name.  The first head is the module head; each non-empty
    branch directory contributes one more.  Raises util.Abort when a
    configured location is missing, when the module has no revision, or
    when a start revision is combined with several heads or lies past
    the head.
    """

    def isdir(path, revnum):
        return self._checkpath(path, revnum) == svn.core.svn_node_dir

    def getcfgpath(name, rev):
        cfgpath = self.ui.config('convert', 'svn.' + name)
        # An explicitly empty setting disables the location
        if cfgpath is not None and cfgpath.strip() == '':
            return None
        path = (cfgpath or name).strip('/')
        if self.exists(path, rev):
            self.ui.note(_('found %s at %r\n') % (name, path))
            return path
        # Only explicitly configured locations are mandatory
        if cfgpath:
            raise util.Abort(_('expected %s to be at %r, but not found')
                             % (name, path))
        return None

    rev = optrev(self.last_changed)
    oldmodule = ''
    trunk = getcfgpath('trunk', rev)
    self.tags = getcfgpath('tags', rev)
    branches = getcfgpath('branches', rev)

    # If the project has a trunk or branches, we will extract heads
    # from them. We keep the project root otherwise.
    if trunk:
        oldmodule = self.module or ''
        self.module += '/' + trunk
        self.head = self.latest(self.module, self.last_changed)
        if not self.head:
            raise util.Abort(_('no revision found in module %s')
                             % self.module)

    # First head in the list is the module's head
    self.heads = [self.head]
    if self.tags is not None:
        self.tags = '%s/%s' % (oldmodule , (self.tags or 'tags'))

    # Check if branches bring a few more heads to the list
    if branches:
        rpath = self.url.strip('/')
        branchesurl = rpath + '/' + urllib.quote(branches)
        branchnames = svn.client.ls(branchesurl, rev, False, self.ctx)
        for branch in branchnames.keys():
            module = '%s/%s/%s' % (oldmodule, branches, branch)
            if not isdir(module, self.last_changed):
                continue
            brevid = self.latest(module, self.last_changed)
            if not brevid:
                self.ui.note(_('ignoring empty branch %s\n') % branch)
                continue
            self.ui.note(_('found branch %s at %d\n') %
                         (branch, self.revnum(brevid)))
            self.heads.append(brevid)

    if self.startrev and self.heads:
        if len(self.heads) > 1:
            raise util.Abort(_('svn: start revision is not supported '
                               'with more than one branch'))
        headrevnum = self.revnum(self.heads[0])
        if headrevnum < self.startrev:
            raise util.Abort(
                _('svn: no revision found after start revision %d')
                             % self.startrev)

    return self.heads
373 373
def getfile(self, file, rev):
    """Return the data of file at rev and remember its mode in
    self.modecache for getmode()."""
    content, flags = self._getfile(file, rev)
    self.modecache[(file, rev)] = flags
    return content
378 378
def getmode(self, file, rev):
    """Return the mode cached for (file, rev); KeyError if not cached."""
    key = (file, rev)
    return self.modecache[key]
381 381
def getchanges(self, rev):
    """Return (files, copies) for rev.

    files is a sorted list of (path, rev) pairs and copies maps copy
    destinations to their sources.  Revisions without parents are
    expanded to every file of the module.  The entry for rev is removed
    from self.paths, and self.removed and self.modecache are reset as a
    side effect.
    """
    cached = self._changescache
    if cached and cached[0] == rev:
        return cached[1]
    self._changescache = None
    self.modecache = {}
    paths, parents = self.paths[rev]
    if parents:
        files, self.removed, copies = self.expandpaths(rev, paths, parents)
    else:
        # Perform a full checkout on roots
        uuid, module, revnum = self.revsplit(rev)
        entries = svn.client.ls(self.baseurl + urllib.quote(module),
                                optrev(revnum), True, self.ctx)
        files = [name for name, entry in entries.iteritems()
                 if entry.kind == svn.core.svn_node_file]
        copies = {}
        self.removed = set()

    files = [(name, rev) for name in sorted(files)]

    # caller caches the result, so free it here to release memory
    del self.paths[rev]
    return (files, copies)
406 406
def getchangedfiles(self, rev, i):
    """Return the names of the files changed in rev.

    The computed changes are kept in self._changescache so that a
    following getchanges(rev) can reuse them.  i is not used.
    """
    changes = self.getchanges(rev)
    self._changescache = (rev, changes)
    files = changes[0]
    return [entry[0] for entry in files]
411 411
def getcommit(self, rev):
    """Return the commit object for rev and drop it from self.commits.

    When rev has not been fetched yet, the source is switched to the
    module of rev and the relevant part of its log is fetched first.
    """
    if rev not in self.commits:
        uuid, module, revnum = self.revsplit(rev)
        self.module = module
        self.reparent(module)
        # We assume that:
        # - requests for revisions after "stop" come from the
        # revision graph backward traversal. Cache all of them
        # down to stop, they will be used eventually.
        # - requests for revisions before "stop" come to get
        # isolated branches parents. Just fetch what is needed.
        stop = self.lastrevs.get(module, 0)
        if revnum < stop:
            stop = revnum + 1
        self._fetch_revisions(revnum, stop)
    # caller caches the result, so free it here to release memory
    return self.commits.pop(rev)
431 431
def gettags(self):
    """Return a dictionary mapping tag names to revision ids.

    Returns an empty dictionary when no tags directory is set.  A
    SubversionException raised while reading the log is reported with a
    note and the tags collected so far are returned.
    """
    tags = {}
    if self.tags is None:
        return tags

    # svn tags are just a convention, project branches left in a
    # 'tags' directory. There is no other relationship than
    # ancestry, which is expensive to discover and makes them hard
    # to update incrementally. Worse, past revisions may be
    # referenced by tags far away in the future, requiring a deep
    # history traversal on every calculation. Current code
    # performs a single backward traversal, tracking moves within
    # the tags directory (tag renaming) and recording a new tag
    # every time a project is copied from outside the tags
    # directory. It also lists deleted tags, this behaviour may
    # change in the future.
    pendings = []
    tagspath = self.tags
    start = svn.ra.get_latest_revnum(self.ra)
    try:
        stream = self._getlog([self.tags], start, self.startrev)
        try:
            for entry in stream:
                origpaths, revnum, author, date, message = entry
                copies = [(e.copyfrom_path, e.copyfrom_rev, p) for p, e
                          in origpaths.iteritems() if e.copyfrom_path]
                # Apply moves/copies from more specific to general
                copies.sort(reverse=True)

                srctagspath = tagspath
                if copies and copies[-1][2] == tagspath:
                    # Track tags directory moves
                    srctagspath = copies.pop()[0]

                for source, sourcerev, dest in copies:
                    if not dest.startswith(tagspath + '/'):
                        continue
                    for tag in pendings:
                        if tag[0].startswith(dest):
                            tagpath = source + tag[0][len(dest):]
                            tag[:2] = [tagpath, sourcerev]
                            break
                    else:
                        pendings.append([source, sourcerev, dest])

                # Filter out tags with children coming from different
                # parts of the repository like:
                # /tags/tag.1 (from /trunk:10)
                # /tags/tag.1/foo (from /branches/foo:12)
                # Here /tags/tag.1 is discarded as well as its children.
                # It happens with tools like cvs2svn. Such tags cannot
                # be represented in mercurial.
                addeds = dict((p, e.copyfrom_path) for p, e
                              in origpaths.iteritems()
                              if e.action == 'A' and e.copyfrom_path)
                badroots = set()
                for destroot in addeds:
                    for source, sourcerev, dest in pendings:
                        if (not dest.startswith(destroot + '/')
                            or source.startswith(addeds[destroot] + '/')):
                            continue
                        badroots.add(destroot)
                        break

                for badroot in badroots:
                    pendings = [p for p in pendings if p[2] != badroot
                                and not p[2].startswith(badroot + '/')]

                # Tell tag renamings from tag creations
                remainings = []
                for source, sourcerev, dest in pendings:
                    tagname = dest.split('/')[-1]
                    if source.startswith(srctagspath):
                        remainings.append([source, sourcerev, tagname])
                        continue
                    if tagname in tags:
                        # Keep the latest tag value
                        continue
                    # From revision may be fake, get one with changes
                    try:
                        tagid = self.latest(source, sourcerev)
                        if tagid and tagname not in tags:
                            tags[tagname] = tagid
                    except SvnPathNotFound:
                        # It happens when we are following directories
                        # we assumed were copied with their parents
                        # but were really created in the tag
                        # directory.
                        pass
                pendings = remainings
                tagspath = srctagspath
        finally:
            # Release the log stream even when the traversal is cut
            # short, as latest() and _fetch_revisions() do.
            stream.close()

    except SubversionException:
        self.ui.note(_('no tags found at revision %d\n') % start)
    return tags
525 525
def converted(self, rev, destrev):
    """Append a "destrev revnum" line to .svn/hg-shamap in the working
    copy.  Does nothing when the source is not a working copy."""
    if not self.wc:
        return
    fp = self.convertfp
    if fp is None:
        # Opened on first use and kept open for the following calls
        shamap = os.path.join(self.wc, '.svn', 'hg-shamap')
        fp = self.convertfp = open(shamap, 'a')
    fp.write('%s %d\n' % (destrev, self.revnum(rev)))
    fp.flush()
534 534
def revid(self, revnum, module=None):
    """Return the revision id 'svn:<uuid><module>@<revnum>', using
    self.module when module is empty or None."""
    if not module:
        module = self.module
    return 'svn:%s%s@%s' % (self.uuid, module, revnum)
537 537
def revnum(self, rev):
    """Return the integer following the last '@' of revision id rev."""
    number = rev.rsplit('@', 1)[-1]
    return int(number)
540 540
def revsplit(self, rev):
    """Split revision id rev into (uuid, module, revnum).

    module is empty or starts with a slash; revnum is an integer.
    """
    url, revtext = rev.rsplit('@', 1)
    number = int(revtext)
    slash = url.find('/')
    if slash < 0:
        # No module part: everything after the 4-character prefix is uuid
        return url[4:], '', number
    return url[4:slash], url[slash:], number
550 550
def latest(self, path, stop=0):
    """Find the latest revid affecting path, up to stop. It may return
    a revision in a different module, since a branch may be moved without
    a change being reported. Return None if computed module does not
    belong to rootmodule subtree.

    When stop is zero the latest repository revision is used.  Raises
    SvnPathNotFound when path cannot be stat'ed at stop.
    """
    if not path.startswith(self.rootmodule):
        # Requests on foreign branches may be forbidden at server level
        self.ui.debug('ignoring foreign branch %r\n' % path)
        return None

    if not stop:
        stop = svn.ra.get_latest_revnum(self.ra)
    dirent = None
    try:
        prevmodule = self.reparent('')
        try:
            dirent = svn.ra.stat(self.ra, path.strip('/'), stop)
        finally:
            # Always restore the previous parent, as _checkpath() does:
            # other methods assume the transport still points at
            # self.module after a failed lookup.
            self.reparent(prevmodule)
    except SubversionException:
        dirent = None
    if not dirent:
        raise SvnPathNotFound(_('%s not found up to revision %d')
                              % (path, stop))

    # stat() gives us the previous revision on this line of
    # development, but it might be in *another module*. Fetch the
    # log and detect renames down to the latest revision.
    stream = self._getlog([path], stop, dirent.created_rev)
    try:
        for entry in stream:
            paths, revnum, author, date, message = entry
            if revnum <= dirent.created_rev:
                break

            for p in paths:
                if not path.startswith(p) or not paths[p].copyfrom_path:
                    continue
                newpath = paths[p].copyfrom_path + path[len(p):]
                self.ui.debug("branch renamed from %s to %s at %d\n" %
                              (path, newpath, revnum))
                path = newpath
                break
    finally:
        stream.close()

    if not path.startswith(self.rootmodule):
        self.ui.debug('ignoring foreign branch %r\n' % path)
        return None
    return self.revid(dirent.created_rev, path)
599 599
def reparent(self, module):
    """Reparent the svn transport and return the previous parent.

    Nothing is done, and module itself is returned, when the transport
    was last reparented to module.  A transport that was never
    reparented reports '' as its previous parent.
    """
    previous = self.prevmodule
    if previous == module:
        return module
    svnurl = self.baseurl + urllib.quote(module)
    if previous is None:
        previous = ''
    self.ui.debug("reparent to %s\n" % svnurl)
    svn.ra.reparent(self.ra, svnurl)
    self.prevmodule = module
    return previous
612 612
def expandpaths(self, rev, paths, parents):
    """Expand the changed paths of rev into file-level changes.

    paths is a sequence of (path, entry) pairs from the revision log and
    parents the list of parent revision ids.  Directory entries are
    expanded to the files they contain.  Returns (changed, removed,
    copies): the list of changed files including the removed ones, the
    set of removed files and a dictionary mapping copy destinations to
    their sources.  Switches the source to the module of rev if needed.
    """
    changed, removed = set(), set()
    copies = {}

    new_module, revnum = self.revsplit(rev)[1:]
    if new_module != self.module:
        self.module = new_module
        self.reparent(self.module)

    for path, ent in paths:
        entrypath = self.getrelpath(path)

        kind = self._checkpath(entrypath, revnum)
        if kind == svn.core.svn_node_file:
            changed.add(self.recode(entrypath))
            if not ent.copyfrom_path or not parents:
                continue
            # Copy sources not in parent revisions cannot be
            # represented, ignore their origin for now
            pmodule, prevnum = self.revsplit(parents[0])[1:]
            if ent.copyfrom_rev < prevnum:
                continue
            copyfrom_path = self.getrelpath(ent.copyfrom_path, pmodule)
            if not copyfrom_path:
                continue
            self.ui.debug("copied to %s from %s@%s\n" %
                          (entrypath, copyfrom_path, ent.copyfrom_rev))
            copies[self.recode(entrypath)] = self.recode(copyfrom_path)
        elif kind == 0: # gone, but had better be a deleted *file*
            self.ui.debug("gone from %s\n" % ent.copyfrom_rev)
            pmodule, prevnum = self.revsplit(parents[0])[1:]
            parentpath = pmodule + "/" + entrypath
            fromkind = self._checkpath(entrypath, prevnum, pmodule)

            if fromkind == svn.core.svn_node_file:
                removed.add(self.recode(entrypath))
            elif fromkind == svn.core.svn_node_dir:
                oroot = parentpath.strip('/')
                nroot = path.strip('/')
                children = self._iterfiles(oroot, prevnum)
                for childpath in children:
                    # Children are listed as oroot + '/' + name: swap
                    # only that leading prefix, never later occurrences
                    # of the same text inside the path.
                    childpath = nroot + childpath[len(oroot):]
                    childpath = self.getrelpath("/" + childpath, pmodule)
                    if childpath:
                        removed.add(self.recode(childpath))
            else:
                self.ui.debug('unknown path in revision %d: %s\n' % \
                              (revnum, path))
        elif kind == svn.core.svn_node_dir:
            if ent.action == 'M':
                # If the directory just had a prop change,
                # then we shouldn't need to look for its children.
                continue
            elif ent.action == 'R' and parents:
                # If a directory is replacing a file, mark the previous
                # file as deleted
                pmodule, prevnum = self.revsplit(parents[0])[1:]
                pkind = self._checkpath(entrypath, prevnum, pmodule)
                if pkind == svn.core.svn_node_file:
                    removed.add(self.recode(entrypath))

            for childpath in self._iterfiles(path, revnum):
                childpath = self.getrelpath("/" + childpath)
                if childpath:
                    changed.add(self.recode(childpath))

            # Handle directory copies
            if not ent.copyfrom_path or not parents:
                continue
            # Copy sources not in parent revisions cannot be
            # represented, ignore their origin for now
            pmodule, prevnum = self.revsplit(parents[0])[1:]
            if ent.copyfrom_rev < prevnum:
                continue
            copyfrompath = self.getrelpath(ent.copyfrom_path, pmodule)
            if not copyfrompath:
                continue
            self.ui.debug("mark %s came from %s:%d\n"
                          % (path, copyfrompath, ent.copyfrom_rev))
            children = self._iterfiles(ent.copyfrom_path, ent.copyfrom_rev)
            for childpath in children:
                childpath = self.getrelpath("/" + childpath, pmodule)
                if not childpath:
                    continue
                copytopath = path + childpath[len(copyfrompath):]
                copytopath = self.getrelpath(copytopath)
                copies[self.recode(copytopath)] = self.recode(childpath)

    changed.update(removed)
    return (list(changed), removed, copies)
705 703
706 704 def _fetch_revisions(self, from_revnum, to_revnum):
707 705 if from_revnum < to_revnum:
708 706 from_revnum, to_revnum = to_revnum, from_revnum
709 707
710 708 self.child_cset = None
711 709
712 710 def parselogentry(orig_paths, revnum, author, date, message):
713 711 """Return the parsed commit object or None, and True if
714 712 the revision is a branch root.
715 713 """
716 714 self.ui.debug("parsing revision %d (%d changes)\n" %
717 715 (revnum, len(orig_paths)))
718 716
719 717 branched = False
720 718 rev = self.revid(revnum)
721 719 # branch log might return entries for a parent we already have
722 720
723 721 if rev in self.commits or revnum < to_revnum:
724 722 return None, branched
725 723
726 724 parents = []
727 725 # check whether this revision is the start of a branch or part
728 726 # of a branch renaming
729 727 orig_paths = sorted(orig_paths.iteritems())
730 728 root_paths = [(p, e) for p, e in orig_paths
731 729 if self.module.startswith(p)]
732 730 if root_paths:
733 731 path, ent = root_paths[-1]
734 732 if ent.copyfrom_path:
735 733 branched = True
736 734 newpath = ent.copyfrom_path + self.module[len(path):]
737 735 # ent.copyfrom_rev may not be the actual last revision
738 736 previd = self.latest(newpath, ent.copyfrom_rev)
739 737 if previd is not None:
740 738 prevmodule, prevnum = self.revsplit(previd)[1:]
741 739 if prevnum >= self.startrev:
742 740 parents = [previd]
743 741 self.ui.note(
744 742 _('found parent of branch %s at %d: %s\n') %
745 743 (self.module, prevnum, prevmodule))
746 744 else:
747 745 self.ui.debug("no copyfrom path, don't know what to do.\n")
748 746
749 747 paths = []
750 748 # filter out unrelated paths
751 749 for path, ent in orig_paths:
752 750 if self.getrelpath(path) is None:
753 751 continue
754 752 paths.append((path, ent))
755 753
756 754 # Example SVN datetime. Includes microseconds.
757 755 # ISO-8601 conformant
758 756 # '2007-01-04T17:35:00.902377Z'
759 757 date = util.parsedate(date[:19] + " UTC", ["%Y-%m-%dT%H:%M:%S"])
760 758
761 759 log = message and self.recode(message) or ''
762 760 author = author and self.recode(author) or ''
763 761 try:
764 762 branch = self.module.split("/")[-1]
765 763 if branch == 'trunk':
766 764 branch = ''
767 765 except IndexError:
768 766 branch = None
769 767
770 768 cset = commit(author=author,
771 769 date=util.datestr(date),
772 770 desc=log,
773 771 parents=parents,
774 772 branch=branch,
775 773 rev=rev)
776 774
777 775 self.commits[rev] = cset
778 776 # The parents list is *shared* among self.paths and the
779 777 # commit object. Both will be updated below.
780 778 self.paths[rev] = (paths, cset.parents)
781 779 if self.child_cset and not self.child_cset.parents:
782 780 self.child_cset.parents[:] = [rev]
783 781 self.child_cset = cset
784 782 return cset, branched
785 783
786 784 self.ui.note(_('fetching revision log for "%s" from %d to %d\n') %
787 785 (self.module, from_revnum, to_revnum))
788 786
789 787 try:
790 788 firstcset = None
791 789 lastonbranch = False
792 790 stream = self._getlog([self.module], from_revnum, to_revnum)
793 791 try:
794 792 for entry in stream:
795 793 paths, revnum, author, date, message = entry
796 794 if revnum < self.startrev:
797 795 lastonbranch = True
798 796 break
799 797 if not paths:
800 798 self.ui.debug('revision %d has no entries\n' % revnum)
801 799 # If we ever leave the loop on an empty
802 800 # revision, do not try to get a parent branch
803 801 lastonbranch = lastonbranch or revnum == 0
804 802 continue
805 803 cset, lastonbranch = parselogentry(paths, revnum, author,
806 804 date, message)
807 805 if cset:
808 806 firstcset = cset
809 807 if lastonbranch:
810 808 break
811 809 finally:
812 810 stream.close()
813 811
814 812 if not lastonbranch and firstcset and not firstcset.parents:
815 813 # The first revision of the sequence (the last fetched one)
816 814 # has invalid parents if not a branch root. Find the parent
817 815 # revision now, if any.
818 816 try:
819 817 firstrevnum = self.revnum(firstcset.rev)
820 818 if firstrevnum > 1:
821 819 latest = self.latest(self.module, firstrevnum - 1)
822 820 if latest:
823 821 firstcset.parents.append(latest)
824 822 except SvnPathNotFound:
825 823 pass
826 824 except SubversionException, (inst, num):
827 825 if num == svn.core.SVN_ERR_FS_NO_SUCH_REVISION:
828 826 raise util.Abort(_('svn: branch has no revision %s') % to_revnum)
829 827 raise
830 828
831 829 def _getfile(self, file, rev):
832 830 # TODO: ra.get_file transmits the whole file instead of diffs.
833 831 if file in self.removed:
834 832 raise IOError()
835 833 mode = ''
836 834 try:
837 835 new_module, revnum = self.revsplit(rev)[1:]
838 836 if self.module != new_module:
839 837 self.module = new_module
840 838 self.reparent(self.module)
841 839 io = StringIO()
842 840 info = svn.ra.get_file(self.ra, file, revnum, io)
843 841 data = io.getvalue()
844 842 # ra.get_files() seems to keep a reference on the input buffer
845 843 # preventing collection. Release it explicitely.
846 844 io.close()
847 845 if isinstance(info, list):
848 846 info = info[-1]
849 847 mode = ("svn:executable" in info) and 'x' or ''
850 848 mode = ("svn:special" in info) and 'l' or mode
851 849 except SubversionException, e:
852 850 notfound = (svn.core.SVN_ERR_FS_NOT_FOUND,
853 851 svn.core.SVN_ERR_RA_DAV_PATH_NOT_FOUND)
854 852 if e.apr_err in notfound: # File not found
855 853 raise IOError()
856 854 raise
857 855 if mode == 'l':
858 856 link_prefix = "link "
859 857 if data.startswith(link_prefix):
860 858 data = data[len(link_prefix):]
861 859 return data, mode
862 860
def _iterfiles(self, path, revnum):
    """Enumerate all files in path at revnum, recursively.

    The listing itself is requested immediately; the returned generator
    only filters out non-file entries and prefixes each name with path.
    """
    path = path.strip('/')
    pool = Pool()
    rpath = ('%s/%s' % (self.baseurl, urllib.quote(path))).strip('/')
    entries = svn.client.ls(rpath, optrev(revnum), True, self.ctx, pool)
    return ((path + '/' + name) for name, entry in entries.iteritems()
            if entry.kind == svn.core.svn_node_file)
871 869
def getrelpath(self, path, module=None):
    """Return path relative to module, or None if it lies outside.

    module defaults to self.module. An empty string is returned when
    path designates the module itself (with or without trailing
    slashes).
    """
    if module is None:
        module = self.module
    # Given the repository url of this wc, say
    #   "http://server/plone/CMFPlone/branches/Plone-2_0-branch"
    # extract the "entry" portion (a relative path) from what
    # svn log --xml says, i.e.
    #   "/CMFPlone/branches/Plone-2_0-branch/tests/PloneTestCase.py"
    # that is to say "tests/PloneTestCase.py"
    if path.startswith(module):
        tail = path.rstrip('/')[len(module):]
        if tail[:1] == '/':
            return tail[1:]
        if not tail:
            return tail
        # Otherwise module is only a textual prefix of a sibling path
        # (e.g. "/trunk" vs "/trunkfoo"): treat it as outside.

    # The path is outside our tracked tree...
    self.ui.debug('%r is not under %r, ignoring\n' % (path, module))
    return None
891 889
def _checkpath(self, path, revnum, module=None):
    """Return svn.ra.check_path()'s result for path at revnum.

    When module is given, path is taken relative to it: the session
    is reparented to '' for the duration of the check and reparented
    back to the previous value afterwards, even if the check raises.
    """
    if module is None:
        # ra.check_path does not like leading slashes very much, it leads
        # to PROPFIND subversion errors
        return svn.ra.check_path(self.ra, path.strip('/'), revnum)

    saved = self.reparent('')
    fullpath = module + '/' + path
    try:
        # Same leading-slash restriction as above.
        return svn.ra.check_path(self.ra, fullpath.strip('/'), revnum)
    finally:
        self.reparent(saved)
903 901
def _getlog(self, paths, start, end, limit=0, discover_changed_paths=True,
            strict_node_history=False):
    """Return a logstream reading from a child 'hg debugsvnlog' process.

    The request (base URL, normalized paths, revision range and
    options) is serialized with encodeargs() and written to the
    child's standard input. Raises util.Abort when closing that pipe
    fails with IOError.
    """
    def normalize(p):
        # Normalize path names, svn >= 1.5 only wants paths relative to
        # supplied URL
        if not p.startswith('/'):
            p = self.module + '/' + p
        return p.strip('/')

    relpaths = [normalize(p) for p in paths]
    payload = encodeargs([self.baseurl, relpaths, start, end, limit,
                          discover_changed_paths, strict_node_history])
    cmd = '%s debugsvnlog' % util.shellquote(util.hgexecutable())
    stdin, stdout = util.popen2(cmd)
    stdin.write(payload)
    try:
        stdin.close()
    except IOError:
        raise util.Abort(_('Mercurial failed to run itself, check'
                           ' hg executable is in PATH'))
    return logstream(stdout)
926 924
# Shell script that svn_sink.__init__ writes to hooks/pre-revprop-change
# in a repository it has just created. It exits 0 (allow) when svn:log is
# modified or when hg:convert-branch / hg:convert-rev is added; any other
# revision property change prints an error and exits 1.
pre_revprop_change = '''#!/bin/sh

REPOS="$1"
REV="$2"
USER="$3"
PROPNAME="$4"
ACTION="$5"

if [ "$ACTION" = "M" -a "$PROPNAME" = "svn:log" ]; then exit 0; fi
if [ "$ACTION" = "A" -a "$PROPNAME" = "hg:convert-branch" ]; then exit 0; fi
if [ "$ACTION" = "A" -a "$PROPNAME" = "hg:convert-rev" ]; then exit 0; fi

echo "Changing prohibited revision property" >&2
exit 1
'''
942 940
class svn_sink(converter_sink, commandline):
    """Conversion sink that commits through a Subversion working copy.

    Changes are applied to the files of a working copy (self.wc) and
    recorded with the 'svn' command line client. Pending operations are
    accumulated in self.delete, self.setexec, self.delexec and
    self.copies, then flushed by putcommit().
    """
    commit_re = re.compile(r'Committed revision (\d+).', re.M)

    def prerun(self):
        # Enter the working copy, once known.
        # NOTE(review): presumably called by commandline before each
        # command -- confirm in common.py.
        if self.wc:
            os.chdir(self.wc)

    def postrun(self):
        # Return to the directory recorded when the sink was created.
        if self.wc:
            os.chdir(self.cwd)

    def join(self, name):
        """Return the path of name inside the working copy '.svn' dir."""
        return os.path.join(self.wc, '.svn', name)

    def revmapfile(self):
        return self.join('hg-shamap')

    def authorfile(self):
        return self.join('hg-authormap')

    def __init__(self, ui, path):
        """Prepare the working copy designated by, or derived from, path.

        If path already is a working copy (it has .svn/entries) it is
        updated and used as is. Otherwise a '<basename>-wc' working copy
        is checked out in the current directory; when path's parent
        directory exists locally, path is treated as a local repository,
        created with 'svnadmin create' if it has no db/fs-type file, and
        accessed through a file:// URL.
        """
        converter_sink.__init__(self, ui, path)
        commandline.__init__(self, ui, 'svn')
        self.delete = []
        self.setexec = []
        self.delexec = []
        self.copies = []
        self.wc = None
        self.cwd = os.getcwd()

        path = os.path.realpath(path)

        created = False
        if os.path.isfile(os.path.join(path, '.svn', 'entries')):
            self.wc = path
            self.run0('update')
        else:
            wcpath = os.path.join(os.getcwd(), os.path.basename(path) + '-wc')

            if os.path.isdir(os.path.dirname(path)):
                if not os.path.exists(os.path.join(path, 'db', 'fs-type')):
                    ui.status(_('initializing svn repository %r\n') %
                              os.path.basename(path))
                    commandline(ui, 'svnadmin').run0('create', path)
                    created = path
                path = util.normpath(path)
                if not path.startswith('/'):
                    path = '/' + path
                path = 'file://' + path

            ui.status(_('initializing svn working copy %r\n')
                      % os.path.basename(wcpath))
            self.run0('checkout', path, wcpath)

            self.wc = wcpath
        self.opener = util.opener(self.wc)
        self.wopener = util.opener(self.wc)
        self.childmap = mapfile(ui, self.join('hg-childmap'))
        self.is_exec = util.checkexec(self.wc) and util.is_exec or None

        if created:
            # Install the hook letting putcommit() set revision
            # properties on the repository we have just created.
            hook = os.path.join(created, 'hooks', 'pre-revprop-change')
            fp = open(hook, 'w')
            try:
                fp.write(pre_revprop_change)
            finally:
                # Close even if the write fails so the handle is not
                # leaked.
                fp.close()
            util.set_flags(hook, False, True)

        xport = transport.SvnRaTransport(url=geturl(path))
        self.uuid = svn.ra.get_uuid(xport.ra)

    def wjoin(self, *names):
        """Return the path of names joined under the working copy."""
        return os.path.join(self.wc, *names)

    def putfile(self, filename, flags, data):
        """Write data to filename in the working copy, honouring flags.

        With 'l' in flags a symlink pointing at data is created.
        Otherwise a regular file is written, its execute bit is set
        according to 'x' in flags, and filename is queued in
        self.setexec or self.delexec when the svn:executable property
        has to change at commit time.
        """
        if 'l' in flags:
            self.wopener.symlink(data, filename)
        else:
            wpath = self.wjoin(filename)
            try:
                if os.path.islink(wpath):
                    # Unlink the working copy path that was just
                    # tested: a bare filename would be resolved against
                    # the process current directory instead.
                    os.unlink(wpath)
            except OSError:
                pass
            fp = self.wopener(filename, 'w')
            try:
                fp.write(data)
            finally:
                # Flush to disk before the execute bit is inspected.
                fp.close()

            if self.is_exec:
                was_exec = self.is_exec(wpath)
            else:
                # On filesystems not supporting execute-bit, there is no way
                # to know if it is set but asking subversion. Setting it
                # systematically is just as expensive and much simpler.
                was_exec = 'x' not in flags

            util.set_flags(wpath, False, 'x' in flags)
            if was_exec:
                if 'x' not in flags:
                    self.delexec.append(filename)
            else:
                if 'x' in flags:
                    self.setexec.append(filename)

    def _copyfile(self, source, dest):
        # SVN's copy command pukes if the destination file exists, but
        # our copyfile method expects to record a copy that has
        # already occurred. Cross the semantic gap.
        wdest = self.wjoin(dest)
        exists = os.path.exists(wdest)
        if exists:
            # Reserve a unique name next to dest, then move dest aside.
            fd, tempname = tempfile.mkstemp(
                prefix='hg-copy-', dir=os.path.dirname(wdest))
            os.close(fd)
            os.unlink(tempname)
            os.rename(wdest, tempname)
        try:
            self.run0('copy', source, dest)
        finally:
            if exists:
                # Put the content set aside back in place of whatever
                # 'svn copy' produced.
                try:
                    os.unlink(wdest)
                except OSError:
                    pass
                os.rename(tempname, wdest)

    def dirs_of(self, files):
        """Return the set of directories involved in files.

        That is every entry of files which is itself a directory in the
        working copy, plus every prefix of an entry ending before one
        of its '/' separators.
        """
        dirs = set()
        for f in files:
            if os.path.isdir(self.wjoin(f)):
                dirs.add(f)
            for i in strutil.rfindall(f, '/'):
                dirs.add(f[:i])
        return dirs

    def add_dirs(self, files):
        """'svn add' the directories of files lacking .svn/entries.

        Directories are added non-recursively, in sorted order. Returns
        the list of directories added.
        """
        add_dirs = [d for d in sorted(self.dirs_of(files))
                    if not os.path.exists(self.wjoin(d, '.svn', 'entries'))]
        if add_dirs:
            self.xargs(add_dirs, 'add', non_recursive=True, quiet=True)
        return add_dirs

    def add_files(self, files):
        """'svn add' files, if any, and return them."""
        if files:
            self.xargs(files, 'add', quiet=True)
        return files

    def tidy_dirs(self, names):
        """'svn delete' directories of names left with only '.svn' inside.

        Directories are visited in reverse sorted order. Returns the
        list of directories deleted.
        """
        deleted = []
        for d in sorted(self.dirs_of(names), reverse=True):
            wd = self.wjoin(d)
            # os.listdir() returns a list: comparing it with the bare
            # string '.svn' could never be true.
            if os.listdir(wd) == ['.svn']:
                self.run0('delete', d)
                deleted.append(d)
        return deleted

    def addchild(self, parent, child):
        self.childmap[parent] = child

    def revid(self, rev):
        """Return the 'svn:<uuid>@<rev>' identifier for rev."""
        return u"svn:%s@%s" % (self.uuid, rev)

    def putcommit(self, files, copies, parents, commit, source, revmap):
        """Apply a changeset to the working copy and commit it.

        files is a sequence of (name, version) pairs fetched from
        source; names for which source.getfile() raises IOError are
        scheduled for deletion. Returns the identifier of the resulting
        revision, the identifier of an already recorded child of one of
        parents, or parents[0] when svn reported no commit and files is
        empty. Raises util.Abort on unexpected 'svn commit' output.
        """
        # Apply changes to working copy
        for f, v in files:
            try:
                data = source.getfile(f, v)
            except IOError:
                self.delete.append(f)
            else:
                e = source.getmode(f, v)
                self.putfile(f, e, data)
                if f in copies:
                    self.copies.append([copies[f], f])
        files = [f[0] for f in files]

        # A parent with a recorded child: return that child's id.
        for parent in parents:
            try:
                return self.revid(self.childmap[parent])
            except KeyError:
                pass
        entries = set(self.delete)
        files = frozenset(files)
        entries.update(self.add_dirs(files.difference(entries)))
        if self.copies:
            for s, d in self.copies:
                self._copyfile(s, d)
            self.copies = []
        if self.delete:
            self.xargs(self.delete, 'delete')
            self.delete = []
        entries.update(self.add_files(files.difference(entries)))
        entries.update(self.tidy_dirs(entries))
        if self.delexec:
            self.xargs(self.delexec, 'propdel', 'svn:executable')
            self.delexec = []
        if self.setexec:
            self.xargs(self.setexec, 'propset', 'svn:executable', '*')
            self.setexec = []

        # Pass the commit message through a temporary file.
        fd, messagefile = tempfile.mkstemp(prefix='hg-convert-')
        fp = os.fdopen(fd, 'w')
        fp.write(commit.desc)
        fp.close()
        try:
            output = self.run0('commit',
                               username=util.shortuser(commit.author),
                               file=messagefile,
                               encoding='utf-8')
            try:
                rev = self.commit_re.search(output).group(1)
            except AttributeError:
                # No "Committed revision N." line in the output.
                if not files:
                    return parents[0]
                self.ui.warn(_('unexpected svn output:\n'))
                self.ui.warn(output)
                raise util.Abort(_('unable to cope with svn output'))
            if commit.rev:
                self.run('propset', 'hg:convert-rev', commit.rev,
                         revprop=True, revision=rev)
            if commit.branch and commit.branch != 'default':
                self.run('propset', 'hg:convert-branch', commit.branch,
                         revprop=True, revision=rev)
            for parent in parents:
                self.addchild(parent, rev)
            return self.revid(rev)
        finally:
            os.unlink(messagefile)

    def puttags(self, tags):
        self.ui.warn(_('XXX TAGS NOT IMPLEMENTED YET\n'))
General Comments 0
You need to be logged in to leave comments. Login now