##// END OF EJS Templates
convert/svn: remove a useless recode() call in deletion path
Patrick Mezard -
r8883:ede2247e default
parent child Browse files
Show More
@@ -1,1209 +1,1203 b''
1 1 # Subversion 1.4/1.5 Python API backend
2 2 #
3 3 # Copyright(C) 2007 Daniel Holth et al
4 4
5 5 import locale
6 6 import os
7 7 import re
8 8 import sys
9 9 import cPickle as pickle
10 10 import tempfile
11 11 import urllib
12 12
13 13 from mercurial import strutil, util
14 14 from mercurial.i18n import _
15 15
16 16 # Subversion stuff. Works best with very recent Python SVN bindings
17 17 # e.g. SVN 1.5 or backports. Thanks to the bzr folks for enhancing
18 18 # these bindings.
19 19
20 20 from cStringIO import StringIO
21 21
22 22 from common import NoRepo, MissingTool, commit, encodeargs, decodeargs
23 23 from common import commandline, converter_source, converter_sink, mapfile
24 24
25 25 try:
26 26 from svn.core import SubversionException, Pool
27 27 import svn
28 28 import svn.client
29 29 import svn.core
30 30 import svn.ra
31 31 import svn.delta
32 32 import transport
33 33 import warnings
34 34 warnings.filterwarnings('ignore',
35 35 module='svn.core',
36 36 category=DeprecationWarning)
37 37
38 38 except ImportError:
39 39 pass
40 40
class SvnPathNotFound(Exception):
    """Raised when a path cannot be found in the Subversion repository."""
43 43
def geturl(path):
    """Return a Subversion URL for path.

    The Subversion client is asked first. If it fails, an existing local
    directory is turned into a quoted file:// URL; anything else is
    returned unchanged.
    """
    try:
        canonical = svn.core.svn_path_canonicalize(path)
        return svn.client.url_from_path(canonical)
    except SubversionException:
        pass
    if not os.path.isdir(path):
        return path
    localpath = os.path.normpath(os.path.abspath(path))
    if os.name == 'nt':
        localpath = '/' + util.normpath(localpath)
    return 'file://%s' % urllib.quote(localpath)
55 55
def optrev(number):
    """Build an svn_opt_revision_t selecting revision `number`."""
    revision = svn.core.svn_opt_revision_t()
    revision.kind = svn.core.svn_opt_revision_number
    revision.value.number = number
    return revision
61 61
class changedpath(object):
    """Plain copy of the copyfrom_path, copyfrom_rev and action
    attributes of a changed-path log entry."""
    def __init__(self, p):
        for name in ('copyfrom_path', 'copyfrom_rev', 'action'):
            setattr(self, name, getattr(p, name))
67 67
def get_log_child(fp, url, paths, start, end, limit=0, discover_changed_paths=True,
                  strict_node_history=False):
    """Fetch the Subversion log for paths and pickle each entry to fp.

    Every log entry is written as a pickled
    (orig_paths, revnum, author, date, message) tuple. The stream ends
    with a pickled None, or with the pickled error number when a
    SubversionException is raised. This function never returns: it
    closes fp and terminates the process with os._exit(0).
    """
    # Pickle protocol passed to every dump() call below.
    protocol = -1
    def receiver(orig_paths, revnum, author, date, message, pool):
        if orig_paths is not None:
            # Replace the binding objects with changedpath copies
            # before pickling.
            for k, v in orig_paths.iteritems():
                orig_paths[k] = changedpath(v)
        pickle.dump((orig_paths, revnum, author, date, message),
                    fp, protocol)

    try:
        # Use an ra of our own so that our parent can consume
        # our results without confusing the server.
        t = transport.SvnRaTransport(url=url)
        svn.ra.get_log(t.ra, paths, start, end, limit,
                       discover_changed_paths,
                       strict_node_history,
                       receiver)
    except SubversionException, (inst, num):
        # Report the error number to the reader instead of an entry.
        pickle.dump(num, fp, protocol)
    except IOError:
        # Caller may interrupt the iteration
        pickle.dump(None, fp, protocol)
    else:
        # Normal end of stream marker.
        pickle.dump(None, fp, protocol)
    fp.close()
    # With large history, cleanup process goes crazy and suddenly
    # consumes *huge* amount of memory. The output file being closed,
    # there is no need for clean termination.
    os._exit(0)
98 98
def debugsvnlog(ui, **opts):
    """Read encoded log arguments from stdin and stream the matching
    Subversion log entries to stdout.

    Both standard streams are switched to binary mode first. The work
    is delegated to get_log_child().
    """
    for stream in (sys.stdin, sys.stdout):
        util.set_binary(stream)
    logargs = decodeargs(sys.stdin.read())
    get_log_child(sys.stdout, *logargs)
107 107
class logstream(object):
    """Interruptible revision log iterator.

    Wraps a file-like object holding a sequence of pickled log entries.
    Each entry is a 5-tuple (orig_paths, revnum, author, date, message).
    A pickled None marks the end of the stream; any other value that is
    not a 5-item sequence is reported as a child failure.
    """
    def __init__(self, stdout):
        self._stdout = stdout

    def __iter__(self):
        """Yield log entries until the end-of-stream marker is read.

        Raises SubversionException when the stream carries something
        other than a log entry or the None terminator.
        """
        while True:
            # NOTE(review): pickle.load() must only be fed trusted data;
            # confirm the stream always comes from our own child process.
            entry = pickle.load(self._stdout)
            try:
                orig_paths, revnum, author, date, message = entry
            except (TypeError, ValueError):
                # Only unpacking failures are expected here. A bare
                # except would also swallow KeyboardInterrupt/SystemExit
                # and misreport them as a child error.
                if entry is None:
                    break
                raise SubversionException("child raised exception", entry)
            yield entry

    def close(self):
        """Close the underlying stream; calling it again is a no-op."""
        if self._stdout:
            self._stdout.close()
            self._stdout = None
128 128
129 129
130 130 # Check to see if the given path is a local Subversion repo. Verify this by
131 131 # looking for several svn-specific files and directories in the given
132 132 # directory.
def filecheck(path, proto):
    """Return True if path contains every entry expected at the root of
    a local Subversion repository ('locks', 'hooks', 'format', 'db').

    proto is unused; it is accepted so the signature matches the other
    protocol checkers.
    """
    required = ('locks', 'hooks', 'format', 'db', )
    missing = [name for name in required
               if not os.path.exists(os.path.join(path, name))]
    return not missing
138 138
139 139 # Check to see if a given path is the root of an svn repo over http. We verify
140 140 # this by requesting a version-controlled URL we know can't exist and looking
141 141 # for the svn-specific "not found" XML.
def httpcheck(path, proto):
    """Return True if proto://path answers like a Subversion repository
    root over HTTP.

    A version-controlled URL that cannot exist is requested and the
    response body is searched for the svn-specific "not found" XML
    (errcode 160013).
    """
    response = urllib.urlopen('%s://%s/!svn/ver/0/.svn' % (proto, path))
    try:
        body = response.read()
    finally:
        # Release the connection instead of waiting for garbage
        # collection; issvnurl() may call this once per path component.
        response.close()
    return '<m:human-readable errcode="160013">' in body
145 145
# Maps a URL scheme to the function probing it for a Subversion repository.
protomap = dict(http=httpcheck, https=httpcheck, file=filecheck)

def issvnurl(url):
    """Return True if url, or one of its parent paths, passes the
    repository check registered for its scheme in protomap.

    A url without '://' is treated as a local path using the 'file'
    check. Schemes missing from protomap never match.
    """
    def nevermatch(candidate, scheme):
        return False

    try:
        proto, path = url.split('://', 1)
        path = urllib.url2pathname(path)
    except ValueError:
        proto = 'file'
        path = os.path.abspath(url)
    path = path.replace(os.sep, '/')
    check = protomap.get(proto, nevermatch)
    # Walk up the path one component at a time.
    while '/' in path:
        if check(path, proto):
            return True
        path = path[:path.rindex('/')]
    return False
164 164
165 165 # SVN conversion code stolen from bzr-svn and tailor
166 166 #
167 167 # Subversion looks like a versioned filesystem; branch structures
168 168 # are defined by conventions and not enforced by the tool. First,
169 169 # we define the potential branches (modules) as "trunk" and "branches"
170 170 # children directories. Revisions are then identified by their
171 171 # module and revision number (and a repository identifier).
172 172 #
173 173 # The revision graph is really a tree (or a forest). By default, a
174 174 # revision parent is the previous revision in the same module. If the
175 175 # module directory is copied/moved from another module then the
176 176 # revision is the module root and its parent the source revision in
177 177 # the parent module. A revision has at most one parent.
178 178 #
179 179 class svn_source(converter_source):
def __init__(self, ui, url, rev=None):
    """Open the Subversion repository at url as a conversion source.

    url may carry a trailing '@rev' suffix; an explicit rev argument
    takes precedence over it.

    Raises NoRepo when url does not look like a Subversion repository
    or cannot be opened, MissingTool when the Python bindings are
    missing or older than 1.4, and util.Abort when rev or the
    convert.svn.startrev setting is not an integer, or when no revision
    is found in the module.
    """
    super(svn_source, self).__init__(ui, url, rev=rev)

    if not (url.startswith('svn://') or url.startswith('svn+ssh://') or
            (os.path.exists(url) and
             os.path.exists(os.path.join(url, '.svn'))) or
            issvnurl(url)):
        raise NoRepo("%s does not look like a Subversion repo" % url)

    # The name is only bound when the guarded import of the svn
    # bindings at the top of the module succeeded.
    try:
        SubversionException
    except NameError:
        raise MissingTool(_('Subversion python bindings could not be loaded'))

    try:
        version = svn.core.SVN_VER_MAJOR, svn.core.SVN_VER_MINOR
        if version < (1, 4):
            raise MissingTool(_('Subversion python bindings %d.%d found, '
                                '1.4 or later required') % version)
    except AttributeError:
        raise MissingTool(_('Subversion python bindings are too old, 1.4 '
                            'or later required'))

    self.lastrevs = {}

    latest = None
    try:
        # Support file://path@rev syntax. Useful e.g. to convert
        # deleted branches.
        at = url.rfind('@')
        if at >= 0:
            latest = int(url[at+1:])
            url = url[:at]
    except ValueError:
        # The text after '@' is not a revision number: keep url as is.
        pass
    self.url = geturl(url)
    self.encoding = 'UTF-8' # Subversion is always nominal UTF-8
    try:
        self.transport = transport.SvnRaTransport(url=self.url)
        self.ra = self.transport.ra
        self.ctx = self.transport.client
        self.baseurl = svn.ra.get_repos_root(self.ra)
        # Module is either empty or a repository path starting with
        # a slash and not ending with a slash.
        self.module = urllib.unquote(self.url[len(self.baseurl):])
        self.prevmodule = None
        self.rootmodule = self.module
        self.commits = {}
        self.paths = {}
        self.uuid = svn.ra.get_uuid(self.ra).decode(self.encoding)
    except SubversionException:
        ui.traceback()
        raise NoRepo("%s does not look like a Subversion repo" % self.url)

    if rev:
        try:
            latest = int(rev)
        except ValueError:
            raise util.Abort(_('svn: revision %s is not an integer') % rev)

    self.startrev = self.ui.config('convert', 'svn.startrev', default=0)
    try:
        self.startrev = int(self.startrev)
        # Negative start revisions are clamped to 0.
        if self.startrev < 0:
            self.startrev = 0
    except ValueError:
        raise util.Abort(_('svn: start revision %s is not an integer')
                         % self.startrev)

    # The blacklist file is optional: a failure to read it is ignored.
    try:
        self.get_blacklist()
    except IOError:
        pass

    self.head = self.latest(self.module, latest)
    if not self.head:
        raise util.Abort(_('no revision found in module %s') %
                         self.module.encode(self.encoding))
    self.last_changed = self.revnum(self.head)

    self._changescache = None

    # Remember url as a working copy only if it has a .svn/entries file.
    if os.path.exists(os.path.join(url, '.svn/entries')):
        self.wc = url
    else:
        self.wc = None
    self.convertfp = None
267 267
def setrevmap(self, revmap):
    """Record in self.lastrevs the highest revision number found in
    revmap for each module."""
    newest = {}
    for revid in revmap.iterkeys():
        module, revnum = self.revsplit(revid)[1:]
        if module not in newest or revnum > newest[module]:
            newest[module] = revnum
    self.lastrevs = newest
276 276
def exists(self, path, optrev):
    """Return True if path, relative to self.url, can be listed at
    revision optrev, False if the listing raises SubversionException."""
    target = self.url.rstrip('/') + '/' + urllib.quote(path)
    try:
        svn.client.ls(target, optrev, False, self.ctx)
    except SubversionException:
        return False
    return True
284 284
def getheads(self):
    """Return the list of head revision ids to convert.

    Looks for the 'trunk', 'tags' and 'branches' directories (names can
    be overridden with the convert.svn.* settings). The first head is
    the module head; one more is added for every non-empty directory
    found under the branches directory. Also sets self.tags and
    self.heads, and may append the trunk path to self.module.

    Raises util.Abort when a configured layout path is missing, when no
    revision is found in the trunk module, or when a start revision is
    combined with several heads or lies after the only head.
    """

    def isdir(path, revnum):
        kind = self._checkpath(path, revnum)
        return kind == svn.core.svn_node_dir

    def getcfgpath(name, rev):
        # Return the layout path for name, or None when it is disabled
        # (configured as blank) or absent from the repository.
        cfgpath = self.ui.config('convert', 'svn.' + name)
        if cfgpath is not None and cfgpath.strip() == '':
            return None
        path = (cfgpath or name).strip('/')
        if not self.exists(path, rev):
            # Only an explicitly configured path is required to exist.
            if cfgpath:
                raise util.Abort(_('expected %s to be at %r, but not found')
                                 % (name, path))
            return None
        self.ui.note(_('found %s at %r\n') % (name, path))
        return path

    rev = optrev(self.last_changed)
    oldmodule = ''
    trunk = getcfgpath('trunk', rev)
    self.tags = getcfgpath('tags', rev)
    branches = getcfgpath('branches', rev)

    # If the project has a trunk or branches, we will extract heads
    # from them. We keep the project root otherwise.
    if trunk:
        oldmodule = self.module or ''
        self.module += '/' + trunk
        self.head = self.latest(self.module, self.last_changed)
        if not self.head:
            raise util.Abort(_('no revision found in module %s') %
                             self.module.encode(self.encoding))

    # First head in the list is the module's head
    self.heads = [self.head]
    if self.tags is not None:
        self.tags = '%s/%s' % (oldmodule , (self.tags or 'tags'))

    # Check if branches bring a few more heads to the list
    if branches:
        rpath = self.url.strip('/')
        branchnames = svn.client.ls(rpath + '/' + urllib.quote(branches),
                                    rev, False, self.ctx)
        for branch in branchnames.keys():
            module = '%s/%s/%s' % (oldmodule, branches, branch)
            if not isdir(module, self.last_changed):
                continue
            brevid = self.latest(module, self.last_changed)
            if not brevid:
                self.ui.note(_('ignoring empty branch %s\n') %
                             branch.encode(self.encoding))
                continue
            self.ui.note(_('found branch %s at %d\n') %
                         (branch, self.revnum(brevid)))
            self.heads.append(brevid)

    if self.startrev and self.heads:
        if len(self.heads) > 1:
            raise util.Abort(_('svn: start revision is not supported '
                               'with more than one branch'))
        revnum = self.revnum(self.heads[0])
        if revnum < self.startrev:
            raise util.Abort(_('svn: no revision found after start revision %d')
                             % self.startrev)

    return self.heads
353 353
def getfile(self, file, rev):
    """Return the content of file at rev and remember its mode in
    self.modecache for a later getmode() call."""
    contents, flags = self._getfile(file, rev)
    self.modecache[(file, rev)] = flags
    return contents
358 358
def getmode(self, file, rev):
    """Return the mode cached for (file, rev) by getfile().

    Raises KeyError if that pair is not in self.modecache.
    """
    key = (file, rev)
    return self.modecache[key]
361 361
def getchanges(self, rev):
    """Return (files, copies) for revision rev.

    files is a sorted sequence of (path, rev) pairs and copies maps
    destinations to sources. A result cached by getchangedfiles() for
    the same rev is returned as is. Otherwise the entry for rev is
    removed from self.paths once it has been consumed.
    """
    cached = self._changescache
    if cached and cached[0] == rev:
        return cached[1]
    self._changescache = None
    self.modecache = {}
    paths, parents = self.paths[rev]
    if not parents:
        # Perform a full checkout on roots
        module, revnum = self.revsplit(rev)[1:]
        listing = svn.client.ls(self.baseurl + urllib.quote(module),
                                optrev(revnum), True, self.ctx)
        files = [name for name, dirent in listing.iteritems()
                 if dirent.kind == svn.core.svn_node_file]
        copies = {}
    else:
        files, copies = self.expandpaths(rev, paths, parents)

    files.sort()
    files = zip(files, [rev] * len(files))

    # caller caches the result, so free it here to release memory
    del self.paths[rev]
    return (files, copies)
385 385
def getchangedfiles(self, rev, i):
    """Return the names of the files changed in rev.

    The full getchanges() result is kept in self._changescache so the
    next getchanges(rev) call can reuse it. i is unused here.
    """
    result = self.getchanges(rev)
    self._changescache = (rev, result)
    filepairs = result[0]
    return [pair[0] for pair in filepairs]
390 390
def getcommit(self, rev):
    """Return the commit object for rev, fetching the revision log
    first when it is not cached yet.

    The entry is removed from self.commits before returning.
    """
    if rev not in self.commits:
        module, revnum = self.revsplit(rev)[1:]
        self.module = module
        self.reparent(module)
        # We assume that:
        # - requests for revisions after "stop" come from the
        # revision graph backward traversal. Cache all of them
        # down to stop, they will be used eventually.
        # - requests for revisions before "stop" come to get
        # isolated branches parents. Just fetch what is needed.
        stop = self.lastrevs.get(module, 0)
        if revnum < stop:
            stop = revnum + 1
        self._fetch_revisions(revnum, stop)
    # caller caches the result, so free it here to release memory
    return self.commits.pop(rev)
410 410
def gettags(self):
    """Return a dict mapping tag names to the revision ids they point to.

    Returns an empty dict when no tags directory was found (self.tags
    is None). When reading the log raises SubversionException, a note
    is emitted and the tags collected so far are returned.
    """
    tags = {}
    if self.tags is None:
        return tags

    # svn tags are just a convention, project branches left in a
    # 'tags' directory. There is no other relationship than
    # ancestry, which is expensive to discover and makes them hard
    # to update incrementally. Worse, past revisions may be
    # referenced by tags far away in the future, requiring a deep
    # history traversal on every calculation. Current code
    # performs a single backward traversal, tracking moves within
    # the tags directory (tag renaming) and recording a new tag
    # every time a project is copied from outside the tags
    # directory. It also lists deleted tags, this behaviour may
    # change in the future.
    pendings = []
    tagspath = self.tags
    start = svn.ra.get_latest_revnum(self.ra)
    try:
        for entry in self._getlog([self.tags], start, self.startrev):
            origpaths, revnum, author, date, message = entry
            copies = [(e.copyfrom_path, e.copyfrom_rev, p) for p, e
                      in origpaths.iteritems() if e.copyfrom_path]
            # Apply moves/copies from more specific to general
            copies.sort(reverse=True)

            srctagspath = tagspath
            if copies and copies[-1][2] == tagspath:
                # Track tags directory moves
                srctagspath = copies.pop()[0]

            for source, sourcerev, dest in copies:
                if not dest.startswith(tagspath + '/'):
                    continue
                for tag in pendings:
                    if tag[0].startswith(dest):
                        tagpath = source + tag[0][len(dest):]
                        tag[:2] = [tagpath, sourcerev]
                        break
                else:
                    pendings.append([source, sourcerev, dest])

            # Filter out tags with children coming from different
            # parts of the repository like:
            # /tags/tag.1 (from /trunk:10)
            # /tags/tag.1/foo (from /branches/foo:12)
            # Here /tags/tag.1 is discarded as well as its children.
            # It happens with tools like cvs2svn. Such tags cannot
            # be represented in mercurial.
            #
            # Only added paths that were copied from somewhere are
            # considered: a plain addition has copyfrom_path None and
            # would make the prefix computation below raise TypeError.
            addeds = dict((p, e.copyfrom_path) for p, e
                          in origpaths.iteritems()
                          if e.action == 'A' and e.copyfrom_path)
            badroots = set()
            for destroot in addeds:
                for source, sourcerev, dest in pendings:
                    if (not dest.startswith(destroot + '/')
                        or source.startswith(addeds[destroot] + '/')):
                        continue
                    badroots.add(destroot)
                    break

            for badroot in badroots:
                pendings = [p for p in pendings if p[2] != badroot
                            and not p[2].startswith(badroot + '/')]

            # Tell tag renamings from tag creations
            remainings = []
            for source, sourcerev, dest in pendings:
                tagname = dest.split('/')[-1]
                if source.startswith(srctagspath):
                    remainings.append([source, sourcerev, tagname])
                    continue
                if tagname in tags:
                    # Keep the latest tag value
                    continue
                # From revision may be fake, get one with changes
                try:
                    tagid = self.latest(source, sourcerev)
                    if tagid and tagname not in tags:
                        tags[tagname] = tagid
                except SvnPathNotFound:
                    # It happens when we are following directories
                    # we assumed were copied with their parents
                    # but were really created in the tag
                    # directory.
                    pass
            pendings = remainings
            tagspath = srctagspath

    except SubversionException:
        self.ui.note(_('no tags found at revision %d\n') % start)
    return tags
503 503
def converted(self, rev, destrev):
    """Append a '<destrev> <svn revnum>' line to .svn/hg-shamap in the
    working copy. Does nothing when the source is not a working copy.

    The map file is opened in append mode on first use and kept open
    in self.convertfp.
    """
    if not self.wc:
        return
    if self.convertfp is None:
        mappath = os.path.join(self.wc, '.svn', 'hg-shamap')
        self.convertfp = open(mappath, 'a')
    record = '%s %d\n' % (destrev, self.revnum(rev))
    self.convertfp.write(record)
    self.convertfp.flush()
512 512
def revid(self, revnum, module=None):
    """Return the unicode revision id 'svn:<uuid><module>@<revnum>'.

    self.module is used when module is empty or None.
    """
    module = module or self.module
    decoded = module.decode(self.encoding)
    return u"svn:%s%s@%s" % (self.uuid, decoded, revnum)
518 518
def revnum(self, rev):
    """Return the integer revision number following the last '@' in rev."""
    number = rev.rsplit('@', 1)[-1]
    return int(number)
521 521
def revsplit(self, rev):
    """Split a revision id into (uuid, module, revnum).

    module is '' when the id has no path part, otherwise it starts
    with a slash. revnum is an int.
    """
    url, revtext = rev.encode(self.encoding).rsplit('@', 1)
    number = int(revtext)
    pieces = url.split('/', 1)
    # Drop the leading 'svn:' from the first component.
    uuid = pieces[0][4:]
    if len(pieces) > 1:
        module = '/' + pieces[1]
    else:
        module = ''
    return uuid, module, number
531 531
def latest(self, path, stop=0):
    """Find the latest revid affecting path, up to stop. It may return
    a revision in a different module, since a branch may be moved without
    a change being reported. Return None if computed module does not
    belong to rootmodule subtree.

    A false stop means the latest repository revision. Raises
    SvnPathNotFound when path cannot be stat'ed at stop.
    """
    if not path.startswith(self.rootmodule):
        # Requests on foreign branches may be forbidden at server level
        self.ui.debug(_('ignoring foreign branch %r\n') % path)
        return None

    if not stop:
        stop = svn.ra.get_latest_revnum(self.ra)
    try:
        # stat() is issued with the transport reparented to '', then
        # the previous parent is restored.
        prevmodule = self.reparent('')
        dirent = svn.ra.stat(self.ra, path.strip('/'), stop)
        self.reparent(prevmodule)
    except SubversionException:
        dirent = None
    if not dirent:
        raise SvnPathNotFound(_('%s not found up to revision %d') % (path, stop))

    # stat() gives us the previous revision on this line of
    # development, but it might be in *another module*. Fetch the
    # log and detect renames down to the latest revision.
    stream = self._getlog([path], stop, dirent.created_rev)
    try:
        for entry in stream:
            paths, revnum, author, date, message = entry
            if revnum <= dirent.created_rev:
                break

            # Follow the first copy whose destination is a prefix of
            # path, rewriting path to its location in the copy source.
            for p in paths:
                if not path.startswith(p) or not paths[p].copyfrom_path:
                    continue
                newpath = paths[p].copyfrom_path + path[len(p):]
                self.ui.debug(_("branch renamed from %s to %s at %d\n") %
                              (path, newpath, revnum))
                path = newpath
                break
    finally:
        stream.close()

    # Renames may have led outside of the converted subtree.
    if not path.startswith(self.rootmodule):
        self.ui.debug(_('ignoring foreign branch %r\n') % path)
        return None
    return self.revid(dirent.created_rev, path)
579 579
def get_blacklist(self):
    """Avoid certain revision numbers.
    It is not uncommon for two nearby revisions to cancel each other
    out, e.g. 'I copied trunk into a subdirectory of itself instead
    of making a branch'. The converted repository is significantly
    smaller if we ignore such revisions.

    Revision numbers are read, one per line, from "blacklist.txt" in
    the current directory into self.blacklist. Lines starting with '#'
    and lines that are not integers are ignored. self.blacklist is set
    to an empty set before the file is opened, so it exists even when
    opening the file raises IOError.
    """
    self.blacklist = set()
    blacklist = self.blacklist
    fp = open("blacklist.txt", "r")
    try:
        for line in fp:
            if line.startswith("#"):
                continue
            try:
                blacklist.add(int(line.strip()))
            except ValueError:
                pass # not an integer or a comment
    finally:
        # Close the file explicitly instead of leaking the handle.
        fp.close()
596 596
def is_blacklisted(self, svn_rev):
    """Return True if svn_rev is in self.blacklist."""
    skipped = self.blacklist
    return svn_rev in skipped
599 599
def reparent(self, module):
    """Reparent the svn transport and return the previous parent.

    Nothing is done when module is already the current parent; module
    itself is returned in that case. A previous parent of None is
    reported as ''.
    """
    previous = self.prevmodule
    if previous == module:
        return module
    target = self.baseurl + urllib.quote(module)
    self.ui.debug(_("reparent to %s\n") % target)
    svn.ra.reparent(self.ra, target)
    self.prevmodule = module
    if previous is None:
        return ''
    return previous
612 612
def expandpaths(self, rev, paths, parents):
    """Expand the changed paths of rev into a list of files and a
    copies map.

    paths is a sequence of (path, entry) pairs from the revision log
    and parents the list of parent revision ids. Returns
    (entries, copies): entries lists the affected file paths without
    duplicates, copies maps copy destinations to their sources.
    """
    entries = []
    # Map of entrypath, revision for finding source of deleted
    # revisions.
    copyfrom = {}
    copies = {}

    new_module, revnum = self.revsplit(rev)[1:]
    if new_module != self.module:
        self.module = new_module
        self.reparent(self.module)

    for path, ent in paths:
        entrypath = self.getrelpath(path)
        entry = entrypath.decode(self.encoding)

        kind = self._checkpath(entrypath, revnum)
        if kind == svn.core.svn_node_file:
            entries.append(self.recode(entry))
            if not ent.copyfrom_path or not parents:
                continue
            # Copy sources not in parent revisions cannot be
            # represented, ignore their origin for now
            pmodule, prevnum = self.revsplit(parents[0])[1:]
            if ent.copyfrom_rev < prevnum:
                continue
            copyfrom_path = self.getrelpath(ent.copyfrom_path, pmodule)
            if not copyfrom_path:
                continue
            self.ui.debug(_("copied to %s from %s@%s\n") %
                          (entrypath, copyfrom_path, ent.copyfrom_rev))
            copies[self.recode(entry)] = self.recode(copyfrom_path)
        elif kind == 0: # gone, but had better be a deleted *file*
            self.ui.debug(_("gone from %s\n") % ent.copyfrom_rev)

            # if a branch is created but entries are removed in
            # the same changeset, get the right fromrev
            # parents cannot be empty here, you cannot remove
            # things from a root revision.
            uuid, old_module, fromrev = self.revsplit(parents[0])

            basepath = old_module + "/" + self.getrelpath(path)
            entrypath = basepath

            def lookup_parts(p):
                # Return (prefix, entry) for the longest proper prefix
                # of p recorded in copyfrom, or None if there is none.
                rc = None
                parts = p.split("/")
                for i in range(len(parts)):
                    part = "/".join(parts[:i])
                    info = part, copyfrom.get(part, None)
                    if info[1] is not None:
                        self.ui.debug(_("found parent directory %s\n") % info[1])
                        rc = info
                return rc

            self.ui.debug(_("base, entry %s %s\n") % (basepath, entrypath))

            frompath, froment = lookup_parts(entrypath) or (None, revnum - 1)

            # need to remove fragment from lookup_parts and
            # replace with copyfrom_path
            if frompath is not None:
                self.ui.debug(_("munge-o-matic\n"))
                self.ui.debug(entrypath + '\n')
                self.ui.debug(entrypath[len(frompath):] + '\n')
                entrypath = froment.copyfrom_path + entrypath[len(frompath):]
                fromrev = froment.copyfrom_rev
                self.ui.debug(_("info: %s %s %s %s\n") % (frompath, froment, ent, entrypath))

            # We could avoid the reparent calls if the module has
            # not changed, but it is probably not worth the pain.
            prevmodule = self.reparent('')
            fromkind = svn.ra.check_path(self.ra, entrypath.strip('/'), fromrev)
            self.reparent(prevmodule)

            if fromkind == svn.core.svn_node_file:
                entries.append(self.recode(entry))
            elif fromkind == svn.core.svn_node_dir:
                # Sometimes this is tricky. For example: in The
                # Subversion Repository revision 6940 a dir was
                # copied and one of its files was deleted from the
                # new location in the same commit. This code can't
                # deal with that yet.
                if ent.action == 'C':
                    children = self._find_children(path, fromrev)
                else:
                    oroot = entrypath.strip('/')
                    nroot = path.strip('/')
                    children = self._find_children(oroot, fromrev)
                    children = [s.replace(oroot,nroot) for s in children]

                # Record every child of the deleted directory that
                # belongs to the old module; a child that was also
                # recorded as a copy destination loses its copy record.
                for child in children:
                    entrypath = self.getrelpath("/" + child, old_module)
                    if not entrypath:
                        continue
                    if entrypath in copies:
                        del copies[entrypath]
                    entries.append(entrypath)
            else:
                self.ui.debug(_('unknown path in revision %d: %s\n') % \
                              (revnum, path))
        elif kind == svn.core.svn_node_dir:
            # If the directory just had a prop change,
            # then we shouldn't need to look for its children.
            if ent.action == 'M':
                continue

            children = sorted(self._find_children(path, revnum))
            for child in children:
                # Can we move a child directory and its
                # parent in the same commit? (probably can). Could
                # cause problems if instead of revnum -1,
                # we have to look in (copyfrom_path, revnum - 1)
                entrypath = self.getrelpath("/" + child)
                if entrypath:
                    # Need to filter out directories here...
                    kind = self._checkpath(entrypath, revnum)
                    if kind != svn.core.svn_node_dir:
                        entries.append(self.recode(entrypath))

            # Handle directory copies
            if not ent.copyfrom_path or not parents:
                continue
            # Copy sources not in parent revisions cannot be
            # represented, ignore their origin for now
            pmodule, prevnum = self.revsplit(parents[0])[1:]
            if ent.copyfrom_rev < prevnum:
                continue
            copyfrompath = self.getrelpath(ent.copyfrom_path, pmodule)
            if not copyfrompath:
                continue
            copyfrom[path] = ent
            self.ui.debug(_("mark %s came from %s:%d\n")
                          % (path, copyfrompath, ent.copyfrom_rev))
            children = self._find_children(ent.copyfrom_path, ent.copyfrom_rev)
            children.sort()
            for child in children:
                entrypath = self.getrelpath("/" + child, pmodule)
                if not entrypath:
                    continue
                copytopath = path + entrypath[len(copyfrompath):]
                copytopath = self.getrelpath(copytopath)
                copies[self.recode(copytopath)] = self.recode(entrypath)

    return (list(set(entries)), copies)
764 758
def _fetch_revisions(self, from_revnum, to_revnum):
    """Fetch the log of self.module between the two revisions and fill
    self.commits and self.paths with the parsed entries.

    The bounds may be given in either order; the log is requested from
    the higher revision to the lower one. Raises util.Abort when
    Subversion reports that the revision does not exist.
    """
    if from_revnum < to_revnum:
        from_revnum, to_revnum = to_revnum, from_revnum

    # Commit parsed for the previous log entry; its parent is filled
    # in when the next entry is parsed.
    self.child_cset = None

    def parselogentry(orig_paths, revnum, author, date, message):
        """Return the parsed commit object or None, and True if
        the revision is a branch root.
        """
        self.ui.debug(_("parsing revision %d (%d changes)\n") %
                      (revnum, len(orig_paths)))

        branched = False
        rev = self.revid(revnum)
        # branch log might return entries for a parent we already have

        if rev in self.commits or revnum < to_revnum:
            return None, branched

        parents = []
        # check whether this revision is the start of a branch or part
        # of a branch renaming
        orig_paths = sorted(orig_paths.iteritems())
        root_paths = [(p,e) for p,e in orig_paths if self.module.startswith(p)]
        if root_paths:
            # Paths are sorted, so the last match is the greatest
            # changed path that prefixes the module.
            path, ent = root_paths[-1]
            if ent.copyfrom_path:
                branched = True
                newpath = ent.copyfrom_path + self.module[len(path):]
                # ent.copyfrom_rev may not be the actual last revision
                previd = self.latest(newpath, ent.copyfrom_rev)
                if previd is not None:
                    prevmodule, prevnum = self.revsplit(previd)[1:]
                    if prevnum >= self.startrev:
                        parents = [previd]
                        self.ui.note(_('found parent of branch %s at %d: %s\n') %
                                     (self.module, prevnum, prevmodule))
            else:
                self.ui.debug(_("no copyfrom path, don't know what to do.\n"))

        paths = []
        # filter out unrelated paths
        for path, ent in orig_paths:
            if self.getrelpath(path) is None:
                continue
            paths.append((path, ent))

        # Example SVN datetime. Includes microseconds.
        # ISO-8601 conformant
        # '2007-01-04T17:35:00.902377Z'
        date = util.parsedate(date[:19] + " UTC", ["%Y-%m-%dT%H:%M:%S"])

        log = message and self.recode(message) or ''
        author = author and self.recode(author) or ''
        try:
            # The branch name is the last component of the module
            # path; 'trunk' is mapped to the empty branch name.
            branch = self.module.split("/")[-1]
            if branch == 'trunk':
                branch = ''
        except IndexError:
            branch = None

        cset = commit(author=author,
                      date=util.datestr(date),
                      desc=log,
                      parents=parents,
                      branch=branch,
                      rev=rev.encode('utf-8'))

        self.commits[rev] = cset
        # The parents list is *shared* among self.paths and the
        # commit object. Both will be updated below.
        self.paths[rev] = (paths, cset.parents)
        if self.child_cset and not self.child_cset.parents:
            self.child_cset.parents[:] = [rev]
        self.child_cset = cset
        return cset, branched

    self.ui.note(_('fetching revision log for "%s" from %d to %d\n') %
                 (self.module, from_revnum, to_revnum))

    try:
        firstcset = None
        lastonbranch = False
        stream = self._getlog([self.module], from_revnum, to_revnum)
        try:
            for entry in stream:
                paths, revnum, author, date, message = entry
                if revnum < self.startrev:
                    lastonbranch = True
                    break
                if self.is_blacklisted(revnum):
                    self.ui.note(_('skipping blacklisted revision %d\n')
                                 % revnum)
                    continue
                if not paths:
                    self.ui.debug(_('revision %d has no entries\n') % revnum)
                    continue
                cset, lastonbranch = parselogentry(paths, revnum, author,
                                                   date, message)
                if cset:
                    firstcset = cset
                if lastonbranch:
                    break
        finally:
            stream.close()

        if not lastonbranch and firstcset and not firstcset.parents:
            # The first revision of the sequence (the last fetched one)
            # has invalid parents if not a branch root. Find the parent
            # revision now, if any.
            try:
                firstrevnum = self.revnum(firstcset.rev)
                if firstrevnum > 1:
                    latest = self.latest(self.module, firstrevnum - 1)
                    if latest:
                        firstcset.parents.append(latest)
            except SvnPathNotFound:
                pass
    except SubversionException, (inst, num):
        if num == svn.core.SVN_ERR_FS_NO_SUCH_REVISION:
            raise util.Abort(_('svn: branch has no revision %s') % to_revnum)
        raise
888 882
889 883 def _getfile(self, file, rev):
890 884 # TODO: ra.get_file transmits the whole file instead of diffs.
891 885 mode = ''
892 886 try:
893 887 new_module, revnum = self.revsplit(rev)[1:]
894 888 if self.module != new_module:
895 889 self.module = new_module
896 890 self.reparent(self.module)
897 891 io = StringIO()
898 892 info = svn.ra.get_file(self.ra, file, revnum, io)
899 893 data = io.getvalue()
900 894 # ra.get_files() seems to keep a reference on the input buffer
901 895 # preventing collection. Release it explicitely.
902 896 io.close()
903 897 if isinstance(info, list):
904 898 info = info[-1]
905 899 mode = ("svn:executable" in info) and 'x' or ''
906 900 mode = ("svn:special" in info) and 'l' or mode
907 901 except SubversionException, e:
908 902 notfound = (svn.core.SVN_ERR_FS_NOT_FOUND,
909 903 svn.core.SVN_ERR_RA_DAV_PATH_NOT_FOUND)
910 904 if e.apr_err in notfound: # File not found
911 905 raise IOError()
912 906 raise
913 907 if mode == 'l':
914 908 link_prefix = "link "
915 909 if data.startswith(link_prefix):
916 910 data = data[len(link_prefix):]
917 911 return data, mode
918 912
def _find_children(self, path, revnum):
    """List the entries svn.client.ls reports below path at revnum.

    Each returned name is prefixed with path (stripped of surrounding
    slashes).
    """
    path = path.strip('/')
    pool = Pool()
    url = '/'.join([self.baseurl, urllib.quote(path)]).strip('/')
    # NOTE(review): the third argument is presumably the "recurse" flag
    # of svn_client_ls -- confirm against the bindings.
    listing = svn.client.ls(url, optrev(revnum), True, self.ctx, pool)
    return ['%s/%s' % (path, name) for name in listing.keys()]
925 919
def getrelpath(self, path, module=None):
    """Return path relative to module, or None when it lies outside.

    module defaults to self.module. For example, with module
    "/CMFPlone/branches/Plone-2_0-branch" the log path
    "/CMFPlone/branches/Plone-2_0-branch/tests/PloneTestCase.py"
    maps to "tests/PloneTestCase.py". The module itself maps to ''.
    """
    if module is None:
        module = self.module
    if path.startswith(module):
        tail = path.rstrip('/')[len(module):]
        if not tail:
            # path is the module itself
            return tail
        if tail[0] == '/':
            return tail[1:]
        # Otherwise module is only a string prefix of a sibling name
        # (e.g. "/trunk" vs "/trunk2"): fall through as unrelated.

    # The path is outside our tracked tree...
    self.ui.debug(_('%r is not under %r, ignoring\n') % (path, module))
    return None
945 939
def _checkpath(self, path, revnum):
    """Return svn.ra.check_path's result for path at revnum."""
    # ra.check_path does not like leading slashes very much, it leads
    # to PROPFIND subversion errors
    relpath = path.strip('/')
    return svn.ra.check_path(self.ra, relpath, revnum)
950 944
def _getlog(self, paths, start, end, limit=0, discover_changed_paths=True,
            strict_node_history=False):
    """Start an 'hg debugsvnlog' child process and return its log stream.

    The encoded request is written to the child's stdin and its stdout
    is wrapped in a logstream object, which is returned.
    """
    def relativize(p):
        # Normalize path names, svn >= 1.5 only wants paths relative to
        # supplied URL
        if not p.startswith('/'):
            p = self.module + '/' + p
        return p.strip('/')

    relpaths = [relativize(p) for p in paths]
    request = encodeargs([self.baseurl, relpaths, start, end, limit,
                          discover_changed_paths, strict_node_history])
    cmd = '%s debugsvnlog' % util.shellquote(util.hgexecutable())
    child_in, child_out = util.popen2(cmd)
    child_in.write(request)
    child_in.close()
    return logstream(child_out)
969 963
# Shell script written by svn_sink.__init__ as the 'pre-revprop-change'
# hook of repositories it creates. It permits modifying svn:log and adding
# the hg:convert-branch / hg:convert-rev revision properties, and rejects
# every other revision property change.
pre_revprop_change = '''#!/bin/sh

REPOS="$1"
REV="$2"
USER="$3"
PROPNAME="$4"
ACTION="$5"

if [ "$ACTION" = "M" -a "$PROPNAME" = "svn:log" ]; then exit 0; fi
if [ "$ACTION" = "A" -a "$PROPNAME" = "hg:convert-branch" ]; then exit 0; fi
if [ "$ACTION" = "A" -a "$PROPNAME" = "hg:convert-rev" ]; then exit 0; fi

echo "Changing prohibited revision property" >&2
exit 1
'''
985 979
class svn_sink(converter_sink, commandline):
    """Conversion sink that commits into Subversion via a working copy.

    File changes are applied directly to the working copy, then recorded
    and committed with the 'svn' command line client.
    """
    commit_re = re.compile(r'Committed revision (\d+).', re.M)

    def prerun(self):
        # NOTE(review): presumably called by commandline before running a
        # command, so that svn operates from inside the working copy.
        if self.wc:
            os.chdir(self.wc)

    def postrun(self):
        # Restore the directory the process was in at construction time.
        if self.wc:
            os.chdir(self.cwd)

    def join(self, name):
        """Return the path of name inside the working copy '.svn' dir."""
        return os.path.join(self.wc, '.svn', name)

    def revmapfile(self):
        return self.join('hg-shamap')

    def authorfile(self):
        return self.join('hg-authormap')

    def __init__(self, ui, path):
        """Open the sink at path, creating repository/checkout as needed.

        If path is already an svn working copy it is updated and used
        as is. Otherwise a working copy named '<basename>-wc' is checked
        out in the current directory; when the parent directory of path
        exists and path does not look like an svn repository, one is
        created there first.
        """
        converter_sink.__init__(self, ui, path)
        commandline.__init__(self, ui, 'svn')
        self.delete = []
        self.setexec = []
        self.delexec = []
        self.copies = []
        self.wc = None
        self.cwd = os.getcwd()

        path = os.path.realpath(path)

        created = False
        if os.path.isfile(os.path.join(path, '.svn', 'entries')):
            self.wc = path
            self.run0('update')
        else:
            wcpath = os.path.join(os.getcwd(), os.path.basename(path) + '-wc')

            if os.path.isdir(os.path.dirname(path)):
                if not os.path.exists(os.path.join(path, 'db', 'fs-type')):
                    ui.status(_('initializing svn repo %r\n') %
                              os.path.basename(path))
                    commandline(ui, 'svnadmin').run0('create', path)
                    created = path
                # Turn the local path into a file:// URL
                path = util.normpath(path)
                if not path.startswith('/'):
                    path = '/' + path
                path = 'file://' + path

            ui.status(_('initializing svn wc %r\n') % os.path.basename(wcpath))
            self.run0('checkout', path, wcpath)

            self.wc = wcpath
        self.opener = util.opener(self.wc)
        self.wopener = util.opener(self.wc)
        self.childmap = mapfile(ui, self.join('hg-childmap'))
        # None when the filesystem cannot store the executable bit
        self.is_exec = util.checkexec(self.wc) and util.is_exec or None

        if created:
            # Install the hook allowing the revision property changes
            # performed by putcommit().
            hook = os.path.join(created, 'hooks', 'pre-revprop-change')
            fp = open(hook, 'w')
            try:
                fp.write(pre_revprop_change)
            finally:
                fp.close()
            util.set_flags(hook, False, True)

        xport = transport.SvnRaTransport(url=geturl(path))
        self.uuid = svn.ra.get_uuid(xport.ra)

    def wjoin(self, *names):
        """Join names below the working copy root."""
        return os.path.join(self.wc, *names)

    def putfile(self, filename, flags, data):
        """Write data to filename in the working copy and track exec bit.

        A symlink is created when flags contains 'l'. Files whose
        executable status changes are queued in self.setexec or
        self.delexec for a later svn:executable property update.
        """
        if 'l' in flags:
            self.wopener.symlink(data, filename)
        else:
            try:
                # Remove a stale symlink so the data is not written through
                # it. The path must be resolved against the working copy:
                # the process is not necessarily running from there.
                wfilename = self.wjoin(filename)
                if os.path.islink(wfilename):
                    os.unlink(wfilename)
            except OSError:
                pass
            fp = self.wopener(filename, 'w')
            try:
                fp.write(data)
            finally:
                fp.close()

            if self.is_exec:
                was_exec = self.is_exec(self.wjoin(filename))
            else:
                # On filesystems not supporting execute-bit, there is no way
                # to know if it is set but asking subversion. Setting it
                # systematically is just as expensive and much simpler.
                was_exec = 'x' not in flags

            util.set_flags(self.wjoin(filename), False, 'x' in flags)
            if was_exec:
                if 'x' not in flags:
                    self.delexec.append(filename)
            else:
                if 'x' in flags:
                    self.setexec.append(filename)

    def _copyfile(self, source, dest):
        """Record with 'svn copy' a copy whose destination may exist."""
        # SVN's copy command pukes if the destination file exists, but
        # our copyfile method expects to record a copy that has
        # already occurred. Cross the semantic gap.
        wdest = self.wjoin(dest)
        exists = os.path.exists(wdest)
        if exists:
            # Reserve a unique name next to the destination and move the
            # existing file there while svn performs the copy.
            fd, tempname = tempfile.mkstemp(
                prefix='hg-copy-', dir=os.path.dirname(wdest))
            os.close(fd)
            os.unlink(tempname)
            os.rename(wdest, tempname)
        try:
            self.run0('copy', source, dest)
        finally:
            if exists:
                # Put the original content back over whatever svn created.
                try:
                    os.unlink(wdest)
                except OSError:
                    pass
                os.rename(tempname, wdest)

    def dirs_of(self, files):
        """Return the set of directories among files plus their ancestors."""
        dirs = set()
        for f in files:
            if os.path.isdir(self.wjoin(f)):
                dirs.add(f)
            for i in strutil.rfindall(f, '/'):
                dirs.add(f[:i])
        return dirs

    def add_dirs(self, files):
        """'svn add' the directories of files lacking svn metadata.

        Returns the list of directories added.
        """
        add_dirs = [d for d in sorted(self.dirs_of(files))
                    if not os.path.exists(self.wjoin(d, '.svn', 'entries'))]
        if add_dirs:
            self.xargs(add_dirs, 'add', non_recursive=True, quiet=True)
        return add_dirs

    def add_files(self, files):
        """'svn add' files and return them."""
        if files:
            self.xargs(files, 'add', quiet=True)
        return files

    def tidy_dirs(self, names):
        """'svn delete' directories of names holding only svn metadata.

        Directories are visited in reverse sorted order, children before
        their parents. Returns the list of deleted directories.
        """
        deleted = []
        for d in sorted(self.dirs_of(names), reverse=True):
            wd = self.wjoin(d)
            # os.listdir() returns a list: compare against a list, not
            # against the bare '.svn' string (which never matches).
            if os.listdir(wd) == ['.svn']:
                self.run0('delete', d)
                deleted.append(d)
        return deleted

    def addchild(self, parent, child):
        self.childmap[parent] = child

    def revid(self, rev):
        """Return the converter identifier of svn revision rev."""
        return u"svn:%s@%s" % (self.uuid, rev)

    def putcommit(self, files, copies, parents, commit, source, revmap):
        """Apply a changeset to the working copy and commit it.

        Returns the identifier of the resulting svn revision. Raises
        util.Abort when the revision number cannot be found in the
        output of 'svn commit'.
        """
        # Apply changes to working copy
        for f, v in files:
            try:
                data = source.getfile(f, v)
            except IOError:
                # The source no longer has the file: schedule its deletion
                self.delete.append(f)
            else:
                e = source.getmode(f, v)
                self.putfile(f, e, data)
                if f in copies:
                    self.copies.append([copies[f], f])
        files = [f[0] for f in files]

        # If a parent already has a recorded child, return that child
        # instead of committing again.
        for parent in parents:
            try:
                return self.revid(self.childmap[parent])
            except KeyError:
                pass
        entries = set(self.delete)
        files = frozenset(files)
        entries.update(self.add_dirs(files.difference(entries)))
        if self.copies:
            for s, d in self.copies:
                self._copyfile(s, d)
            self.copies = []
        if self.delete:
            self.xargs(self.delete, 'delete')
            self.delete = []
        entries.update(self.add_files(files.difference(entries)))
        entries.update(self.tidy_dirs(entries))
        if self.delexec:
            self.xargs(self.delexec, 'propdel', 'svn:executable')
            self.delexec = []
        if self.setexec:
            self.xargs(self.setexec, 'propset', 'svn:executable', '*')
            self.setexec = []

        fd, messagefile = tempfile.mkstemp(prefix='hg-convert-')
        try:
            # Write the message inside the try block so the temporary
            # file is removed even if writing it fails.
            fp = os.fdopen(fd, 'w')
            try:
                fp.write(commit.desc)
            finally:
                fp.close()
            output = self.run0('commit',
                               username=util.shortuser(commit.author),
                               file=messagefile,
                               encoding='utf-8')
            try:
                rev = self.commit_re.search(output).group(1)
            except AttributeError:
                self.ui.warn(_('unexpected svn output:\n'))
                self.ui.warn(output)
                raise util.Abort(_('unable to cope with svn output'))
            # Record the origin of the revision as revision properties
            if commit.rev:
                self.run('propset', 'hg:convert-rev', commit.rev,
                         revprop=True, revision=rev)
            if commit.branch and commit.branch != 'default':
                self.run('propset', 'hg:convert-branch', commit.branch,
                         revprop=True, revision=rev)
            for parent in parents:
                self.addchild(parent, rev)
            return self.revid(rev)
        finally:
            os.unlink(messagefile)

    def puttags(self, tags):
        self.ui.warn(_('XXX TAGS NOT IMPLEMENTED YET\n'))
General Comments 0
You need to be logged in to leave comments. Login now