##// END OF EJS Templates
convert: more cleanup in svn directory copy handling
Patrick Mezard -
r6542:e7810e61 default
parent child Browse files
Show More
@@ -1,1114 +1,1116
1 1 # Subversion 1.4/1.5 Python API backend
2 2 #
3 3 # Copyright(C) 2007 Daniel Holth et al
4 4 #
5 5 # Configuration options:
6 6 #
7 7 # convert.svn.trunk
8 8 # Relative path to the trunk (default: "trunk")
9 9 # convert.svn.branches
10 10 # Relative path to tree of branches (default: "branches")
11 11 # convert.svn.tags
12 12 # Relative path to tree of tags (default: "tags")
13 13 #
14 14 # Set these in a hgrc, or on the command line as follows:
15 15 #
16 16 # hg convert --config convert.svn.trunk=wackoname [...]
17 17
18 18 import locale
19 19 import os
20 20 import re
21 21 import sys
22 22 import cPickle as pickle
23 23 import tempfile
24 24
25 25 from mercurial import strutil, util
26 26 from mercurial.i18n import _
27 27
28 28 # Subversion stuff. Works best with very recent Python SVN bindings
29 29 # e.g. SVN 1.5 or backports. Thanks to the bzr folks for enhancing
30 30 # these bindings.
31 31
32 32 from cStringIO import StringIO
33 33
34 34 from common import NoRepo, commit, converter_source, encodeargs, decodeargs
35 35 from common import commandline, converter_sink, mapfile
36 36
37 37 try:
38 38 from svn.core import SubversionException, Pool
39 39 import svn
40 40 import svn.client
41 41 import svn.core
42 42 import svn.ra
43 43 import svn.delta
44 44 import transport
45 45 except ImportError:
46 46 pass
47 47
def geturl(path):
    """Return the URL to use for *path*.

    The Subversion client is asked first (url_from_path on the
    canonicalized path). If that raises SubversionException, an existing
    directory is converted to a file:// URL and any other value is
    returned unchanged.
    """
    try:
        canonical = svn.core.svn_path_canonicalize(path)
        return svn.client.url_from_path(canonical)
    except SubversionException:
        # Fall back to the plain path handling below.
        pass
    if not os.path.isdir(path):
        return path
    path = os.path.normpath(os.path.abspath(path))
    if os.name == 'nt':
        # Prefix a slash so the result reads file:///<normalized path>.
        path = '/' + util.normpath(path)
    return 'file://%s' % path
59 59
def optrev(number):
    """Return an svn_opt_revision_t of kind 'number' set to *number*."""
    revision = svn.core.svn_opt_revision_t()
    revision.kind = svn.core.svn_opt_revision_number
    revision.value.number = number
    return revision
65 65
class changedpath(object):
    """Plain holder for the copyfrom_path, copyfrom_rev and action
    attributes copied from another object.
    """
    def __init__(self, p):
        # Copy the three attributes in a fixed order.
        for name in ('copyfrom_path', 'copyfrom_rev', 'action'):
            setattr(self, name, getattr(p, name))
71 71
72 72 def get_log_child(fp, url, paths, start, end, limit=0, discover_changed_paths=True,
73 73 strict_node_history=False):
74 74 protocol = -1
75 75 def receiver(orig_paths, revnum, author, date, message, pool):
76 76 if orig_paths is not None:
77 77 for k, v in orig_paths.iteritems():
78 78 orig_paths[k] = changedpath(v)
79 79 pickle.dump((orig_paths, revnum, author, date, message),
80 80 fp, protocol)
81 81
82 82 try:
83 83 # Use an ra of our own so that our parent can consume
84 84 # our results without confusing the server.
85 85 t = transport.SvnRaTransport(url=url)
86 86 svn.ra.get_log(t.ra, paths, start, end, limit,
87 87 discover_changed_paths,
88 88 strict_node_history,
89 89 receiver)
90 90 except SubversionException, (inst, num):
91 91 pickle.dump(num, fp, protocol)
92 92 except IOError:
93 93 # Caller may interrupt the iteration
94 94 pickle.dump(None, fp, protocol)
95 95 else:
96 96 pickle.dump(None, fp, protocol)
97 97 fp.close()
98 98 # With large history, cleanup process goes crazy and suddenly
99 99 # consumes *huge* amount of memory. The output file being closed,
100 100 # there is no need for clean termination.
101 101 os._exit(0)
102 102
def debugsvnlog(ui, **opts):
    """Fetch SVN log in a subprocess and channel them back to parent to
    avoid memory collection issues.
    """
    # Both ends of the pipe are switched to binary mode before use.
    for stream in (sys.stdin, sys.stdout):
        util.set_binary(stream)
    # The arguments for get_log_child are read, encoded, from stdin.
    args = decodeargs(sys.stdin.read())
    get_log_child(sys.stdout, *args)
111 111
class logstream:
    """Interruptible revision log iterator."""
    def __init__(self, stdout):
        # File-like object the pickled log entries are read from.
        self._stdout = stdout

    def __iter__(self):
        """Yield (paths, revnum, author, date, message) tuples.

        Iteration stops when a None trailer is read. Any other object
        that is not a five-element sequence is reported by raising
        SubversionException("child raised exception", entry).
        """
        while True:
            # NOTE(review): the stream is unpickled as-is; it is presumably
            # only ever the output of our own child process - confirm.
            entry = pickle.load(self._stdout)
            if entry is None:
                break
            try:
                orig_paths, revnum, author, date, message = entry
            except (TypeError, ValueError):
                # Not a log entry: only unpacking failures are handled
                # here, so unrelated exceptions are no longer swallowed.
                raise SubversionException("child raised exception", entry)
            yield entry

    def close(self):
        """Close the underlying stream; further calls are no-ops."""
        if self._stdout:
            self._stdout.close()
            self._stdout = None
132 132
def get_log(url, paths, start, end, limit=0, discover_changed_paths=True,
            strict_node_history=False):
    """Start 'hg debugsvnlog' in a child process and return a logstream.

    The arguments are encoded and written to the child's stdin, which is
    then closed; the returned logstream wraps the child's stdout.
    """
    payload = encodeargs([url, paths, start, end, limit,
                          discover_changed_paths, strict_node_history])
    cmd = '%s debugsvnlog' % util.shellquote(util.hgexecutable())
    stdin, stdout = os.popen2(cmd, 'b')
    stdin.write(payload)
    stdin.close()
    return logstream(stdout)
144 144
145 145 # SVN conversion code stolen from bzr-svn and tailor
146 146 #
147 147 # Subversion looks like a versioned filesystem, branches structures
148 148 # are defined by conventions and not enforced by the tool. First,
149 149 # we define the potential branches (modules) as "trunk" and "branches"
150 150 # children directories. Revisions are then identified by their
151 151 # module and revision number (and a repository identifier).
152 152 #
153 153 # The revision graph is really a tree (or a forest). By default, a
154 154 # revision parent is the previous revision in the same module. If the
155 155 # module directory is copied/moved from another module then the
156 156 # revision is the module root and its parent the source revision in
157 157 # the parent module. A revision has at most one parent.
158 158 #
159 159 class svn_source(converter_source):
160 160 def __init__(self, ui, url, rev=None):
161 161 super(svn_source, self).__init__(ui, url, rev=rev)
162 162
163 163 try:
164 164 SubversionException
165 165 except NameError:
166 166 raise NoRepo('Subversion python bindings could not be loaded')
167 167
168 168 self.encoding = locale.getpreferredencoding()
169 169 self.lastrevs = {}
170 170
171 171 latest = None
172 172 try:
173 173 # Support file://path@rev syntax. Useful e.g. to convert
174 174 # deleted branches.
175 175 at = url.rfind('@')
176 176 if at >= 0:
177 177 latest = int(url[at+1:])
178 178 url = url[:at]
179 179 except ValueError, e:
180 180 pass
181 181 self.url = geturl(url)
182 182 self.encoding = 'UTF-8' # Subversion is always nominal UTF-8
183 183 try:
184 184 self.transport = transport.SvnRaTransport(url=self.url)
185 185 self.ra = self.transport.ra
186 186 self.ctx = self.transport.client
187 187 self.base = svn.ra.get_repos_root(self.ra)
188 188 # Module is either empty or a repository path starting with
189 189 # a slash and not ending with a slash.
190 190 self.module = self.url[len(self.base):]
191 191 self.rootmodule = self.module
192 192 self.commits = {}
193 193 self.paths = {}
194 194 self.uuid = svn.ra.get_uuid(self.ra).decode(self.encoding)
195 195 except SubversionException, e:
196 196 ui.print_exc()
197 197 raise NoRepo("%s does not look like a Subversion repo" % self.url)
198 198
199 199 if rev:
200 200 try:
201 201 latest = int(rev)
202 202 except ValueError:
203 203 raise util.Abort('svn: revision %s is not an integer' % rev)
204 204
205 205 self.startrev = self.ui.config('convert', 'svn.startrev', default=0)
206 206 try:
207 207 self.startrev = int(self.startrev)
208 208 if self.startrev < 0:
209 209 self.startrev = 0
210 210 except ValueError:
211 211 raise util.Abort(_('svn: start revision %s is not an integer')
212 212 % self.startrev)
213 213
214 214 try:
215 215 self.get_blacklist()
216 216 except IOError, e:
217 217 pass
218 218
219 219 self.head = self.latest(self.module, latest)
220 220 if not self.head:
221 221 raise util.Abort(_('no revision found in module %s') %
222 222 self.module.encode(self.encoding))
223 223 self.last_changed = self.revnum(self.head)
224 224
225 225 self._changescache = None
226 226
227 227 if os.path.exists(os.path.join(url, '.svn/entries')):
228 228 self.wc = url
229 229 else:
230 230 self.wc = None
231 231 self.convertfp = None
232 232
def setrevmap(self, revmap):
    """Store in self.lastrevs the highest revision number seen per module
    among the revision ids that are the keys of *revmap*.
    """
    newest = {}
    for revid in revmap.iterkeys():
        uuid, module, revnum = self.revsplit(revid)
        if module not in newest or revnum > newest[module]:
            newest[module] = revnum
    self.lastrevs = newest
241 241
242 242 def exists(self, path, optrev):
243 243 try:
244 244 svn.client.ls(self.url.rstrip('/') + '/' + path,
245 245 optrev, False, self.ctx)
246 246 return True
247 247 except SubversionException, err:
248 248 return False
249 249
def getheads(self):
    """Return the list of head revision ids to convert.

    The configured (or default) trunk, tags and branches directories are
    looked up at self.last_changed. When a trunk exists it becomes the
    current module and its latest revision the first head; every
    directory below the branches directory with at least one revision
    adds another head. self.tags is set to the tags path, or None.

    Raises util.Abort when an explicitly configured directory is
    missing, when the module has no revision, or when a start revision
    is set together with several heads or lies after the only head.
    """

    def isdir(path, revnum):
        kind = svn.ra.check_path(self.ra, path, revnum)
        return kind == svn.core.svn_node_dir

    def getcfgpath(name, rev):
        cfgpath = self.ui.config('convert', 'svn.' + name)
        # An explicitly empty setting disables the directory.
        if cfgpath is not None and cfgpath.strip() == '':
            return None
        path = (cfgpath or name).strip('/')
        if not self.exists(path, rev):
            # Only a user-supplied path is required to exist.
            if cfgpath:
                raise util.Abort(_('expected %s to be at %r, but not found')
                                 % (name, path))
            return None
        self.ui.note(_('found %s at %r\n') % (name, path))
        return path

    rev = optrev(self.last_changed)
    oldmodule = ''
    trunk = getcfgpath('trunk', rev)
    self.tags = getcfgpath('tags', rev)
    branches = getcfgpath('branches', rev)

    # If the project has a trunk or branches, we will extract heads
    # from them. We keep the project root otherwise.
    if trunk:
        oldmodule = self.module or ''
        self.module += '/' + trunk
        self.head = self.latest(self.module, self.last_changed)
        if not self.head:
            raise util.Abort(_('no revision found in module %s') %
                             self.module.encode(self.encoding))

    # First head in the list is the module's head
    self.heads = [self.head]
    if self.tags is not None:
        self.tags = '%s/%s' % (oldmodule , (self.tags or 'tags'))

    # Check if branches bring a few more heads to the list
    if branches:
        rpath = self.url.strip('/')
        branchnames = svn.client.ls(rpath + '/' + branches, rev, False,
                                    self.ctx)
        for branch in branchnames.keys():
            module = '%s/%s/%s' % (oldmodule, branches, branch)
            if not isdir(module, self.last_changed):
                continue
            brevid = self.latest(module, self.last_changed)
            if not brevid:
                self.ui.note(_('ignoring empty branch %s\n') %
                             branch.encode(self.encoding))
                continue
            self.ui.note('found branch %s at %d\n' %
                         (branch, self.revnum(brevid)))
            self.heads.append(brevid)

    if self.startrev and self.heads:
        if len(self.heads) > 1:
            # Message fixed: the original repeated the word "with".
            raise util.Abort(_('svn: start revision is not supported '
                               'with more than one branch'))
        revnum = self.revnum(self.heads[0])
        if revnum < self.startrev:
            raise util.Abort(_('svn: no revision found after start revision %d')
                             % self.startrev)

    return self.heads
318 318
def getfile(self, file, rev):
    """Return the data of *file* at *rev* and remember its mode in
    self.modecache under the key (file, rev).
    """
    contents, flags = self._getfile(file, rev)
    self.modecache[(file, rev)] = flags
    return contents
323 323
def getmode(self, file, rev):
    """Return the mode cached for (file, rev); KeyError if none was stored."""
    key = (file, rev)
    return self.modecache[key]
326 326
def getchanges(self, rev):
    """Return (files, copies) for *rev*.

    *files* is a sorted list of (path, rev) pairs and *copies* a dict.
    A result cached by getchangedfiles() for the same revision is
    returned directly. Otherwise the mode cache is reset and the entry
    for *rev* is removed from self.paths once it has been used.
    """
    cached = self._changescache
    if cached and cached[0] == rev:
        return cached[1]
    self._changescache = None
    self.modecache = {}
    (paths, parents) = self.paths[rev]
    if not parents:
        # Perform a full checkout on roots
        uuid, module, revnum = self.revsplit(rev)
        entries = svn.client.ls(self.base + module, optrev(revnum),
                                True, self.ctx)
        files = [n for n,e in entries.iteritems()
                 if e.kind == svn.core.svn_node_file]
        copies = {}
    else:
        files, copies = self.expandpaths(rev, paths, parents)

    files.sort()
    files = [(name, rev) for name in files]

    # caller caches the result, so free it here to release memory
    del self.paths[rev]
    return (files, copies)
350 350
def getchangedfiles(self, rev, i):
    """Return the file names changed in *rev*.

    The full getchanges() result is kept in self._changescache so a
    following getchanges(rev) call can reuse it. *i* is not used.
    """
    changes = self.getchanges(rev)
    self._changescache = (rev, changes)
    names = []
    for entry in changes[0]:
        names.append(entry[0])
    return names
355 355
def getcommit(self, rev):
    """Return the commit object for *rev*, fetching its log if needed.

    The commit is removed from self.commits before being returned.
    """
    if rev not in self.commits:
        uuid, module, revnum = self.revsplit(rev)
        self.module = module
        self.reparent(module)
        # We assume that:
        # - requests for revisions after "stop" come from the
        # revision graph backward traversal. Cache all of them
        # down to stop, they will be used eventually.
        # - requests for revisions before "stop" come to get
        # isolated branches parents. Just fetch what is needed.
        stop = self.lastrevs.get(module, 0)
        if revnum < stop:
            stop = revnum + 1
        self._fetch_revisions(revnum, stop)
    # caller caches the result, so free it here to release memory
    return self.commits.pop(rev)
375 375
376 376 def gettags(self):
377 377 tags = {}
378 378 if self.tags is None:
379 379 return tags
380 380
381 381 # svn tags are just a convention, project branches left in a
382 382 # 'tags' directory. There is no other relationship than
383 383 # ancestry, which is expensive to discover and makes them hard
384 384 # to update incrementally. Worse, past revisions may be
385 385 # referenced by tags far away in the future, requiring a deep
386 386 # history traversal on every calculation. Current code
387 387 # performs a single backward traversal, tracking moves within
388 388 # the tags directory (tag renaming) and recording a new tag
389 389 # everytime a project is copied from outside the tags
390 390 # directory. It also lists deleted tags, this behaviour may
391 391 # change in the future.
392 392 pendings = []
393 393 tagspath = self.tags
394 394 start = svn.ra.get_latest_revnum(self.ra)
395 395 try:
396 396 for entry in get_log(self.url, [self.tags], start, self.startrev):
397 397 origpaths, revnum, author, date, message = entry
398 398 copies = [(e.copyfrom_path, e.copyfrom_rev, p) for p, e
399 399 in origpaths.iteritems() if e.copyfrom_path]
400 400 copies.sort()
401 401 # Apply moves/copies from more specific to general
402 402 copies.reverse()
403 403
404 404 srctagspath = tagspath
405 405 if copies and copies[-1][2] == tagspath:
406 406 # Track tags directory moves
407 407 srctagspath = copies.pop()[0]
408 408
409 409 for source, sourcerev, dest in copies:
410 410 if not dest.startswith(tagspath + '/'):
411 411 continue
412 412 for tag in pendings:
413 413 if tag[0].startswith(dest):
414 414 tagpath = source + tag[0][len(dest):]
415 415 tag[:2] = [tagpath, sourcerev]
416 416 break
417 417 else:
418 418 pendings.append([source, sourcerev, dest.split('/')[-1]])
419 419
420 420 # Tell tag renamings from tag creations
421 421 remainings = []
422 422 for source, sourcerev, tagname in pendings:
423 423 if source.startswith(srctagspath):
424 424 remainings.append([source, sourcerev, tagname])
425 425 continue
426 426 # From revision may be fake, get one with changes
427 427 tagid = self.latest(source, sourcerev)
428 428 if tagid:
429 429 tags[tagname] = tagid
430 430 pendings = remainings
431 431 tagspath = srctagspath
432 432
433 433 except SubversionException, (inst, num):
434 434 self.ui.note('no tags found at revision %d\n' % start)
435 435 return tags
436 436
def converted(self, rev, destrev):
    """Append '<destrev> <revnum of rev>' to <wc>/.svn/hg-shamap.

    Does nothing when no working copy is known. The file is opened in
    append mode on first use, kept open in self.convertfp, and flushed
    after every write.
    """
    if not self.wc:
        return
    fp = self.convertfp
    if fp is None:
        fp = open(os.path.join(self.wc, '.svn', 'hg-shamap'), 'a')
        self.convertfp = fp
    fp.write('%s %d\n' % (destrev, self.revnum(rev)))
    fp.flush()
445 445
446 446 # -- helper functions --
447 447
def revid(self, revnum, module=None):
    """Return the unicode id u"svn:<uuid><module>@<revnum>".

    self.module is used when *module* is empty or None; the module is
    decoded with self.encoding.
    """
    module = module or self.module
    decoded = module.decode(self.encoding)
    return u"svn:%s%s@%s" % (self.uuid, decoded, revnum)
453 453
def revnum(self, rev):
    """Return as an int the text following the last '@' of *rev*
    (the whole string when it contains no '@').
    """
    # rfind() yields -1 without '@', so the slice then starts at 0.
    return int(rev[rev.rfind('@') + 1:])
456 456
def revsplit(self, rev):
    """Split a revision id into (uuid, module, revnum).

    *rev* is encoded with self.encoding and cut at its first '@'. The
    first four characters of the left part are dropped; what precedes
    the first '/' is the uuid, the rest (slash included) the module,
    which is '' when there is no slash. revnum is an int.
    """
    url, revnum = rev.encode(self.encoding).split('@', 1)
    revnum = int(revnum)
    slash = url.find('/')
    if slash < 0:
        return url[4:], '', revnum
    return url[:slash][4:], url[slash:], revnum
466 466
def latest(self, path, stop=0):
    """Find the latest revid affecting path, up to stop. It may return
    a revision in a different module, since a branch may be moved without
    a change being reported. Return None if computed module does not
    belong to rootmodule subtree.

    A false *stop* means the latest repository revision. Raises
    util.Abort when *path* cannot be stat'ed at *stop*. The RA session is
    reparented to the repository root for the stat call and is put back
    on self.module afterwards, whether or not the call succeeded.
    """
    if not path.startswith(self.rootmodule):
        # Requests on foreign branches may be forbidden at server level
        self.ui.debug(_('ignoring foreign branch %r\n') % path)
        return None

    if not stop:
        stop = svn.ra.get_latest_revnum(self.ra)
    try:
        self.reparent('')
        try:
            dirent = svn.ra.stat(self.ra, path.strip('/'), stop)
        finally:
            # Restore the session root even when stat() raises: callers
            # may swallow the Abort raised below and keep using the
            # session with module-relative paths.
            self.reparent(self.module)
    except SubversionException:
        dirent = None
    if not dirent:
        raise util.Abort('%s not found up to revision %d' % (path, stop))

    # stat() gives us the previous revision on this line of development, but
    # it might be in *another module*. Fetch the log and detect renames down
    # to the latest revision.
    stream = get_log(self.url, [path], stop, dirent.created_rev)
    try:
        for entry in stream:
            paths, revnum, author, date, message = entry
            if revnum <= dirent.created_rev:
                break

            for p in paths:
                if not path.startswith(p) or not paths[p].copyfrom_path:
                    continue
                newpath = paths[p].copyfrom_path + path[len(p):]
                self.ui.debug("branch renamed from %s to %s at %d\n" %
                              (path, newpath, revnum))
                path = newpath
                break
    finally:
        stream.close()

    if not path.startswith(self.rootmodule):
        self.ui.debug(_('ignoring foreign branch %r\n') % path)
        return None
    return self.revid(dirent.created_rev, path)
514 514
def get_blacklist(self):
    """Avoid certain revision numbers.
    It is not uncommon for two nearby revisions to cancel each other
    out, e.g. 'I copied trunk into a subdirectory of itself instead
    of making a branch'. The converted repository is significantly
    smaller if we ignore such revisions.

    Reads "blacklist.txt" (relative to the current directory) and stores
    the integers it lists, one per line, in self.blacklist. Lines
    starting with '#' and lines that are not integers are skipped.
    self.blacklist is set to an empty set before the file is opened, so
    it exists even when opening raises IOError.
    """
    self.blacklist = util.set()
    blacklist = self.blacklist
    fp = open("blacklist.txt", "r")
    try:
        for line in fp:
            if line.startswith("#"):
                continue
            try:
                blacklist.add(int(line.strip()))
            except ValueError:
                pass # not an integer or a comment
    finally:
        # Close the file explicitly instead of relying on collection.
        fp.close()
530 530
def is_blacklisted(self, svn_rev):
    """Return True when *svn_rev* is a member of self.blacklist."""
    skipped = self.blacklist
    return svn_rev in skipped
533 533
def reparent(self, module):
    """Reparent the RA session to self.base + *module*.

    The target URL is encoded with self.encoding and logged at debug
    level first.
    """
    target = (self.base + module).encode(self.encoding)
    self.ui.debug("reparent to %s\n" % target)
    svn.ra.reparent(self.ra, target)
538 538
def expandpaths(self, rev, paths, parents):
    """Expand the changed paths of *rev* into file entries and copies.

    *paths* is a sequence of (path, log entry) pairs and *parents* the
    non-empty list of parent revision ids. Returns a pair made of the
    changed files (module-relative, recoded, deduplicated through
    util.unique) and a dict mapping copy destinations to their sources.
    The RA session is reparented when *rev* belongs to another module
    than the current one.
    """
    entries = []
    # Maps a copied directory path to its log entry; used to find the
    # source of paths deleted in this revision.
    copyfrom = {}
    copies = {}

    new_module, revnum = self.revsplit(rev)[1:]
    if new_module != self.module:
        self.module = new_module
        self.reparent(self.module)

    for path, ent in paths:
        entrypath = self.getrelpath(path)
        entry = entrypath.decode(self.encoding)

        kind = svn.ra.check_path(self.ra, entrypath, revnum)
        if kind == svn.core.svn_node_file:
            if ent.copyfrom_path:
                copyfrom_path = self.getrelpath(ent.copyfrom_path)
                if copyfrom_path:
                    self.ui.debug("Copied to %s from %s@%s\n" %
                                  (entrypath, copyfrom_path,
                                   ent.copyfrom_rev))
                    # It's probably important for hg that the source
                    # exists in the revision's parent, not just the
                    # ent.copyfrom_rev
                    fromkind = svn.ra.check_path(self.ra, copyfrom_path,
                                                 ent.copyfrom_rev)
                    if fromkind != 0:
                        copies[self.recode(entry)] = self.recode(copyfrom_path)
            entries.append(self.recode(entry))
        elif kind == 0: # gone, but had better be a deleted *file*
            self.ui.debug("gone from %s\n" % ent.copyfrom_rev)

            # if a branch is created but entries are removed in the same
            # changeset, get the right fromrev
            # parents cannot be empty here, you cannot remove things from
            # a root revision.
            uuid, old_module, fromrev = self.revsplit(parents[0])

            basepath = old_module + "/" + self.getrelpath(path)
            entrypath = basepath

            def lookup_parts(p):
                # Return (prefix, entry) for the longest proper prefix
                # of p recorded in copyfrom, or None.
                rc = None
                parts = p.split("/")
                for i in range(len(parts)):
                    part = "/".join(parts[:i])
                    info = part, copyfrom.get(part, None)
                    if info[1] is not None:
                        self.ui.debug("Found parent directory %s\n" % info[1])
                        rc = info
                return rc

            self.ui.debug("base, entry %s %s\n" % (basepath, entrypath))

            frompath, froment = lookup_parts(entrypath) or (None, revnum - 1)

            # need to remove fragment from lookup_parts and replace with
            # copyfrom_path
            if frompath is not None:
                self.ui.debug("munge-o-matic\n")
                self.ui.debug(entrypath + '\n')
                self.ui.debug(entrypath[len(frompath):] + '\n')
                entrypath = froment.copyfrom_path + entrypath[len(frompath):]
                fromrev = froment.copyfrom_rev
                self.ui.debug("Info: %s %s %s %s\n" %
                              (frompath, froment, ent, entrypath))

            # We could avoid the reparent calls if the module has not
            # changed, but it is probably not worth the pain.
            self.reparent('')
            fromkind = svn.ra.check_path(self.ra, entrypath.strip('/'), fromrev)
            self.reparent(self.module)

            if fromkind == svn.core.svn_node_file: # a deleted file
                entries.append(self.recode(entry))
            elif fromkind == svn.core.svn_node_dir:
                # Sometimes this is tricky. For example: in
                # The Subversion Repository revision 6940 a dir
                # was copied and one of its files was deleted
                # from the new location in the same commit. This
                # code can't deal with that yet.
                if ent.action == 'C':
                    children = self._find_children(path, fromrev)
                else:
                    oroot = entrypath.strip('/')
                    nroot = path.strip('/')
                    children = self._find_children(oroot, fromrev)
                    children = [s.replace(oroot,nroot) for s in children]
                # Mark all [files, not directories] as deleted.
                for child in children:
                    # Can we move a child directory and its
                    # parent in the same commit? (probably can). Could
                    # cause problems if instead of revnum -1,
                    # we have to look in (copyfrom_path, revnum - 1)
                    entrypath = self.getrelpath("/" + child, module=old_module)
                    if entrypath:
                        entry = self.recode(entrypath.decode(self.encoding))
                        if entry in copies:
                            # deleted file within a copy
                            del copies[entry]
                        else:
                            entries.append(entry)
            else:
                self.ui.debug('unknown path in revision %d: %s\n' % \
                              (revnum, path))
        elif kind == svn.core.svn_node_dir:
            # Should probably synthesize normal file entries
            # and handle as above to clean up copy/rename handling.

            # If the directory just had a prop change,
            # then we shouldn't need to look for its children.
            if ent.action == 'M':
                continue

            # Also this could create duplicate entries. Not sure
            # whether this will matter. Maybe should make entries a set.
            # This will fail if a directory was copied
            # from another branch and then some of its files
            # were deleted in the same transaction.
            children = self._find_children(path, revnum)
            children.sort()
            for child in children:
                # Can we move a child directory and its
                # parent in the same commit? (probably can). Could
                # cause problems if instead of revnum -1,
                # we have to look in (copyfrom_path, revnum - 1)
                entrypath = self.getrelpath("/" + child)
                if entrypath:
                    # Need to filter out directories here...
                    kind = svn.ra.check_path(self.ra, entrypath, revnum)
                    if kind != svn.core.svn_node_dir:
                        entries.append(self.recode(entrypath))

            # Copies here (must copy all from source)
            # Probably not a real problem for us if
            # source does not exist
            if not ent.copyfrom_path:
                continue
            copyfrompath = self.getrelpath(ent.copyfrom_path.decode(self.encoding))
            if not copyfrompath:
                continue
            copyfrom[path] = ent
            self.ui.debug("mark %s came from %s:%d\n"
                          % (path, copyfrompath, ent.copyfrom_rev))

            # Good, /probably/ a regular copy. Really should check
            # to see whether the parent revision actually contains
            # the directory in question.
            children = self._find_children(ent.copyfrom_path, ent.copyfrom_rev)
            children.sort()
            for child in children:
                entrypath = self.getrelpath("/" + child)
                if not entrypath:
                    continue
                entry = entrypath.decode(self.encoding)
                copytopath = path + entry[len(copyfrompath):]
                copytopath = self.getrelpath(copytopath)
                copies[self.recode(copytopath)] = self.recode(entry)

    return (util.unique(entries), copies)
700 702
701 703 def _fetch_revisions(self, from_revnum, to_revnum):
702 704 if from_revnum < to_revnum:
703 705 from_revnum, to_revnum = to_revnum, from_revnum
704 706
705 707 self.child_cset = None
706 708 def parselogentry(orig_paths, revnum, author, date, message):
707 709 """Return the parsed commit object or None, and True if
708 710 the revision is a branch root.
709 711 """
710 712 self.ui.debug("parsing revision %d (%d changes)\n" %
711 713 (revnum, len(orig_paths)))
712 714
713 715 branched = False
714 716 rev = self.revid(revnum)
715 717 # branch log might return entries for a parent we already have
716 718
717 719 if (rev in self.commits or revnum < to_revnum):
718 720 return None, branched
719 721
720 722 parents = []
721 723 # check whether this revision is the start of a branch or part
722 724 # of a branch renaming
723 725 orig_paths = orig_paths.items()
724 726 orig_paths.sort()
725 727 root_paths = [(p,e) for p,e in orig_paths if self.module.startswith(p)]
726 728 if root_paths:
727 729 path, ent = root_paths[-1]
728 730 if ent.copyfrom_path:
729 731 branched = True
730 732 newpath = ent.copyfrom_path + self.module[len(path):]
731 733 # ent.copyfrom_rev may not be the actual last revision
732 734 previd = self.latest(newpath, ent.copyfrom_rev)
733 735 if previd is not None:
734 736 prevmodule, prevnum = self.revsplit(previd)[1:]
735 737 if prevnum >= self.startrev:
736 738 parents = [previd]
737 739 self.ui.note('found parent of branch %s at %d: %s\n' %
738 740 (self.module, prevnum, prevmodule))
739 741 else:
740 742 self.ui.debug("No copyfrom path, don't know what to do.\n")
741 743
742 744 paths = []
743 745 # filter out unrelated paths
744 746 for path, ent in orig_paths:
745 747 if self.getrelpath(path) is None:
746 748 continue
747 749 paths.append((path, ent))
748 750
749 751 # Example SVN datetime. Includes microseconds.
750 752 # ISO-8601 conformant
751 753 # '2007-01-04T17:35:00.902377Z'
752 754 date = util.parsedate(date[:19] + " UTC", ["%Y-%m-%dT%H:%M:%S"])
753 755
754 756 log = message and self.recode(message) or ''
755 757 author = author and self.recode(author) or ''
756 758 try:
757 759 branch = self.module.split("/")[-1]
758 760 if branch == 'trunk':
759 761 branch = ''
760 762 except IndexError:
761 763 branch = None
762 764
763 765 cset = commit(author=author,
764 766 date=util.datestr(date),
765 767 desc=log,
766 768 parents=parents,
767 769 branch=branch,
768 770 rev=rev.encode('utf-8'))
769 771
770 772 self.commits[rev] = cset
771 773 # The parents list is *shared* among self.paths and the
772 774 # commit object. Both will be updated below.
773 775 self.paths[rev] = (paths, cset.parents)
774 776 if self.child_cset and not self.child_cset.parents:
775 777 self.child_cset.parents[:] = [rev]
776 778 self.child_cset = cset
777 779 return cset, branched
778 780
779 781 self.ui.note('fetching revision log for "%s" from %d to %d\n' %
780 782 (self.module, from_revnum, to_revnum))
781 783
782 784 try:
783 785 firstcset = None
784 786 lastonbranch = False
785 787 stream = get_log(self.url, [self.module], from_revnum, to_revnum)
786 788 try:
787 789 for entry in stream:
788 790 paths, revnum, author, date, message = entry
789 791 if revnum < self.startrev:
790 792 lastonbranch = True
791 793 break
792 794 if self.is_blacklisted(revnum):
793 795 self.ui.note('skipping blacklisted revision %d\n'
794 796 % revnum)
795 797 continue
796 798 if paths is None:
797 799 self.ui.debug('revision %d has no entries\n' % revnum)
798 800 continue
799 801 cset, lastonbranch = parselogentry(paths, revnum, author,
800 802 date, message)
801 803 if cset:
802 804 firstcset = cset
803 805 if lastonbranch:
804 806 break
805 807 finally:
806 808 stream.close()
807 809
808 810 if not lastonbranch and firstcset and not firstcset.parents:
809 811 # The first revision of the sequence (the last fetched one)
810 812 # has invalid parents if not a branch root. Find the parent
811 813 # revision now, if any.
812 814 try:
813 815 firstrevnum = self.revnum(firstcset.rev)
814 816 if firstrevnum > 1:
815 817 latest = self.latest(self.module, firstrevnum - 1)
816 818 if latest:
817 819 firstcset.parents.append(latest)
818 820 except util.Abort:
819 821 pass
820 822 except SubversionException, (inst, num):
821 823 if num == svn.core.SVN_ERR_FS_NO_SUCH_REVISION:
822 824 raise util.Abort('svn: branch has no revision %s' % to_revnum)
823 825 raise
824 826
825 827 def _getfile(self, file, rev):
826 828 io = StringIO()
827 829 # TODO: ra.get_file transmits the whole file instead of diffs.
828 830 mode = ''
829 831 try:
830 832 new_module, revnum = self.revsplit(rev)[1:]
831 833 if self.module != new_module:
832 834 self.module = new_module
833 835 self.reparent(self.module)
834 836 info = svn.ra.get_file(self.ra, file, revnum, io)
835 837 if isinstance(info, list):
836 838 info = info[-1]
837 839 mode = ("svn:executable" in info) and 'x' or ''
838 840 mode = ("svn:special" in info) and 'l' or mode
839 841 except SubversionException, e:
840 842 notfound = (svn.core.SVN_ERR_FS_NOT_FOUND,
841 843 svn.core.SVN_ERR_RA_DAV_PATH_NOT_FOUND)
842 844 if e.apr_err in notfound: # File not found
843 845 raise IOError()
844 846 raise
845 847 data = io.getvalue()
846 848 if mode == 'l':
847 849 link_prefix = "link "
848 850 if data.startswith(link_prefix):
849 851 data = data[len(link_prefix):]
850 852 return data, mode
851 853
def _find_children(self, path, revnum):
    """Return '<path>/<name>' for every name in the recursive
    svn.client.ls listing of *path* (below self.base) at *revnum*.
    """
    path = path.strip('/')
    pool = Pool()
    rpath = '/'.join([self.base, path]).strip('/')
    listing = svn.client.ls(rpath, optrev(revnum), True, self.ctx, pool)
    return ['%s/%s' % (path, name) for name in listing.keys()]
857 859
def getrelpath(self, path, module=None):
    """Return `path` relative to `module`, or None if it lies outside.

    `module` defaults to self.module. An empty string is returned when
    `path` designates the module itself (a trailing slash is ignored).
    When the path is not under the module, a debug message is emitted
    through self.ui and None is returned.
    """
    if module is None:
        module = self.module
    # Given the repository url of this wc, say
    #   "http://server/plone/CMFPlone/branches/Plone-2_0-branch"
    # extract the "entry" portion (a relative path) from what
    # svn log --xml says, ie
    #   "/CMFPlone/branches/Plone-2_0-branch/tests/PloneTestCase.py"
    # that is to say "tests/PloneTestCase.py"
    if path.startswith(module):
        tail = path.rstrip('/')[len(module):]
        if not tail:
            # The path is the module root itself.
            return tail
        if tail[0] == '/':
            return tail[1:]
        # Otherwise `module` is only a textual prefix of a sibling name
        # (e.g. "/trunk" vs "/trunk2"); treat it as outside the tree.

    # The path is outside our tracked tree...
    self.ui.debug('%r is not under %r, ignoring\n' % (path, module))
    return None
877 879
# Shell script installed as the 'pre-revprop-change' hook of repositories
# created by svn_sink. It permits modifying svn:log and adding the
# hg:convert-branch / hg:convert-rev revision properties, and rejects
# every other revision property change.
pre_revprop_change = '''#!/bin/sh

REPOS="$1"
REV="$2"
USER="$3"
PROPNAME="$4"
ACTION="$5"

if [ "$ACTION" = "M" -a "$PROPNAME" = "svn:log" ]; then exit 0; fi
if [ "$ACTION" = "A" -a "$PROPNAME" = "hg:convert-branch" ]; then exit 0; fi
if [ "$ACTION" = "A" -a "$PROPNAME" = "hg:convert-rev" ]; then exit 0; fi

echo "Changing prohibited revision property" >&2
exit 1
'''
893 895
class svn_sink(converter_sink, commandline):
    """Conversion sink that writes into a Subversion working copy.

    Changes are staged in the working copy through the 'svn' command
    line client and committed one revision at a time by putcommit().
    Bookkeeping files (revision map, author map, child map) are stored
    inside the working copy's '.svn' directory.
    """

    # Extracts the new revision number from 'svn commit' output.
    commit_re = re.compile(r'Committed revision (\d+).', re.M)

    def prerun(self):
        """Change into the working copy, if one has been set up.

        NOTE(review): presumably invoked by commandline before each svn
        command -- confirm against common.py.
        """
        if self.wc:
            os.chdir(self.wc)

    def postrun(self):
        """Return to the directory that was current at construction."""
        if self.wc:
            os.chdir(self.cwd)

    def join(self, name):
        """Return the path of `name` inside the working copy's .svn dir."""
        return os.path.join(self.wc, '.svn', name)

    def revmapfile(self):
        """Return the path of the revision map file."""
        return self.join('hg-shamap')

    def authorfile(self):
        """Return the path of the author map file."""
        return self.join('hg-authormap')

    def __init__(self, ui, path):
        """Open or create the working copy used as conversion target.

        If `path` is already a working copy (it has '.svn/entries') it is
        updated and used directly. Otherwise a working copy named
        '<basename>-wc' is checked out in the current directory; when
        `path` lives in an existing local directory and is not yet a
        repository, one is created with 'svnadmin create' and the
        pre-revprop-change hook is installed in it.
        """
        converter_sink.__init__(self, ui, path)
        commandline.__init__(self, ui, 'svn')
        # Operations recorded by putfile/delfile/copyfile and replayed
        # in putcommit.
        self.delete = []
        self.setexec = []
        self.delexec = []
        self.copies = []
        self.wc = None
        self.cwd = os.getcwd()

        path = os.path.realpath(path)

        # Holds the filesystem path of a freshly created repository.
        created = False
        if os.path.isfile(os.path.join(path, '.svn', 'entries')):
            self.wc = path
            self.run0('update')
        else:
            wcpath = os.path.join(os.getcwd(), os.path.basename(path) + '-wc')

            if os.path.isdir(os.path.dirname(path)):
                if not os.path.exists(os.path.join(path, 'db', 'fs-type')):
                    ui.status(_('initializing svn repo %r\n') %
                              os.path.basename(path))
                    commandline(ui, 'svnadmin').run0('create', path)
                    created = path
                # Turn the local path into a file:// URL for checkout.
                path = util.normpath(path)
                if not path.startswith('/'):
                    path = '/' + path
                path = 'file://' + path

            ui.status(_('initializing svn wc %r\n') % os.path.basename(wcpath))
            self.run0('checkout', path, wcpath)

            self.wc = wcpath
        self.opener = util.opener(self.wc)
        self.wopener = util.opener(self.wc)
        self.childmap = mapfile(ui, self.join('hg-childmap'))
        # None when the filesystem cannot represent the execute bit.
        self.is_exec = util.checkexec(self.wc) and util.is_exec or None

        if created:
            hook = os.path.join(created, 'hooks', 'pre-revprop-change')
            fp = open(hook, 'w')
            try:
                fp.write(pre_revprop_change)
            finally:
                fp.close()
            util.set_flags(hook, "x")

        xport = transport.SvnRaTransport(url=geturl(path))
        self.uuid = svn.ra.get_uuid(xport.ra)

    def wjoin(self, *names):
        """Join `names` onto the working copy root."""
        return os.path.join(self.wc, *names)

    def putfile(self, filename, flags, data):
        """Write `data` to `filename` in the working copy.

        With 'l' in `flags` a symlink pointing at `data` is created;
        otherwise a regular file is written, replacing any symlink that
        was there. Required svn:executable property changes are queued
        in self.setexec / self.delexec for the next putcommit().
        """
        wpath = self.wjoin(filename)
        if 'l' in flags:
            self.wopener.symlink(data, filename)
        else:
            try:
                # Unlink the path inside the working copy; the bare
                # relative name would resolve against the process cwd.
                if os.path.islink(wpath):
                    os.unlink(wpath)
            except OSError:
                pass
            fp = self.wopener(filename, 'w')
            try:
                fp.write(data)
            finally:
                fp.close()

            if self.is_exec:
                was_exec = self.is_exec(wpath)
            else:
                # On filesystems not supporting execute-bit, there is no way
                # to know if it is set but asking subversion. Setting it
                # systematically is just as expensive and much simpler.
                was_exec = 'x' not in flags

            util.set_flags(wpath, flags)
            if was_exec:
                if 'x' not in flags:
                    self.delexec.append(filename)
            else:
                if 'x' in flags:
                    self.setexec.append(filename)

    def delfile(self, name):
        """Queue `name` for deletion at the next putcommit()."""
        self.delete.append(name)

    def copyfile(self, source, dest):
        """Queue a copy from `source` to `dest` for the next putcommit()."""
        self.copies.append([source, dest])

    def _copyfile(self, source, dest):
        """Record in svn that `dest` was copied from `source`."""
        # SVN's copy command pukes if the destination file exists, but
        # our copyfile method expects to record a copy that has
        # already occurred.  Cross the semantic gap.
        wdest = self.wjoin(dest)
        exists = os.path.exists(wdest)
        if exists:
            # Move the existing destination aside under a unique name.
            fd, tempname = tempfile.mkstemp(
                prefix='hg-copy-', dir=os.path.dirname(wdest))
            os.close(fd)
            os.unlink(tempname)
            os.rename(wdest, tempname)
        try:
            self.run0('copy', source, dest)
        finally:
            if exists:
                # Put the content that was set aside back in place.
                try:
                    os.unlink(wdest)
                except OSError:
                    pass
                os.rename(tempname, wdest)

    def dirs_of(self, files):
        """Return the set of directories involved in `files`.

        This contains every ancestor directory of each name, plus the
        name itself when it is a directory in the working copy.
        """
        dirs = util.set()
        for f in files:
            if os.path.isdir(self.wjoin(f)):
                dirs.add(f)
            for i in strutil.rfindall(f, '/'):
                dirs.add(f[:i])
        return dirs

    def add_dirs(self, files):
        """'svn add' the directories of `files` lacking '.svn/entries'.

        Returns the sorted list of directories that were added.
        """
        add_dirs = [d for d in self.dirs_of(files)
                    if not os.path.exists(self.wjoin(d, '.svn', 'entries'))]
        if add_dirs:
            # Parents sort before their children.
            add_dirs.sort()
            self.xargs(add_dirs, 'add', non_recursive=True, quiet=True)
        return add_dirs

    def add_files(self, files):
        """'svn add' `files` (if any) and return them."""
        if files:
            self.xargs(files, 'add', quiet=True)
        return files

    def tidy_dirs(self, names):
        """'svn delete' directories of `names` that only contain '.svn'.

        Directories are visited in reverse sorted order so that children
        are examined before their parents. Returns the deleted ones.
        """
        dirs = list(self.dirs_of(names))
        dirs.sort()
        dirs.reverse()
        deleted = []
        for d in dirs:
            wd = self.wjoin(d)
            # os.listdir() returns a list, so compare against a list:
            # comparing with the bare string '.svn' can never be true.
            if os.listdir(wd) == ['.svn']:
                self.run0('delete', d)
                deleted.append(d)
        return deleted

    def addchild(self, parent, child):
        """Remember `child` as the svn revision committed for `parent`."""
        self.childmap[parent] = child

    def revid(self, rev):
        """Build the revision identifier for svn revision number `rev`."""
        return u"svn:%s@%s" % (self.uuid, rev)

    def putcommit(self, files, parents, commit):
        """Commit the queued changes and return the new revision id.

        If one of `parents` already has a child recorded in the child
        map, that child's revision id is returned and nothing is
        committed. Raises util.Abort when the revision number cannot be
        found in the output of 'svn commit'.
        """
        for parent in parents:
            try:
                return self.revid(self.childmap[parent])
            except KeyError:
                pass
        entries = util.set(self.delete)
        files = util.frozenset(files)
        entries.update(self.add_dirs(files.difference(entries)))
        if self.copies:
            for s, d in self.copies:
                self._copyfile(s, d)
            self.copies = []
        if self.delete:
            self.xargs(self.delete, 'delete')
            self.delete = []
        entries.update(self.add_files(files.difference(entries)))
        entries.update(self.tidy_dirs(entries))
        if self.delexec:
            self.xargs(self.delexec, 'propdel', 'svn:executable')
            self.delexec = []
        if self.setexec:
            self.xargs(self.setexec, 'propset', 'svn:executable', '*')
            self.setexec = []

        fd, messagefile = tempfile.mkstemp(prefix='hg-convert-')
        try:
            # Write the message inside the try block so the temporary
            # file is removed even if writing fails.
            fp = os.fdopen(fd, 'w')
            try:
                fp.write(commit.desc)
            finally:
                fp.close()
            output = self.run0('commit',
                               username=util.shortuser(commit.author),
                               file=messagefile,
                               encoding='utf-8')
            try:
                rev = self.commit_re.search(output).group(1)
            except AttributeError:
                # search() returned None: no revision number in output.
                self.ui.warn(_('unexpected svn output:\n'))
                self.ui.warn(output)
                raise util.Abort(_('unable to cope with svn output'))
            if commit.rev:
                self.run('propset', 'hg:convert-rev', commit.rev,
                         revprop=True, revision=rev)
            if commit.branch and commit.branch != 'default':
                self.run('propset', 'hg:convert-branch', commit.branch,
                         revprop=True, revision=rev)
            for parent in parents:
                self.addchild(parent, rev)
            return self.revid(rev)
        finally:
            os.unlink(messagefile)

    def puttags(self, tags):
        """Tags are not supported by this sink; only emit a warning."""
        self.ui.warn(_('XXX TAGS NOT IMPLEMENTED YET\n'))
General Comments 0
You need to be logged in to leave comments. Login now