##// END OF EJS Templates
convert: prevent svn branches to leave the root module tree
Patrick Mezard -
r5957:971a17af default
parent child Browse files
Show More
@@ -1,1018 +1,1038 b''
1 1 # Subversion 1.4/1.5 Python API backend
2 2 #
3 3 # Copyright(C) 2007 Daniel Holth et al
4 4 #
5 5 # Configuration options:
6 6 #
7 7 # convert.svn.trunk
8 8 # Relative path to the trunk (default: "trunk")
9 9 # convert.svn.branches
10 10 # Relative path to tree of branches (default: "branches")
11 11 # convert.svn.tags
12 12 # Relative path to tree of tags (default: "tags")
13 13 #
14 14 # Set these in a hgrc, or on the command line as follows:
15 15 #
16 16 # hg convert --config convert.svn.trunk=wackoname [...]
17 17
18 18 import locale
19 19 import os
20 20 import re
21 21 import sys
22 22 import cPickle as pickle
23 23 import tempfile
24 24
25 25 from mercurial import strutil, util
26 26 from mercurial.i18n import _
27 27
28 28 # Subversion stuff. Works best with very recent Python SVN bindings
29 29 # e.g. SVN 1.5 or backports. Thanks to the bzr folks for enhancing
30 30 # these bindings.
31 31
32 32 from cStringIO import StringIO
33 33
34 34 from common import NoRepo, commit, converter_source, encodeargs, decodeargs
35 35 from common import commandline, converter_sink, mapfile
36 36
37 37 try:
38 38 from svn.core import SubversionException, Pool
39 39 import svn
40 40 import svn.client
41 41 import svn.core
42 42 import svn.ra
43 43 import svn.delta
44 44 import transport
45 45 except ImportError:
46 46 pass
47 47
def geturl(path):
    """Turn a working copy path, directory or URL into a Subversion URL.

    The Subversion client is asked first.  If it refuses, an existing
    local directory is converted into a file:// URL; anything else is
    returned unchanged.
    """
    try:
        canonical = svn.core.svn_path_canonicalize(path)
        return svn.client.url_from_path(canonical)
    except SubversionException:
        pass
    if not os.path.isdir(path):
        return path
    localpath = os.path.normpath(os.path.abspath(path))
    if os.name == 'nt':
        # Windows paths need a leading slash and forward slashes
        localpath = '/' + util.normpath(localpath)
    return 'file://%s' % localpath
59 59
def optrev(number):
    """Wrap a revision number into a svn_opt_revision_t structure."""
    revision = svn.core.svn_opt_revision_t()
    revision.kind = svn.core.svn_opt_revision_number
    revision.value.number = number
    return revision
65 65
class changedpath(object):
    """Plain copy of the fields read from a changed-path log entry."""

    def __init__(self, p):
        # Only these three attributes of the source entry are kept
        self.copyfrom_path = p.copyfrom_path
        self.copyfrom_rev = p.copyfrom_rev
        self.action = p.action
71 71
72 72 def get_log_child(fp, url, paths, start, end, limit=0, discover_changed_paths=True,
73 73 strict_node_history=False):
74 74 protocol = -1
75 75 def receiver(orig_paths, revnum, author, date, message, pool):
76 76 if orig_paths is not None:
77 77 for k, v in orig_paths.iteritems():
78 78 orig_paths[k] = changedpath(v)
79 79 pickle.dump((orig_paths, revnum, author, date, message),
80 80 fp, protocol)
81 81
82 82 try:
83 83 # Use an ra of our own so that our parent can consume
84 84 # our results without confusing the server.
85 85 t = transport.SvnRaTransport(url=url)
86 86 svn.ra.get_log(t.ra, paths, start, end, limit,
87 87 discover_changed_paths,
88 88 strict_node_history,
89 89 receiver)
90 90 except SubversionException, (inst, num):
91 91 pickle.dump(num, fp, protocol)
92 92 except IOError:
93 93 # Caller may interrupt the iteration
94 94 pickle.dump(None, fp, protocol)
95 95 else:
96 96 pickle.dump(None, fp, protocol)
97 97 fp.close()
98 98
def debugsvnlog(ui, **opts):
    """Fetch SVN log in a subprocess and channel them back to parent to
    avoid memory collection issues.
    """
    # Arguments arrive encoded on stdin, results leave pickled on stdout
    for stream in (sys.stdin, sys.stdout):
        util.set_binary(stream)
    args = decodeargs(sys.stdin.read())
    get_log_child(sys.stdout, *args)
107 107
class logstream:
    """Interruptible revision log iterator."""
    def __init__(self, stdout):
        # File-like object from which pickled log entries are read
        self._stdout = stdout

    def __iter__(self):
        """Yield unpickled log entries until the terminating marker.

        A None marker ends the iteration.  Any other value which is not
        a five-item entry is reported as a SubversionException.
        """
        while True:
            entry = pickle.load(self._stdout)
            try:
                orig_paths, revnum, author, date, message = entry
            except (TypeError, ValueError):
                # Not a five-item sequence: either the end marker or an
                # error value sent by the child.  Only unpacking failures
                # are caught so interrupts are not swallowed.
                if entry is None:
                    break
                raise SubversionException("child raised exception", entry)
            yield entry

    def close(self):
        """Close the underlying stream; calling it again is harmless."""
        if self._stdout:
            self._stdout.close()
            self._stdout = None
128 128
def get_log(url, paths, start, end, limit=0, discover_changed_paths=True,
            strict_node_history=False):
    """Run "hg debugsvnlog" in a child process and return a logstream
    reading the entries it produces.
    """
    payload = encodeargs([url, paths, start, end, limit,
                          discover_changed_paths, strict_node_history])
    cmd = '%s debugsvnlog' % util.shellquote(util.hgexecutable())
    childin, childout = os.popen2(cmd, 'b')
    # The child reads all its arguments from stdin before starting
    childin.write(payload)
    childin.close()
    return logstream(childout)
140 140
141 141 # SVN conversion code stolen from bzr-svn and tailor
142 142 #
# Subversion looks like a versioned filesystem: branch structures
# are defined by convention and not enforced by the tool. First,
# we define the potential branches (modules) as the "trunk" directory
# and the children of the "branches" directory. Revisions are then
# identified by their module and revision number (and a repository
# identifier).
148 148 #
149 149 # The revision graph is really a tree (or a forest). By default, a
150 150 # revision parent is the previous revision in the same module. If the
151 151 # module directory is copied/moved from another module then the
152 152 # revision is the module root and its parent the source revision in
153 153 # the parent module. A revision has at most one parent.
154 154 #
155 155 class svn_source(converter_source):
def __init__(self, ui, url, rev=None):
    """Open the Subversion repository at url as a conversion source.

    url may carry a trailing "@rev" suffix; an explicit rev argument
    takes precedence over it.  Raises NoRepo when the bindings are
    missing or url is not a Subversion repository, and util.Abort when
    rev is not an integer or no revision is found in the module.
    """
    super(svn_source, self).__init__(ui, url, rev=rev)

    # The name is only bound if the guarded svn imports succeeded
    try:
        SubversionException
    except NameError:
        raise NoRepo('Subversion python bindings could not be loaded')

    self.encoding = locale.getpreferredencoding()
    self.lastrevs = {}

    latest = None
    try:
        # Support file://path@rev syntax. Useful e.g. to convert
        # deleted branches.
        at = url.rfind('@')
        if at >= 0:
            latest = int(url[at+1:])
            url = url[:at]
    except ValueError, e:
        # Suffix is not a number: keep the url untouched
        pass
    self.url = geturl(url)
    self.encoding = 'UTF-8' # Subversion is always nominal UTF-8
    try:
        self.transport = transport.SvnRaTransport(url=self.url)
        self.ra = self.transport.ra
        self.ctx = self.transport.client
        self.base = svn.ra.get_repos_root(self.ra)
        # Module is the part of the url below the repository root
        self.module = self.url[len(self.base):]
        # Remember the initial module: latest() rejects paths outside it
        self.rootmodule = self.module
        self.commits = {}
        self.paths = {}
        self.uuid = svn.ra.get_uuid(self.ra).decode(self.encoding)
    except SubversionException, e:
        ui.print_exc()
        raise NoRepo("%s does not look like a Subversion repo" % self.url)

    if rev:
        try:
            latest = int(rev)
        except ValueError:
            raise util.Abort('svn: revision %s is not an integer' % rev)

    # The blacklist file is optional
    try:
        self.get_blacklist()
    except IOError, e:
        pass

    self.head = self.latest(self.module, latest)
    if not self.head:
        raise util.Abort(_('no revision found in module %s') %
                         self.module.encode(self.encoding))
    self.last_changed = self.revnum(self.head)

    self._changescache = None

    # Remember the working copy, if url is one, for converted()
    if os.path.exists(os.path.join(url, '.svn/entries')):
        self.wc = url
    else:
        self.wc = None
    self.convertfp = None
213 217
def setrevmap(self, revmap):
    """Record, for every module found in revmap, its highest revision
    number in self.lastrevs.
    """
    newest = {}
    for revid in revmap.iterkeys():
        module, revnum = self.revsplit(revid)[1:]
        if module not in newest or revnum > newest[module]:
            newest[module] = revnum
    self.lastrevs = newest
222 226
223 227 def exists(self, path, optrev):
224 228 try:
225 229 svn.client.ls(self.url.rstrip('/') + '/' + path,
226 230 optrev, False, self.ctx)
227 231 return True
228 232 except SubversionException, err:
229 233 return False
230 234
def getheads(self):
    """Return the list of head revids to convert.

    The first head belongs to the trunk, or to the module itself when
    no trunk exists.  One head is appended for every entry of the
    branches directory for which latest() finds a revision.  Also sets
    self.tags, and updates self.module and self.head when a trunk is
    found.
    """

    def getcfgpath(name, rev):
        # Return the configured (convert.svn.<name>) or default path if
        # it exists at rev.  A missing default path yields None, a
        # missing configured path is an error.
        cfgpath = self.ui.config('convert', 'svn.' + name)
        path = (cfgpath or name).strip('/')
        if not self.exists(path, rev):
            if cfgpath:
                raise util.Abort(_('expected %s to be at %r, but not found')
                             % (name, path))
            return None
        self.ui.note(_('found %s at %r\n') % (name, path))
        return path

    rev = optrev(self.last_changed)
    oldmodule = ''
    trunk = getcfgpath('trunk', rev)
    tags = getcfgpath('tags', rev)
    branches = getcfgpath('branches', rev)

    # If the project has a trunk or branches, we will extract heads
    # from them. We keep the project root otherwise.
    if trunk:
        oldmodule = self.module or ''
        self.module += '/' + trunk
        self.head = self.latest(self.module, self.last_changed)
        if not self.head:
            raise util.Abort(_('no revision found in module %s') %
                             self.module.encode(self.encoding))

    # First head in the list is the module's head
    self.heads = [self.head]
    self.tags = '%s/%s' % (oldmodule , (tags or 'tags'))

    # Check if branches bring a few more heads to the list
    if branches:
        rpath = self.url.strip('/')
        branchnames = svn.client.ls(rpath + '/' + branches, rev, False,
                                    self.ctx)
        for branch in branchnames.keys():
            module = '%s/%s/%s' % (oldmodule, branches, branch)
            brevid = self.latest(module, self.last_changed)
            # latest() returns None for branches outside the root module
            if not brevid:
                self.ui.note(_('ignoring empty branch %s\n') %
                               branch.encode(self.encoding))
                continue
            self.ui.note('found branch %s at %d\n' %
                         (branch, self.revnum(brevid)))
            self.heads.append(brevid)

    return self.heads
274 285
def getfile(self, file, rev):
    """Return the content of file at rev, remembering its mode so that
    getmode() can answer for the same (file, rev) pair.
    """
    content, flags = self._getfile(file, rev)
    self.modecache[(file, rev)] = flags
    return content
279 290
def getmode(self, file, rev):
    """Return the mode cached by getfile() for (file, rev)."""
    key = (file, rev)
    return self.modecache[key]
282 293
def getchanges(self, rev):
    """Return (files, copies) for rev.

    files is a sorted list of (name, rev) pairs.  The answer cached by
    getchangedfiles() is reused when it matches rev.
    """
    cached = self._changescache
    if cached and cached[0] == rev:
        return cached[1]
    self._changescache = None
    self.modecache = {}
    paths, parents = self.paths[rev]
    if not parents:
        # Perform a full checkout on roots
        uuid, module, revnum = self.revsplit(rev)
        entries = svn.client.ls(self.base + module, optrev(revnum),
                                True, self.ctx)
        files = [n for n, e in entries.iteritems()
                 if e.kind == svn.core.svn_node_file]
        copies = {}
    else:
        files, copies = self.expandpaths(rev, paths, parents)

    files = [(name, rev) for name in sorted(files)]

    # caller caches the result, so free it here to release memory
    del self.paths[rev]
    return (files, copies)
306 317
def getchangedfiles(self, rev, i):
    """Return the names of the files changed in rev and cache the full
    getchanges() answer for a later call.
    """
    changes = self.getchanges(rev)
    self._changescache = (rev, changes)
    filelist = changes[0]
    return [item[0] for item in filelist]
311 322
def getcommit(self, rev):
    """Return the commit object for rev, fetching revisions on demand.

    The returned entry is removed from self.commits.
    """
    if rev not in self.commits:
        uuid, module, revnum = self.revsplit(rev)
        self.module = module
        self.reparent(module)
        # We assume that:
        # - requests for revisions after "stop" come from the
        # revision graph backward traversal. Cache all of them
        # down to stop, they will be used eventually.
        # - requests for revisions before "stop" come to get
        # isolated branches parents. Just fetch what is needed.
        stop = self.lastrevs.get(module, 0)
        if revnum < stop:
            stop = revnum + 1
        self._fetch_revisions(revnum, stop)
    # caller caches the result, so free it here to release memory
    return self.commits.pop(rev)
331 342
332 343 def gettags(self):
333 344 tags = {}
334 345 start = self.revnum(self.head)
335 346 try:
336 347 for entry in get_log(self.url, [self.tags], 0, start):
337 348 orig_paths, revnum, author, date, message = entry
338 349 for path in orig_paths:
339 350 if not path.startswith(self.tags+'/'):
340 351 continue
341 352 ent = orig_paths[path]
342 353 source = ent.copyfrom_path
343 354 rev = ent.copyfrom_rev
344 355 tag = path.split('/')[-1]
345 356 tags[tag] = self.revid(rev, module=source)
346 357 except SubversionException, (inst, num):
347 358 self.ui.note('no tags found at revision %d\n' % start)
348 359 return tags
349 360
def converted(self, rev, destrev):
    """Append "destrev revnum" to .svn/hg-shamap in the working copy.

    Does nothing when the source is not a working copy.
    """
    if self.wc:
        if self.convertfp is None:
            # Opened on first use and kept for the following calls
            mappath = os.path.join(self.wc, '.svn', 'hg-shamap')
            self.convertfp = open(mappath, 'a')
        fp = self.convertfp
        fp.write('%s %d\n' % (destrev, self.revnum(rev)))
        fp.flush()
358 369
359 370 # -- helper functions --
360 371
def revid(self, revnum, module=None):
    """Build the "svn:<uuid><module>@<revnum>" identifier of a revision.

    The current module is used when module is empty or omitted.
    """
    module = module or self.module
    decoded = module.decode(self.encoding)
    return u"svn:%s%s@%s" % (self.uuid, decoded, revnum)
366 377
def revnum(self, rev):
    """Return the revision number stored after the last '@' of rev."""
    number = rev.rsplit('@', 1)[-1]
    return int(number)
369 380
def revsplit(self, rev):
    """Split a revid into a (uuid, module, revnum) tuple.

    module is '' when the revid has no path component, otherwise it
    starts with a slash.
    """
    encoded = rev.encode(self.encoding)
    location, number = encoded.split('@', 1)
    number = int(number)
    pieces = location.split('/', 1)
    # Drop the 4 leading characters (the "svn:" prefix written by revid)
    uuid = pieces[0][4:]
    if len(pieces) > 1:
        module = '/' + pieces[1]
    else:
        module = ''
    return uuid, module, number
379 390
def latest(self, path, stop=0):
    """Find the latest revid affecting path, up to stop. It may return
    a revision in a different module, since a branch may be moved without
    a change being reported. Return None if computed module does not
    belong to rootmodule subtree.

    When stop is 0 the latest repository revision is used.  Raises
    util.Abort if path cannot be found up to stop.
    """
    if not stop:
        stop = svn.ra.get_latest_revnum(self.ra)
    try:
        self.reparent('')
        try:
            dirent = svn.ra.stat(self.ra, path.strip('/'), stop)
        finally:
            # Restore the session parent even when stat() fails: callers
            # may catch the Abort raised below and keep using self.ra.
            self.reparent(self.module)
    except SubversionException:
        dirent = None
    if not dirent:
        raise util.Abort('%s not found up to revision %d' % (path, stop))

    # stat() gives us the previous revision on this line of development, but
    # it might be in *another module*. Fetch the log and detect renames down
    # to the latest revision.
    stream = get_log(self.url, [path], stop, dirent.created_rev)
    try:
        for entry in stream:
            paths, revnum, author, date, message = entry
            if revnum <= dirent.created_rev:
                break

            for p in paths:
                if not path.startswith(p) or not paths[p].copyfrom_path:
                    continue
                newpath = paths[p].copyfrom_path + path[len(p):]
                self.ui.debug("branch renamed from %s to %s at %d\n" %
                              (path, newpath, revnum))
                path = newpath
                break
    finally:
        stream.close()

    # Compare whole path components so that a sibling such as
    # "/trunk2" is not mistaken for a child of "/trunk".  An empty root
    # module contains everything.
    root = self.rootmodule.rstrip('/')
    if root and path != root and not path.startswith(root + '/'):
        self.ui.debug(_('ignoring foreign branch %r\n') % path)
        return None
    return self.revid(dirent.created_rev, path)
418 433
def get_blacklist(self):
    """Avoid certain revision numbers.
    It is not uncommon for two nearby revisions to cancel each other
    out, e.g. 'I copied trunk into a subdirectory of itself instead
    of making a branch'. The converted repository is significantly
    smaller if we ignore such revisions.

    Revision numbers are read, one per line, from "blacklist.txt" in
    the current directory into self.blacklist.  Lines starting with
    '#' and lines which are not integers are ignored.  IOError is
    raised when the file cannot be read; self.blacklist is then left
    empty.
    """
    self.blacklist = util.set()
    blacklist = self.blacklist
    fp = open("blacklist.txt", "r")
    try:
        for line in fp:
            if line.startswith("#"):
                continue
            try:
                blacklist.add(int(line.strip()))
            except ValueError:
                pass # not an integer or a comment
    finally:
        # Do not leak the file handle
        fp.close()
434 449
def is_blacklisted(self, svn_rev):
    """Tell whether svn_rev is part of the loaded blacklist."""
    skipped = self.blacklist
    return svn_rev in skipped
437 452
def reparent(self, module):
    """Point the RA session at module, a path below the repository root."""
    target = (self.base + module).encode(self.encoding)
    self.ui.debug("reparent to %s\n" % target)
    svn.ra.reparent(self.ra, target)
442 457
def expandpaths(self, rev, paths, parents):
    """Expand the changed paths of rev into file level changes.

    paths is a sequence of (path, entry) pairs and parents the list of
    parent revids of rev.  Returns a (files, copies) tuple where files
    is the list of changed file entries without duplicates, and copies
    maps copy destinations to their sources.  Switches self.module and
    reparents the session when rev belongs to another module.
    """
    def get_entry_from_path(path, module=self.module):
        # Given the repository url of this wc, say
        # "http://server/plone/CMFPlone/branches/Plone-2_0-branch"
        # extract the "entry" portion (a relative path) from what
        # svn log --xml says, ie
        # "/CMFPlone/branches/Plone-2_0-branch/tests/PloneTestCase.py"
        # that is to say "tests/PloneTestCase.py"
        if path.startswith(module):
            relative = path[len(module):]
            if relative.startswith('/'):
                return relative[1:]
            else:
                return relative

        # The path is outside our tracked tree...
        self.ui.debug('%r is not under %r, ignoring\n' % (path, module))
        return None

    entries = []
    copyfrom = {} # Map of entrypath, revision for finding source of deleted revisions.
    copies = {}

    new_module, revnum = self.revsplit(rev)[1:]
    if new_module != self.module:
        self.module = new_module
        self.reparent(self.module)

    for path, ent in paths:
        entrypath = get_entry_from_path(path, module=self.module)
        entry = entrypath.decode(self.encoding)

        kind = svn.ra.check_path(self.ra, entrypath, revnum)
        if kind == svn.core.svn_node_file:
            # A file exists at this path in rev: record it and,
            # possibly, the file it was copied from
            if ent.copyfrom_path:
                copyfrom_path = get_entry_from_path(ent.copyfrom_path)
                if copyfrom_path:
                    self.ui.debug("Copied to %s from %s@%s\n" %
                                  (entrypath, copyfrom_path,
                                   ent.copyfrom_rev))
                    # It's probably important for hg that the source
                    # exists in the revision's parent, not just the
                    # ent.copyfrom_rev
                    fromkind = svn.ra.check_path(self.ra, copyfrom_path, ent.copyfrom_rev)
                    if fromkind != 0:
                        copies[self.recode(entry)] = self.recode(copyfrom_path)
            entries.append(self.recode(entry))
        elif kind == 0: # gone, but had better be a deleted *file*
            self.ui.debug("gone from %s\n" % ent.copyfrom_rev)

            # if a branch is created but entries are removed in the same
            # changeset, get the right fromrev
            # parents cannot be empty here, you cannot remove things from
            # a root revision.
            uuid, old_module, fromrev = self.revsplit(parents[0])

            basepath = old_module + "/" + get_entry_from_path(path, module=self.module)
            entrypath = old_module + "/" + get_entry_from_path(path, module=self.module)

            def lookup_parts(p):
                # Return (prefix, entry) for the longest leading part of
                # p recorded in copyfrom, or None if there is none
                rc = None
                parts = p.split("/")
                for i in range(len(parts)):
                    part = "/".join(parts[:i])
                    info = part, copyfrom.get(part, None)
                    if info[1] is not None:
                        self.ui.debug("Found parent directory %s\n" % info[1])
                        rc = info
                return rc

            self.ui.debug("base, entry %s %s\n" % (basepath, entrypath))

            frompath, froment = lookup_parts(entrypath) or (None, revnum - 1)

            # need to remove fragment from lookup_parts and replace with copyfrom_path
            if frompath is not None:
                self.ui.debug("munge-o-matic\n")
                self.ui.debug(entrypath + '\n')
                self.ui.debug(entrypath[len(frompath):] + '\n')
                entrypath = froment.copyfrom_path + entrypath[len(frompath):]
                fromrev = froment.copyfrom_rev
                self.ui.debug("Info: %s %s %s %s\n" % (frompath, froment, ent, entrypath))

            # We can avoid the reparent calls if the module has not changed
            # but it probably does not worth the pain.
            self.reparent('')
            fromkind = svn.ra.check_path(self.ra, entrypath.strip('/'), fromrev)
            self.reparent(self.module)

            if fromkind == svn.core.svn_node_file: # a deleted file
                entries.append(self.recode(entry))
            elif fromkind == svn.core.svn_node_dir:
                # print "Deleted/moved non-file:", revnum, path, ent
                # children = self._find_children(path, revnum - 1)
                # print "find children %s@%d from %d action %s" % (path, revnum, ent.copyfrom_rev, ent.action)
                # Sometimes this is tricky. For example: in
                # The Subversion Repository revision 6940 a dir
                # was copied and one of its files was deleted
                # from the new location in the same commit. This
                # code can't deal with that yet.
                if ent.action == 'C':
                    children = self._find_children(path, fromrev)
                else:
                    oroot = entrypath.strip('/')
                    nroot = path.strip('/')
                    children = self._find_children(oroot, fromrev)
                    children = [s.replace(oroot,nroot) for s in children]
                # Mark all [files, not directories] as deleted.
                for child in children:
                    # Can we move a child directory and its
                    # parent in the same commit? (probably can). Could
                    # cause problems if instead of revnum -1,
                    # we have to look in (copyfrom_path, revnum - 1)
                    entrypath = get_entry_from_path("/" + child, module=old_module)
                    if entrypath:
                        entry = self.recode(entrypath.decode(self.encoding))
                        if entry in copies:
                            # deleted file within a copy
                            del copies[entry]
                        else:
                            entries.append(entry)
            else:
                self.ui.debug('unknown path in revision %d: %s\n' % \
                              (revnum, path))
        elif kind == svn.core.svn_node_dir:
            # Should probably synthesize normal file entries
            # and handle as above to clean up copy/rename handling.

            # If the directory just had a prop change,
            # then we shouldn't need to look for its children.
            if ent.action == 'M':
                continue

            # Also this could create duplicate entries. Not sure
            # whether this will matter. Maybe should make entries a set.
            # print "Changed directory", revnum, path, ent.action, ent.copyfrom_path, ent.copyfrom_rev
            # This will fail if a directory was copied
            # from another branch and then some of its files
            # were deleted in the same transaction.
            children = self._find_children(path, revnum)
            children.sort()
            for child in children:
                # Can we move a child directory and its
                # parent in the same commit? (probably can). Could
                # cause problems if instead of revnum -1,
                # we have to look in (copyfrom_path, revnum - 1)
                entrypath = get_entry_from_path("/" + child, module=self.module)
                # print child, self.module, entrypath
                if entrypath:
                    # Need to filter out directories here...
                    kind = svn.ra.check_path(self.ra, entrypath, revnum)
                    if kind != svn.core.svn_node_dir:
                        entries.append(self.recode(entrypath))

            # Copies here (must copy all from source)
            # Probably not a real problem for us if
            # source does not exist

            # Can do this with the copy command "hg copy"
            # if ent.copyfrom_path:
            # copyfrom_entry = get_entry_from_path(ent.copyfrom_path.decode(self.encoding),
            # module=self.module)
            # copyto_entry = entrypath
            #
            # print "copy directory", copyfrom_entry, 'to', copyto_entry
            #
            # copies.append((copyfrom_entry, copyto_entry))

            if ent.copyfrom_path:
                copyfrom_path = ent.copyfrom_path.decode(self.encoding)
                copyfrom_entry = get_entry_from_path(copyfrom_path, module=self.module)
                if copyfrom_entry:
                    # Remembered for the deleted path lookup above
                    copyfrom[path] = ent
                    self.ui.debug("mark %s came from %s\n" % (path, copyfrom[path]))

                    # Good, /probably/ a regular copy. Really should check
                    # to see whether the parent revision actually contains
                    # the directory in question.
                    children = self._find_children(self.recode(copyfrom_path), ent.copyfrom_rev)
                    children.sort()
                    for child in children:
                        entrypath = get_entry_from_path("/" + child, module=self.module)
                        if entrypath:
                            entry = entrypath.decode(self.encoding)
                            # print "COPY COPY From", copyfrom_entry, entry
                            copyto_path = path + entry[len(copyfrom_entry):]
                            copyto_entry = get_entry_from_path(copyto_path, module=self.module)
                            # print "COPY", entry, "COPY To", copyto_entry
                            copies[self.recode(copyto_entry)] = self.recode(entry)
                            # copy from quux splort/quuxfile

    return (util.unique(entries), copies)
635 650
636 651 def _fetch_revisions(self, from_revnum, to_revnum):
637 652 if from_revnum < to_revnum:
638 653 from_revnum, to_revnum = to_revnum, from_revnum
639 654
640 655 self.child_cset = None
641 656 def parselogentry(orig_paths, revnum, author, date, message):
642 657 """Return the parsed commit object or None, and True if
643 658 the revision is a branch root.
644 659 """
645 660 self.ui.debug("parsing revision %d (%d changes)\n" %
646 661 (revnum, len(orig_paths)))
647 662
663 branched = False
648 664 rev = self.revid(revnum)
649 665 # branch log might return entries for a parent we already have
650 666
651 667 if (rev in self.commits or revnum < to_revnum):
652 return None, False
668 return None, branched
653 669
654 670 parents = []
655 671 # check whether this revision is the start of a branch
656 672 if self.module in orig_paths:
657 673 ent = orig_paths[self.module]
658 674 if ent.copyfrom_path:
675 branched = True
659 676 # ent.copyfrom_rev may not be the actual last revision
660 677 previd = self.latest(ent.copyfrom_path, ent.copyfrom_rev)
661 parents = [previd]
662 prevmodule, prevnum = self.revsplit(previd)[1:]
663 self.ui.note('found parent of branch %s at %d: %s\n' %
664 (self.module, prevnum, prevmodule))
678 if previd is not None:
679 parents = [previd]
680 prevmodule, prevnum = self.revsplit(previd)[1:]
681 self.ui.note('found parent of branch %s at %d: %s\n' %
682 (self.module, prevnum, prevmodule))
665 683 else:
666 684 self.ui.debug("No copyfrom path, don't know what to do.\n")
667 685
668 686 orig_paths = orig_paths.items()
669 687 orig_paths.sort()
670 688 paths = []
671 689 # filter out unrelated paths
672 690 for path, ent in orig_paths:
673 691 if not path.startswith(self.module):
674 692 self.ui.debug("boring@%s: %s\n" % (revnum, path))
675 693 continue
676 694 paths.append((path, ent))
677 695
678 696 # Example SVN datetime. Includes microseconds.
679 697 # ISO-8601 conformant
680 698 # '2007-01-04T17:35:00.902377Z'
681 699 date = util.parsedate(date[:19] + " UTC", ["%Y-%m-%dT%H:%M:%S"])
682 700
683 701 log = message and self.recode(message) or ''
684 702 author = author and self.recode(author) or ''
685 703 try:
686 704 branch = self.module.split("/")[-1]
687 705 if branch == 'trunk':
688 706 branch = ''
689 707 except IndexError:
690 708 branch = None
691 709
692 710 cset = commit(author=author,
693 711 date=util.datestr(date),
694 712 desc=log,
695 713 parents=parents,
696 714 branch=branch,
697 715 rev=rev.encode('utf-8'))
698 716
699 717 self.commits[rev] = cset
700 718 # The parents list is *shared* among self.paths and the
701 719 # commit object. Both will be updated below.
702 720 self.paths[rev] = (paths, cset.parents)
703 721 if self.child_cset and not self.child_cset.parents:
704 722 self.child_cset.parents[:] = [rev]
705 723 self.child_cset = cset
706 return cset, len(parents) > 0
724 return cset, branched
707 725
708 726 self.ui.note('fetching revision log for "%s" from %d to %d\n' %
709 727 (self.module, from_revnum, to_revnum))
710 728
711 729 try:
712 730 firstcset = None
731 branched = False
713 732 stream = get_log(self.url, [self.module], from_revnum, to_revnum)
714 733 try:
715 734 for entry in stream:
716 735 paths, revnum, author, date, message = entry
717 736 if self.is_blacklisted(revnum):
718 737 self.ui.note('skipping blacklisted revision %d\n'
719 738 % revnum)
720 739 continue
721 740 if paths is None:
722 741 self.ui.debug('revision %d has no entries\n' % revnum)
723 742 continue
724 743 cset, branched = parselogentry(paths, revnum, author,
725 744 date, message)
726 745 if cset:
727 746 firstcset = cset
728 747 if branched:
729 748 break
730 749 finally:
731 750 stream.close()
732 751
733 if firstcset and not firstcset.parents:
752 if not branched and firstcset and not firstcset.parents:
734 753 # The first revision of the sequence (the last fetched one)
735 754 # has invalid parents if not a branch root. Find the parent
736 755 # revision now, if any.
737 756 try:
738 757 firstrevnum = self.revnum(firstcset.rev)
739 758 if firstrevnum > 1:
740 759 latest = self.latest(self.module, firstrevnum - 1)
741 firstcset.parents.append(latest)
760 if latest:
761 firstcset.parents.append(latest)
742 762 except util.Abort:
743 763 pass
744 764 except SubversionException, (inst, num):
745 765 if num == svn.core.SVN_ERR_FS_NO_SUCH_REVISION:
746 766 raise NoSuchRevision(branch=self,
747 767 revision="Revision number %d" % to_revnum)
748 768 raise
749 769
750 770 def _getfile(self, file, rev):
751 771 io = StringIO()
752 772 # TODO: ra.get_file transmits the whole file instead of diffs.
753 773 mode = ''
754 774 try:
755 775 new_module, revnum = self.revsplit(rev)[1:]
756 776 if self.module != new_module:
757 777 self.module = new_module
758 778 self.reparent(self.module)
759 779 info = svn.ra.get_file(self.ra, file, revnum, io)
760 780 if isinstance(info, list):
761 781 info = info[-1]
762 782 mode = ("svn:executable" in info) and 'x' or ''
763 783 mode = ("svn:special" in info) and 'l' or mode
764 784 except SubversionException, e:
765 785 notfound = (svn.core.SVN_ERR_FS_NOT_FOUND,
766 786 svn.core.SVN_ERR_RA_DAV_PATH_NOT_FOUND)
767 787 if e.apr_err in notfound: # File not found
768 788 raise IOError()
769 789 raise
770 790 data = io.getvalue()
771 791 if mode == 'l':
772 792 link_prefix = "link "
773 793 if data.startswith(link_prefix):
774 794 data = data[len(link_prefix):]
775 795 return data, mode
776 796
def _find_children(self, path, revnum):
    """Return the recursive listing of path at revnum, every name being
    prefixed with path stripped of its slashes.
    """
    path = path.strip('/')
    pool = Pool()
    rpath = '/'.join([self.base, path]).strip('/')
    listing = svn.client.ls(rpath, optrev(revnum), True, self.ctx, pool)
    return ['%s/%s' % (path, name) for name in listing.keys()]
782 802
# Shell script installed by svn_sink.__init__ as the pre-revprop-change
# hook of the repositories it creates. It accepts modifications of
# svn:log and additions of the hg:convert-branch and hg:convert-rev
# revision properties, and rejects every other property change.
pre_revprop_change = '''#!/bin/sh

REPOS="$1"
REV="$2"
USER="$3"
PROPNAME="$4"
ACTION="$5"

if [ "$ACTION" = "M" -a "$PROPNAME" = "svn:log" ]; then exit 0; fi
if [ "$ACTION" = "A" -a "$PROPNAME" = "hg:convert-branch" ]; then exit 0; fi
if [ "$ACTION" = "A" -a "$PROPNAME" = "hg:convert-rev" ]; then exit 0; fi

echo "Changing prohibited revision property" >&2
exit 1
'''
798 818
799 819 class svn_sink(converter_sink, commandline):
800 820 commit_re = re.compile(r'Committed revision (\d+).', re.M)
801 821
def prerun(self):
    """Enter the working copy directory, if there is one."""
    if not self.wc:
        return
    os.chdir(self.wc)
805 825
def postrun(self):
    """Go back to the directory recorded in self.cwd, if a working
    copy is set.
    """
    if not self.wc:
        return
    os.chdir(self.cwd)
809 829
def join(self, name):
    """Return the path of name inside the working copy .svn directory."""
    admindir = os.path.join(self.wc, '.svn')
    return os.path.join(admindir, name)
812 832
def revmapfile(self):
    """Return the path of the revision map kept in the .svn directory."""
    name = 'hg-shamap'
    return self.join(name)
815 835
def authorfile(self):
    """Return the path of the author map kept in the .svn directory."""
    name = 'hg-authormap'
    return self.join(name)
818 838
def __init__(self, ui, path):
    """Prepare a Subversion working copy receiving converted revisions.

    When path is already a working copy it is updated and used as is.
    Otherwise a "<basename>-wc" working copy is checked out in the
    current directory, after creating a repository at path if it does
    not look like one yet.
    """
    converter_sink.__init__(self, ui, path)
    commandline.__init__(self, ui, 'svn')
    # Pending operations, filled by putfile(), delfile(), etc.
    self.delete = []
    self.setexec = []
    self.delexec = []
    self.copies = []
    self.wc = None
    self.cwd = os.getcwd()

    path = os.path.realpath(path)

    # Path of the repository created below, if any
    created = False
    if os.path.isfile(os.path.join(path, '.svn', 'entries')):
        self.wc = path
        self.run0('update')
    else:
        wcpath = os.path.join(os.getcwd(), os.path.basename(path) + '-wc')

        if os.path.isdir(os.path.dirname(path)):
            if not os.path.exists(os.path.join(path, 'db', 'fs-type')):
                ui.status(_('initializing svn repo %r\n') %
                          os.path.basename(path))
                commandline(ui, 'svnadmin').run0('create', path)
                created = path
            # Turn the local path into a file:// URL
            path = util.normpath(path)
            if not path.startswith('/'):
                path = '/' + path
            path = 'file://' + path

        ui.status(_('initializing svn wc %r\n') % os.path.basename(wcpath))
        self.run0('checkout', path, wcpath)

        self.wc = wcpath
    self.opener = util.opener(self.wc)
    self.wopener = util.opener(self.wc)
    self.childmap = mapfile(ui, self.join('hg-childmap'))
    # None when util.checkexec() is false for the working copy
    self.is_exec = util.checkexec(self.wc) and util.is_exec or None

    if created:
        # Install the hook script defined by pre_revprop_change
        hook = os.path.join(created, 'hooks', 'pre-revprop-change')
        fp = open(hook, 'w')
        fp.write(pre_revprop_change)
        fp.close()
        util.set_flags(hook, "x")

    xport = transport.SvnRaTransport(url=geturl(path))
    self.uuid = svn.ra.get_uuid(xport.ra)
867 887
868 888 def wjoin(self, *names):
869 889 return os.path.join(self.wc, *names)
870 890
871 891 def putfile(self, filename, flags, data):
872 892 if 'l' in flags:
873 893 self.wopener.symlink(data, filename)
874 894 else:
875 895 try:
876 896 if os.path.islink(self.wjoin(filename)):
877 897 os.unlink(filename)
878 898 except OSError:
879 899 pass
880 900 self.wopener(filename, 'w').write(data)
881 901
882 902 if self.is_exec:
883 903 was_exec = self.is_exec(self.wjoin(filename))
884 904 else:
885 905 # On filesystems not supporting execute-bit, there is no way
886 906 # to know if it is set but asking subversion. Setting it
887 907 # systematically is just as expensive and much simpler.
888 908 was_exec = 'x' not in flags
889 909
890 910 util.set_flags(self.wjoin(filename), flags)
891 911 if was_exec:
892 912 if 'x' not in flags:
893 913 self.delexec.append(filename)
894 914 else:
895 915 if 'x' in flags:
896 916 self.setexec.append(filename)
897 917
898 918 def delfile(self, name):
899 919 self.delete.append(name)
900 920
901 921 def copyfile(self, source, dest):
902 922 self.copies.append([source, dest])
903 923
904 924 def _copyfile(self, source, dest):
905 925 # SVN's copy command pukes if the destination file exists, but
906 926 # our copyfile method expects to record a copy that has
907 927 # already occurred. Cross the semantic gap.
908 928 wdest = self.wjoin(dest)
909 929 exists = os.path.exists(wdest)
910 930 if exists:
911 931 fd, tempname = tempfile.mkstemp(
912 932 prefix='hg-copy-', dir=os.path.dirname(wdest))
913 933 os.close(fd)
914 934 os.unlink(tempname)
915 935 os.rename(wdest, tempname)
916 936 try:
917 937 self.run0('copy', source, dest)
918 938 finally:
919 939 if exists:
920 940 try:
921 941 os.unlink(wdest)
922 942 except OSError:
923 943 pass
924 944 os.rename(tempname, wdest)
925 945
926 946 def dirs_of(self, files):
927 947 dirs = set()
928 948 for f in files:
929 949 if os.path.isdir(self.wjoin(f)):
930 950 dirs.add(f)
931 951 for i in strutil.rfindall(f, '/'):
932 952 dirs.add(f[:i])
933 953 return dirs
934 954
935 955 def add_dirs(self, files):
936 956 add_dirs = [d for d in self.dirs_of(files)
937 957 if not os.path.exists(self.wjoin(d, '.svn', 'entries'))]
938 958 if add_dirs:
939 959 add_dirs.sort()
940 960 self.xargs(add_dirs, 'add', non_recursive=True, quiet=True)
941 961 return add_dirs
942 962
943 963 def add_files(self, files):
944 964 if files:
945 965 self.xargs(files, 'add', quiet=True)
946 966 return files
947 967
948 968 def tidy_dirs(self, names):
949 969 dirs = list(self.dirs_of(names))
950 970 dirs.sort(reverse=True)
951 971 deleted = []
952 972 for d in dirs:
953 973 wd = self.wjoin(d)
954 974 if os.listdir(wd) == '.svn':
955 975 self.run0('delete', d)
956 976 deleted.append(d)
957 977 return deleted
958 978
959 979 def addchild(self, parent, child):
960 980 self.childmap[parent] = child
961 981
962 982 def revid(self, rev):
963 983 return u"svn:%s@%s" % (self.uuid, rev)
964 984
965 985 def putcommit(self, files, parents, commit):
966 986 for parent in parents:
967 987 try:
968 988 return self.revid(self.childmap[parent])
969 989 except KeyError:
970 990 pass
971 991 entries = set(self.delete)
972 992 files = util.frozenset(files)
973 993 entries.update(self.add_dirs(files.difference(entries)))
974 994 if self.copies:
975 995 for s, d in self.copies:
976 996 self._copyfile(s, d)
977 997 self.copies = []
978 998 if self.delete:
979 999 self.xargs(self.delete, 'delete')
980 1000 self.delete = []
981 1001 entries.update(self.add_files(files.difference(entries)))
982 1002 entries.update(self.tidy_dirs(entries))
983 1003 if self.delexec:
984 1004 self.xargs(self.delexec, 'propdel', 'svn:executable')
985 1005 self.delexec = []
986 1006 if self.setexec:
987 1007 self.xargs(self.setexec, 'propset', 'svn:executable', '*')
988 1008 self.setexec = []
989 1009
990 1010 fd, messagefile = tempfile.mkstemp(prefix='hg-convert-')
991 1011 fp = os.fdopen(fd, 'w')
992 1012 fp.write(commit.desc)
993 1013 fp.close()
994 1014 try:
995 1015 output = self.run0('commit',
996 1016 username=util.shortuser(commit.author),
997 1017 file=messagefile,
998 1018 encoding='utf-8')
999 1019 try:
1000 1020 rev = self.commit_re.search(output).group(1)
1001 1021 except AttributeError:
1002 1022 self.ui.warn(_('unexpected svn output:\n'))
1003 1023 self.ui.warn(output)
1004 1024 raise util.Abort(_('unable to cope with svn output'))
1005 1025 if commit.rev:
1006 1026 self.run('propset', 'hg:convert-rev', commit.rev,
1007 1027 revprop=True, revision=rev)
1008 1028 if commit.branch and commit.branch != 'default':
1009 1029 self.run('propset', 'hg:convert-branch', commit.branch,
1010 1030 revprop=True, revision=rev)
1011 1031 for parent in parents:
1012 1032 self.addchild(parent, rev)
1013 1033 return self.revid(rev)
1014 1034 finally:
1015 1035 os.unlink(messagefile)
1016 1036
1017 1037 def puttags(self, tags):
1018 1038 self.ui.warn(_('XXX TAGS NOT IMPLEMENTED YET\n'))
@@ -1,60 +1,65 b''
#!/bin/sh

# Exit with status 80 unless hghave reports that svn and its bindings are
# available.
"$TESTDIR/hghave" svn svn-bindings || exit 80

# Filter turning every backslash read on stdin into a forward slash.
fix_path()
{
    tr '\\' /
}

# Enable the extensions used below: convert, and graphlog for 'hg glog'.
echo "[extensions]" >> $HGRCPATH
echo "convert = " >> $HGRCPATH
echo "hgext.graphlog =" >> $HGRCPATH

svnadmin create svn-repo

svnpath=`pwd | fix_path`
# SVN wants all paths to start with a slash. Unfortunately,
# Windows ones don't. Handle that.
expr $svnpath : "\/" > /dev/null
if [ $? -ne 0 ]; then
    svnpath='/'$svnpath
fi

echo % initial svn import
# Import layout: trunk/a, trunk/d1/b and trunk/d1/c.
mkdir projA
cd projA
mkdir trunk
echo a > trunk/a
mkdir trunk/d1
echo b > trunk/d1/b
echo c > trunk/d1/c
cd ..

svnurl=file://$svnpath/svn-repo/projA
svn import -m "init projA" projA $svnurl | fix_path

# Build a module renaming chain which used to confuse the converter.
echo % update svn repository
svn co $svnurl A | fix_path
cd A
# Change a file at the trunk root and one inside d1 before trunk is moved.
echo a >> trunk/a
echo c >> trunk/d1/c
svn ci -m commitbeforemove
# Rename trunk to subproject, then create trunk and branches below it.
svn mv $svnurl/trunk $svnurl/subproject -m movedtrunk
svn up
mkdir subproject/trunk
svn add subproject/trunk
svn ci -m createtrunk
mkdir subproject/branches
svn add subproject/branches
svn ci -m createbranches
# Move d1 into the new trunk, change it, then move it under branches.
svn mv $svnurl/subproject/d1 $svnurl/subproject/trunk/d1 -m moved1
svn up
echo b >> subproject/trunk/d1/b
svn ci -m changeb
svn mv $svnurl/subproject/trunk/d1 $svnurl/subproject/branches/d1 -m moved1again
cd ..

echo % convert trunk and branches
hg convert --datesort $svnurl/subproject A-hg

cd A-hg
hg glog --template '#rev# #desc|firstline# files: #files#\n'
# Keep only the text up to the first colon of each 'hg branches' line.
hg branches | sed 's/:.*/:/'
cd ..
@@ -1,68 +1,76 b''
1 1 % initial svn import
2 2 Adding projA/trunk
3 Adding projA/trunk/a
3 4 Adding projA/trunk/d1
4 5 Adding projA/trunk/d1/b
6 Adding projA/trunk/d1/c
5 7
6 8 Committed revision 1.
7 9 % update svn repository
8 10 A A/trunk
11 A A/trunk/a
9 12 A A/trunk/d1
10 13 A A/trunk/d1/b
14 A A/trunk/d1/c
11 15 Checked out revision 1.
16 Sending trunk/a
17 Sending trunk/d1/c
18 Transmitting file data ..
19 Committed revision 2.
12 20
13 Committed revision 2.
21 Committed revision 3.
14 22 D trunk
15 23 A subproject
24 A subproject/a
16 25 A subproject/d1
17 26 A subproject/d1/b
18 Updated to revision 2.
27 A subproject/d1/c
28 Updated to revision 3.
19 29 A subproject/trunk
20 30 Adding subproject/trunk
21 31
22 Committed revision 3.
32 Committed revision 4.
23 33 A subproject/branches
24 34 Adding subproject/branches
25 35
26 Committed revision 4.
36 Committed revision 5.
27 37
28 Committed revision 5.
38 Committed revision 6.
29 39 A subproject/trunk/d1
30 40 A subproject/trunk/d1/b
41 A subproject/trunk/d1/c
31 42 D subproject/d1
32 Updated to revision 5.
43 Updated to revision 6.
33 44 Sending subproject/trunk/d1/b
34 45 Transmitting file data .
35 Committed revision 6.
46 Committed revision 7.
36 47
37 Committed revision 7.
48 Committed revision 8.
38 49 % convert trunk and branches
39 50 initializing destination A-hg repository
40 51 scanning source...
41 52 sorting...
42 53 converting...
43 7 init projA
44 54 6 createtrunk
45 55 5 moved1
46 56 4 moved1
47 57 3 changeb
48 58 2 changeb
49 59 1 moved1again
50 60 0 moved1again
51 o 7 moved1again files: d1/b
61 o 6 moved1again files: d1/b d1/c
52 62 |
53 | o 6 moved1again files:
63 | o 5 moved1again files:
54 64 | |
55 o | 5 changeb files: d1/b
65 o | 4 changeb files: d1/b
56 66 | |
57 | o 4 changeb files: b
67 | o 3 changeb files: b
58 68 | |
59 o | 3 moved1 files: d1/b
60 | |
61 | o 2 moved1 files:
69 o | 2 moved1 files: d1/b d1/c
62 70 | |
63 o | 1 createtrunk files:
64 /
65 o 0 init projA files: b
71 | o 1 moved1 files: b c
72 |
73 o 0 createtrunk files:
66 74
67 default 7:
68 d1 6:
75 default 6:
76 d1 5:
General Comments 0
You need to be logged in to leave comments. Login now