##// END OF EJS Templates
convert: follow svn module parent moves
Patrick Mezard -
r5958:59dce249 default
parent child Browse files
Show More
@@ -1,1038 +1,1041 b''
1 1 # Subversion 1.4/1.5 Python API backend
2 2 #
3 3 # Copyright(C) 2007 Daniel Holth et al
4 4 #
5 5 # Configuration options:
6 6 #
7 7 # convert.svn.trunk
8 8 # Relative path to the trunk (default: "trunk")
9 9 # convert.svn.branches
10 10 # Relative path to tree of branches (default: "branches")
11 11 # convert.svn.tags
12 12 # Relative path to tree of tags (default: "tags")
13 13 #
14 14 # Set these in a hgrc, or on the command line as follows:
15 15 #
16 16 # hg convert --config convert.svn.trunk=wackoname [...]
17 17
18 18 import locale
19 19 import os
20 20 import re
21 21 import sys
22 22 import cPickle as pickle
23 23 import tempfile
24 24
25 25 from mercurial import strutil, util
26 26 from mercurial.i18n import _
27 27
28 28 # Subversion stuff. Works best with very recent Python SVN bindings
29 29 # e.g. SVN 1.5 or backports. Thanks to the bzr folks for enhancing
30 30 # these bindings.
31 31
32 32 from cStringIO import StringIO
33 33
34 34 from common import NoRepo, commit, converter_source, encodeargs, decodeargs
35 35 from common import commandline, converter_sink, mapfile
36 36
37 37 try:
38 38 from svn.core import SubversionException, Pool
39 39 import svn
40 40 import svn.client
41 41 import svn.core
42 42 import svn.ra
43 43 import svn.delta
44 44 import transport
45 45 except ImportError:
46 46 pass
47 47
def geturl(path):
    """Return a URL for path.

    The Subversion client is asked first. If it raises
    SubversionException and path is an existing directory, a file://
    URL is built from its absolute, normalized location. Anything else
    is returned unchanged.
    """
    try:
        canonical = svn.core.svn_path_canonicalize(path)
        return svn.client.url_from_path(canonical)
    except SubversionException:
        pass
    if not os.path.isdir(path):
        return path
    abspath = os.path.normpath(os.path.abspath(path))
    if os.name == 'nt':
        # Drive-letter paths need a leading slash and forward slashes.
        abspath = '/' + util.normpath(abspath)
    return 'file://%s' % abspath
59 59
def optrev(number):
    """Return an svn_opt_revision_t designating revision number."""
    revision = svn.core.svn_opt_revision_t()
    revision.kind = svn.core.svn_opt_revision_number
    revision.value.number = number
    return revision
65 65
class changedpath(object):
    """Plain copy of the copyfrom_path, copyfrom_rev and action
    attributes of a changed-path entry p.
    """
    def __init__(self, p):
        for name in ('copyfrom_path', 'copyfrom_rev', 'action'):
            setattr(self, name, getattr(p, name))
71 71
72 72 def get_log_child(fp, url, paths, start, end, limit=0, discover_changed_paths=True,
73 73 strict_node_history=False):
74 74 protocol = -1
75 75 def receiver(orig_paths, revnum, author, date, message, pool):
76 76 if orig_paths is not None:
77 77 for k, v in orig_paths.iteritems():
78 78 orig_paths[k] = changedpath(v)
79 79 pickle.dump((orig_paths, revnum, author, date, message),
80 80 fp, protocol)
81 81
82 82 try:
83 83 # Use an ra of our own so that our parent can consume
84 84 # our results without confusing the server.
85 85 t = transport.SvnRaTransport(url=url)
86 86 svn.ra.get_log(t.ra, paths, start, end, limit,
87 87 discover_changed_paths,
88 88 strict_node_history,
89 89 receiver)
90 90 except SubversionException, (inst, num):
91 91 pickle.dump(num, fp, protocol)
92 92 except IOError:
93 93 # Caller may interrupt the iteration
94 94 pickle.dump(None, fp, protocol)
95 95 else:
96 96 pickle.dump(None, fp, protocol)
97 97 fp.close()
98 98
def debugsvnlog(ui, **opts):
    """Fetch SVN log in a subprocess and channel them back to parent to
    avoid memory collection issues.
    """
    # Both ends of the pipe carry encoded/pickled bytes.
    for stream in (sys.stdin, sys.stdout):
        util.set_binary(stream)
    encoded = sys.stdin.read()
    get_log_child(sys.stdout, *decodeargs(encoded))
107 107
class logstream:
    """Interruptible revision log iterator.

    Wraps a file-like object from which pickled log entries are read.
    Iteration yields each (orig_paths, revnum, author, date, message)
    tuple and stops at a pickled None. Any other non-entry value is
    reported by raising SubversionException with that value attached.
    """
    def __init__(self, stdout):
        self._stdout = stdout

    def __iter__(self):
        # NOTE: pickle.load trusts its input; the stream is expected to
        # come from our own child process only.
        while True:
            entry = pickle.load(self._stdout)
            try:
                orig_paths, revnum, author, date, message = entry
            except (TypeError, ValueError):
                # Not a 5-item log entry: either the end marker or an
                # error value sent by the child. Only unpacking errors
                # are caught so that interrupts are not swallowed.
                if entry is None:
                    break
                raise SubversionException("child raised exception", entry)
            yield entry

    def close(self):
        """Close the underlying stream; safe to call more than once."""
        if self._stdout:
            self._stdout.close()
            self._stdout = None
128 128
def get_log(url, paths, start, end, limit=0, discover_changed_paths=True,
            strict_node_history=False):
    """Run 'hg debugsvnlog' in a child process and return a logstream
    reading the log entries from its standard output.

    The arguments are encoded and written to the child's standard
    input, which is then closed.
    """
    payload = encodeargs([url, paths, start, end, limit,
                          discover_changed_paths, strict_node_history])
    cmd = '%s debugsvnlog' % util.shellquote(util.hgexecutable())
    stdin, stdout = os.popen2(cmd, 'b')
    stdin.write(payload)
    stdin.close()
    return logstream(stdout)
140 140
141 141 # SVN conversion code stolen from bzr-svn and tailor
142 142 #
143 143 # Subversion looks like a versioned filesystem, branches structures
144 144 # are defined by conventions and not enforced by the tool. First,
145 145 # we define the potential branches (modules) as "trunk" and "branches"
146 146 # children directories. Revisions are then identified by their
147 147 # module and revision number (and a repository identifier).
148 148 #
149 149 # The revision graph is really a tree (or a forest). By default, a
150 150 # revision parent is the previous revision in the same module. If the
151 151 # module directory is copied/moved from another module then the
152 152 # revision is the module root and its parent the source revision in
153 153 # the parent module. A revision has at most one parent.
154 154 #
155 155 class svn_source(converter_source):
def __init__(self, ui, url, rev=None):
    """Open the Subversion repository at url as a conversion source.

    url may end in "@REV" to cap the conversion at that revision; an
    explicit rev argument takes precedence. Raises NoRepo when the
    bindings are missing or url is not a Subversion repository, and
    util.Abort when rev is not an integer or no revision is found.
    """
    super(svn_source, self).__init__(ui, url, rev=rev)

    # The module-level import of the bindings is allowed to fail
    # silently, so probe for one of the imported names here.
    try:
        SubversionException
    except NameError:
        raise NoRepo('Subversion python bindings could not be loaded')

    self.encoding = locale.getpreferredencoding()
    self.lastrevs = {}

    # Support file://path@rev syntax. Useful e.g. to convert
    # deleted branches.
    latest = None
    at = url.rfind('@')
    if at >= 0:
        try:
            latest = int(url[at+1:])
        except ValueError:
            # Not a revision suffix, keep the url untouched
            pass
        else:
            url = url[:at]

    self.url = geturl(url)
    self.encoding = 'UTF-8' # Subversion is always nominal UTF-8
    try:
        self.transport = transport.SvnRaTransport(url=self.url)
        self.ra = self.transport.ra
        self.ctx = self.transport.client
        self.base = svn.ra.get_repos_root(self.ra)
        self.module = self.url[len(self.base):]
        self.rootmodule = self.module
        self.commits = {}
        self.paths = {}
        self.uuid = svn.ra.get_uuid(self.ra).decode(self.encoding)
    except SubversionException:
        ui.print_exc()
        raise NoRepo("%s does not look like a Subversion repo" % self.url)

    if rev:
        try:
            latest = int(rev)
        except ValueError:
            raise util.Abort('svn: revision %s is not an integer' % rev)

    # A missing or unreadable blacklist file is not an error
    try:
        self.get_blacklist()
    except IOError:
        pass

    self.head = self.latest(self.module, latest)
    if not self.head:
        raise util.Abort(_('no revision found in module %s') %
                         self.module.encode(self.encoding))
    self.last_changed = self.revnum(self.head)

    self._changescache = None

    # Remember the working copy when url is one on the local disk
    if not os.path.exists(os.path.join(url, '.svn/entries')):
        self.wc = None
    else:
        self.wc = url
    self.convertfp = None
217 217
def setrevmap(self, revmap):
    """Record in self.lastrevs the highest revision number seen for
    each module among the revision ids of revmap.
    """
    highest = {}
    for revid in revmap.iterkeys():
        uuid, module, revnum = self.revsplit(revid)
        if module not in highest or revnum > highest[module]:
            highest[module] = revnum
    self.lastrevs = highest
226 226
def exists(self, path, optrev):
    """Return True if listing path (relative to self.url) at revision
    optrev succeeds, False if it raises SubversionException.
    """
    try:
        target = self.url.rstrip('/') + '/' + path
        svn.client.ls(target, optrev, False, self.ctx)
    except SubversionException:
        return False
    return True
234 234
def getheads(self):
    """Return the list of head revision ids to convert.

    The trunk, tags and branches locations are read from the
    convert.svn.* settings, defaulting to their conventional names.
    A configured location which does not exist aborts. As side
    effects, self.module is moved to the trunk when one is found, and
    self.head, self.heads and self.tags are set.
    """
    def locate(name, rev):
        # Return the stripped path of layout directory name if it
        # exists at rev, None otherwise.
        configured = self.ui.config('convert', 'svn.' + name)
        path = (configured or name).strip('/')
        if self.exists(path, rev):
            self.ui.note(_('found %s at %r\n') % (name, path))
            return path
        if configured:
            raise util.Abort(_('expected %s to be at %r, but not found')
                             % (name, path))
        return None

    rev = optrev(self.last_changed)
    oldmodule = ''
    trunk = locate('trunk', rev)
    tags = locate('tags', rev)
    branches = locate('branches', rev)

    # If the project has a trunk or branches, we will extract heads
    # from them. We keep the project root otherwise.
    if trunk:
        oldmodule = self.module or ''
        self.module += '/' + trunk
        self.head = self.latest(self.module, self.last_changed)
        if not self.head:
            raise util.Abort(_('no revision found in module %s') %
                             self.module.encode(self.encoding))

    # First head in the list is the module's head
    self.heads = [self.head]
    self.tags = '%s/%s' % (oldmodule , (tags or 'tags'))

    # Check if branches bring a few more heads to the list
    if branches:
        branchesurl = self.url.strip('/') + '/' + branches
        branchnames = svn.client.ls(branchesurl, rev, False, self.ctx)
        for branch in branchnames.keys():
            module = '%s/%s/%s' % (oldmodule, branches, branch)
            brevid = self.latest(module, self.last_changed)
            if brevid:
                self.ui.note('found branch %s at %d\n' %
                             (branch, self.revnum(brevid)))
                self.heads.append(brevid)
            else:
                self.ui.note(_('ignoring empty branch %s\n') %
                             branch.encode(self.encoding))

    return self.heads
285 285
def getfile(self, file, rev):
    """Return the contents of file at rev, remembering its mode in
    self.modecache for a later getmode() call.
    """
    contents, flags = self._getfile(file, rev)
    self.modecache[(file, rev)] = flags
    return contents
290 290
def getmode(self, file, rev):
    """Return the mode cached for (file, rev); KeyError if absent."""
    key = (file, rev)
    return self.modecache[key]
293 293
def getchanges(self, rev):
    """Return (files, copies) for rev.

    files is a sorted list of (name, rev) pairs and copies a dict. A
    result stored in self._changescache for the same rev is returned
    as is. Otherwise the mode cache is reset and the entry for rev is
    removed from self.paths once it has been used.
    """
    cached = self._changescache
    if cached and cached[0] == rev:
        return cached[1]
    self._changescache = None
    self.modecache = {}
    paths, parents = self.paths[rev]
    if not parents:
        # Perform a full checkout on roots
        uuid, module, revnum = self.revsplit(rev)
        entries = svn.client.ls(self.base + module, optrev(revnum),
                                True, self.ctx)
        files = [name for name, ent in entries.iteritems()
                 if ent.kind == svn.core.svn_node_file]
        copies = {}
    else:
        files, copies = self.expandpaths(rev, paths, parents)

    files.sort()
    files = [(name, rev) for name in files]

    # caller caches the result, so free it here to release memory
    del self.paths[rev]
    return (files, copies)
317 317
def getchangedfiles(self, rev, i):
    """Return the names of the files changed in rev.

    The full getchanges() result is kept in self._changescache so the
    next getchanges(rev) call can reuse it. i is not used.
    """
    changes = self.getchanges(rev)
    self._changescache = (rev, changes)
    files = changes[0]
    return [entry[0] for entry in files]
322 322
def getcommit(self, rev):
    """Return the commit object for rev, fetching the log if needed.

    The returned commit is removed from self.commits.
    """
    if rev not in self.commits:
        uuid, module, revnum = self.revsplit(rev)
        self.module = module
        self.reparent(module)
        # We assume that:
        # - requests for revisions after "stop" come from the
        # revision graph backward traversal. Cache all of them
        # down to stop, they will be used eventually.
        # - requests for revisions before "stop" come to get
        # isolated branches parents. Just fetch what is needed.
        stop = self.lastrevs.get(module, 0)
        if revnum < stop:
            stop = revnum + 1
        self._fetch_revisions(revnum, stop)
    # caller caches the result, so free it here to release memory
    return self.commits.pop(rev)
342 342
def gettags(self):
    """Return a dict mapping tag names to the revision ids they were
    copied from.

    The log of self.tags is read from revision 0 up to the head
    revision. Every changed path below the tags directory contributes
    an entry named after its last path component. A
    SubversionException while reading the log is reported with a note
    and whatever was collected so far is returned.
    """
    tags = {}
    start = self.revnum(self.head)
    prefix = self.tags + '/'
    try:
        stream = get_log(self.url, [self.tags], 0, start)
        try:
            for entry in stream:
                orig_paths, revnum, author, date, message = entry
                if orig_paths is None:
                    # Revision without changed paths, nothing to scan
                    continue
                for path in orig_paths:
                    if not path.startswith(prefix):
                        continue
                    ent = orig_paths[path]
                    source = ent.copyfrom_path
                    rev = ent.copyfrom_rev
                    tag = path.split('/')[-1]
                    tags[tag] = self.revid(rev, module=source)
        finally:
            # Always release the child process pipe
            stream.close()
    except SubversionException:
        self.ui.note('no tags found at revision %d\n' % start)
    return tags
360 360
def converted(self, rev, destrev):
    """Append "destrev revnum" to .svn/hg-shamap in the working copy.

    Does nothing when the source is not a working copy. The map file
    is opened in append mode on first use and flushed after each
    write.
    """
    if not self.wc:
        return
    fp = self.convertfp
    if fp is None:
        fp = open(os.path.join(self.wc, '.svn', 'hg-shamap'), 'a')
        self.convertfp = fp
    fp.write('%s %d\n' % (destrev, self.revnum(rev)))
    fp.flush()
369 369
370 370 # -- helper functions --
371 371
def revid(self, revnum, module=None):
    """Return the unicode revision id "svn:UUID<module>@<revnum>".

    self.module is used when module is empty or not given.
    """
    module = module or self.module
    decoded = module.decode(self.encoding)
    return u"svn:%s%s@%s" % (self.uuid, decoded, revnum)
377 377
def revnum(self, rev):
    """Return the integer following the last '@' of revision id rev."""
    tail = rev.split('@')[-1]
    return int(tail)
380 380
def revsplit(self, rev):
    """Split revision id rev into (uuid, module, revnum).

    rev looks like "svn:UUID/module/path@REVNUM". module is returned
    with its leading slash, or as '' when rev has no module part.
    The revision number is taken after the *last* '@', like revnum()
    does, so module paths containing '@' are parsed correctly.
    Raises ValueError if rev has no '@' or the number is not an
    integer.
    """
    encoded = rev.encode(self.encoding)
    at = encoded.rfind('@')
    if at < 0:
        raise ValueError('malformed revision identifier: %r' % (rev,))
    url = encoded[:at]
    revnum = int(encoded[at + 1:])
    parts = url.split('/', 1)
    # Drop the leading "svn:" scheme marker
    uuid = parts[0][4:]
    mod = ''
    if len(parts) > 1:
        mod = '/' + parts[1]
    return uuid, mod, revnum
390 390
def latest(self, path, stop=0):
    """Find the latest revid affecting path, up to stop. It may return
    a revision in a different module, since a branch may be moved without
    a change being reported. Return None if computed module does not
    belong to rootmodule subtree.

    stop defaults to the latest revision of the repository. Raises
    util.Abort if path cannot be found at stop.
    """
    if not stop:
        stop = svn.ra.get_latest_revnum(self.ra)
    try:
        self.reparent('')
        try:
            dirent = svn.ra.stat(self.ra, path.strip('/'), stop)
        finally:
            # Restore the session even when stat() fails, otherwise
            # callers catching the Abort below would keep working with
            # a session rooted at the repository root.
            self.reparent(self.module)
    except SubversionException:
        dirent = None
    if not dirent:
        raise util.Abort('%s not found up to revision %d' % (path, stop))

    # stat() gives us the previous revision on this line of development, but
    # it might be in *another module*. Fetch the log and detect renames down
    # to the latest revision.
    stream = get_log(self.url, [path], stop, dirent.created_rev)
    try:
        for entry in stream:
            paths, revnum, author, date, message = entry
            if revnum <= dirent.created_rev:
                break

            for p in paths:
                if not path.startswith(p) or not paths[p].copyfrom_path:
                    continue
                # path lives below a copied directory: follow the copy
                # source and keep the part of path below it.
                newpath = paths[p].copyfrom_path + path[len(p):]
                self.ui.debug("branch renamed from %s to %s at %d\n" %
                              (path, newpath, revnum))
                path = newpath
                break
    finally:
        stream.close()

    if not path.startswith(self.rootmodule):
        self.ui.debug(_('ignoring foreign branch %r\n') % path)
        return None
    return self.revid(dirent.created_rev, path)
433 433
def get_blacklist(self):
    """Avoid certain revision numbers.
    It is not uncommon for two nearby revisions to cancel each other
    out, e.g. 'I copied trunk into a subdirectory of itself instead
    of making a branch'. The converted repository is significantly
    smaller if we ignore such revisions.

    Revision numbers are read one per line from "blacklist.txt" in
    the current directory into self.blacklist; lines starting with
    '#' and lines which are not integers are ignored. self.blacklist
    is set to an empty set before the file is opened, so it exists
    even when opening fails with IOError.
    """
    self.blacklist = util.set()
    blacklist = self.blacklist
    fp = open("blacklist.txt", "r")
    try:
        for line in fp:
            if line.startswith("#"):
                continue
            try:
                blacklist.add(int(line.strip()))
            except ValueError:
                pass # not an integer or a comment
    finally:
        # Do not rely on garbage collection to release the file
        fp.close()
449 449
def is_blacklisted(self, svn_rev):
    """Tell whether revision number svn_rev is in self.blacklist."""
    blacklisted = self.blacklist
    return svn_rev in blacklisted
452 452
def reparent(self, module):
    """Point the ra session at module, given relative to self.base."""
    target = (self.base + module).encode(self.encoding)
    self.ui.debug("reparent to %s\n" % target)
    svn.ra.reparent(self.ra, target)
457 457
def expandpaths(self, rev, paths, parents):
    """Expand the changed paths of rev into file names and copies.

    paths is a list of (path, entry) pairs and parents the list of
    parent revision ids of rev. Returns (entries, copies): entries
    is the util.unique() list of recoded file names relative to the
    module, copies maps recoded destination names to recoded source
    names.

    As a side effect, self.module is switched to the module of rev
    (with a matching reparent) when it differs.
    """
    def get_entry_from_path(path, module=self.module):
        # Given the repository url of this wc, say
        #   "http://server/plone/CMFPlone/branches/Plone-2_0-branch"
        # extract the "entry" portion (a relative path) from what
        # svn log --xml says, ie
        #   "/CMFPlone/branches/Plone-2_0-branch/tests/PloneTestCase.py"
        # that is to say "tests/PloneTestCase.py"
        #
        # NOTE(review): the module default is bound when this helper
        # is defined, i.e. before self.module may be updated below.
        if path.startswith(module):
            relative = path[len(module):]
            if relative.startswith('/'):
                return relative[1:]
            else:
                return relative

        # The path is outside our tracked tree...
        self.ui.debug('%r is not under %r, ignoring\n' % (path, module))
        return None

    entries = []
    copyfrom = {} # Map of entrypath, revision for finding source of deleted revisions.
    copies = {}

    # Make sure the session is rooted at the module of rev
    new_module, revnum = self.revsplit(rev)[1:]
    if new_module != self.module:
        self.module = new_module
        self.reparent(self.module)

    for path, ent in paths:
        entrypath = get_entry_from_path(path, module=self.module)
        entry = entrypath.decode(self.encoding)

        # Dispatch on what the path is at revnum: a file, nothing
        # (kind 0) or a directory.
        kind = svn.ra.check_path(self.ra, entrypath, revnum)
        if kind == svn.core.svn_node_file:
            if ent.copyfrom_path:
                copyfrom_path = get_entry_from_path(ent.copyfrom_path)
                if copyfrom_path:
                    self.ui.debug("Copied to %s from %s@%s\n" %
                                  (entrypath, copyfrom_path,
                                   ent.copyfrom_rev))
                    # It's probably important for hg that the source
                    # exists in the revision's parent, not just the
                    # ent.copyfrom_rev
                    fromkind = svn.ra.check_path(self.ra, copyfrom_path, ent.copyfrom_rev)
                    if fromkind != 0:
                        copies[self.recode(entry)] = self.recode(copyfrom_path)
            entries.append(self.recode(entry))
        elif kind == 0: # gone, but had better be a deleted *file*
            self.ui.debug("gone from %s\n" % ent.copyfrom_rev)

            # if a branch is created but entries are removed in the same
            # changeset, get the right fromrev
            # parents cannot be empty here, you cannot remove things from
            # a root revision.
            uuid, old_module, fromrev = self.revsplit(parents[0])

            # basepath and entrypath start out identical; only entrypath
            # is rewritten below.
            basepath = old_module + "/" + get_entry_from_path(path, module=self.module)
            entrypath = old_module + "/" + get_entry_from_path(path, module=self.module)

            def lookup_parts(p):
                # Return (prefix, entry) for the longest leading part
                # of p recorded in copyfrom, or None.
                rc = None
                parts = p.split("/")
                for i in range(len(parts)):
                    part = "/".join(parts[:i])
                    info = part, copyfrom.get(part, None)
                    if info[1] is not None:
                        self.ui.debug("Found parent directory %s\n" % info[1])
                        rc = info
                return rc

            self.ui.debug("base, entry %s %s\n" % (basepath, entrypath))

            frompath, froment = lookup_parts(entrypath) or (None, revnum - 1)

            # need to remove fragment from lookup_parts and replace with copyfrom_path
            if frompath is not None:
                self.ui.debug("munge-o-matic\n")
                self.ui.debug(entrypath + '\n')
                self.ui.debug(entrypath[len(frompath):] + '\n')
                entrypath = froment.copyfrom_path + entrypath[len(frompath):]
                fromrev = froment.copyfrom_rev
                self.ui.debug("Info: %s %s %s %s\n" % (frompath, froment, ent, entrypath))

            # We can avoid the reparent calls if the module has not changed
            # but it probably does not worth the pain.
            self.reparent('')
            fromkind = svn.ra.check_path(self.ra, entrypath.strip('/'), fromrev)
            self.reparent(self.module)

            if fromkind == svn.core.svn_node_file:   # a deleted file
                entries.append(self.recode(entry))
            elif fromkind == svn.core.svn_node_dir:
                # print "Deleted/moved non-file:", revnum, path, ent
                # children = self._find_children(path, revnum - 1)
                # print "find children %s@%d from %d action %s" % (path, revnum, ent.copyfrom_rev, ent.action)
                # Sometimes this is tricky. For example: in
                # The Subversion Repository revision 6940 a dir
                # was copied and one of its files was deleted
                # from the new location in the same commit. This
                # code can't deal with that yet.
                if ent.action == 'C':
                    children = self._find_children(path, fromrev)
                else:
                    # List the old location, then rename the results
                    # to the path reported by the log.
                    oroot = entrypath.strip('/')
                    nroot = path.strip('/')
                    children = self._find_children(oroot, fromrev)
                    children = [s.replace(oroot,nroot) for s in children]
                # Mark all [files, not directories] as deleted.
                for child in children:
                    # Can we move a child directory and its
                    # parent in the same commit? (probably can). Could
                    # cause problems if instead of revnum -1,
                    # we have to look in (copyfrom_path, revnum - 1)
                    entrypath = get_entry_from_path("/" + child, module=old_module)
                    if entrypath:
                        entry = self.recode(entrypath.decode(self.encoding))
                        if entry in copies:
                            # deleted file within a copy
                            del copies[entry]
                        else:
                            entries.append(entry)
            else:
                self.ui.debug('unknown path in revision %d: %s\n' % \
                              (revnum, path))
        elif kind == svn.core.svn_node_dir:
            # Should probably synthesize normal file entries
            # and handle as above to clean up copy/rename handling.

            # If the directory just had a prop change,
            # then we shouldn't need to look for its children.
            if ent.action == 'M':
                continue

            # Also this could create duplicate entries. Not sure
            # whether this will matter. Maybe should make entries a set.
            # print "Changed directory", revnum, path, ent.action, ent.copyfrom_path, ent.copyfrom_rev
            # This will fail if a directory was copied
            # from another branch and then some of its files
            # were deleted in the same transaction.
            children = self._find_children(path, revnum)
            children.sort()
            for child in children:
                # Can we move a child directory and its
                # parent in the same commit? (probably can). Could
                # cause problems if instead of revnum -1,
                # we have to look in (copyfrom_path, revnum - 1)
                entrypath = get_entry_from_path("/" + child, module=self.module)
                # print child, self.module, entrypath
                if entrypath:
                    # Need to filter out directories here...
                    kind = svn.ra.check_path(self.ra, entrypath, revnum)
                    if kind != svn.core.svn_node_dir:
                        entries.append(self.recode(entrypath))

            # Copies here (must copy all from source)
            # Probably not a real problem for us if
            # source does not exist

            # Can do this with the copy command "hg copy"
            # if ent.copyfrom_path:
            #     copyfrom_entry = get_entry_from_path(ent.copyfrom_path.decode(self.encoding),
            #             module=self.module)
            #     copyto_entry = entrypath
            #
            #     print "copy directory", copyfrom_entry, 'to', copyto_entry
            #
            #     copies.append((copyfrom_entry, copyto_entry))

            if ent.copyfrom_path:
                copyfrom_path = ent.copyfrom_path.decode(self.encoding)
                copyfrom_entry = get_entry_from_path(copyfrom_path, module=self.module)
                if copyfrom_entry:
                    # Remember the copy so that deletions below this
                    # directory can be traced back by lookup_parts().
                    copyfrom[path] = ent
                    self.ui.debug("mark %s came from %s\n" % (path, copyfrom[path]))

                    # Good, /probably/ a regular copy. Really should check
                    # to see whether the parent revision actually contains
                    # the directory in question.
                    children = self._find_children(self.recode(copyfrom_path), ent.copyfrom_rev)
                    children.sort()
                    for child in children:
                        entrypath = get_entry_from_path("/" + child, module=self.module)
                        if entrypath:
                            entry = entrypath.decode(self.encoding)
                            # print "COPY COPY From", copyfrom_entry, entry
                            copyto_path = path + entry[len(copyfrom_entry):]
                            copyto_entry = get_entry_from_path(copyto_path, module=self.module)
                            # print "COPY", entry, "COPY To", copyto_entry
                            copies[self.recode(copyto_entry)] = self.recode(entry)
                            # copy from quux splort/quuxfile

    return (util.unique(entries), copies)
650 650
651 651 def _fetch_revisions(self, from_revnum, to_revnum):
652 652 if from_revnum < to_revnum:
653 653 from_revnum, to_revnum = to_revnum, from_revnum
654 654
655 655 self.child_cset = None
656 656 def parselogentry(orig_paths, revnum, author, date, message):
657 657 """Return the parsed commit object or None, and True if
658 658 the revision is a branch root.
659 659 """
660 660 self.ui.debug("parsing revision %d (%d changes)\n" %
661 661 (revnum, len(orig_paths)))
662 662
663 663 branched = False
664 664 rev = self.revid(revnum)
665 665 # branch log might return entries for a parent we already have
666 666
667 667 if (rev in self.commits or revnum < to_revnum):
668 668 return None, branched
669 669
670 670 parents = []
671 # check whether this revision is the start of a branch
672 if self.module in orig_paths:
673 ent = orig_paths[self.module]
671 # check whether this revision is the start of a branch or part
672 # of a branch renaming
673 orig_paths = orig_paths.items()
674 orig_paths.sort()
675 root_paths = [(p,e) for p,e in orig_paths if self.module.startswith(p)]
676 if root_paths:
677 path, ent = root_paths[-1]
674 678 if ent.copyfrom_path:
675 679 branched = True
680 newpath = ent.copyfrom_path + self.module[len(path):]
676 681 # ent.copyfrom_rev may not be the actual last revision
677 previd = self.latest(ent.copyfrom_path, ent.copyfrom_rev)
682 previd = self.latest(newpath, ent.copyfrom_rev)
678 683 if previd is not None:
679 684 parents = [previd]
680 685 prevmodule, prevnum = self.revsplit(previd)[1:]
681 686 self.ui.note('found parent of branch %s at %d: %s\n' %
682 687 (self.module, prevnum, prevmodule))
683 688 else:
684 689 self.ui.debug("No copyfrom path, don't know what to do.\n")
685 690
686 orig_paths = orig_paths.items()
687 orig_paths.sort()
688 691 paths = []
689 692 # filter out unrelated paths
690 693 for path, ent in orig_paths:
691 694 if not path.startswith(self.module):
692 695 self.ui.debug("boring@%s: %s\n" % (revnum, path))
693 696 continue
694 697 paths.append((path, ent))
695 698
696 699 # Example SVN datetime. Includes microseconds.
697 700 # ISO-8601 conformant
698 701 # '2007-01-04T17:35:00.902377Z'
699 702 date = util.parsedate(date[:19] + " UTC", ["%Y-%m-%dT%H:%M:%S"])
700 703
701 704 log = message and self.recode(message) or ''
702 705 author = author and self.recode(author) or ''
703 706 try:
704 707 branch = self.module.split("/")[-1]
705 708 if branch == 'trunk':
706 709 branch = ''
707 710 except IndexError:
708 711 branch = None
709 712
710 713 cset = commit(author=author,
711 714 date=util.datestr(date),
712 715 desc=log,
713 716 parents=parents,
714 717 branch=branch,
715 718 rev=rev.encode('utf-8'))
716 719
717 720 self.commits[rev] = cset
718 721 # The parents list is *shared* among self.paths and the
719 722 # commit object. Both will be updated below.
720 723 self.paths[rev] = (paths, cset.parents)
721 724 if self.child_cset and not self.child_cset.parents:
722 725 self.child_cset.parents[:] = [rev]
723 726 self.child_cset = cset
724 727 return cset, branched
725 728
726 729 self.ui.note('fetching revision log for "%s" from %d to %d\n' %
727 730 (self.module, from_revnum, to_revnum))
728 731
729 732 try:
730 733 firstcset = None
731 734 branched = False
732 735 stream = get_log(self.url, [self.module], from_revnum, to_revnum)
733 736 try:
734 737 for entry in stream:
735 738 paths, revnum, author, date, message = entry
736 739 if self.is_blacklisted(revnum):
737 740 self.ui.note('skipping blacklisted revision %d\n'
738 741 % revnum)
739 742 continue
740 743 if paths is None:
741 744 self.ui.debug('revision %d has no entries\n' % revnum)
742 745 continue
743 746 cset, branched = parselogentry(paths, revnum, author,
744 747 date, message)
745 748 if cset:
746 749 firstcset = cset
747 750 if branched:
748 751 break
749 752 finally:
750 753 stream.close()
751 754
752 755 if not branched and firstcset and not firstcset.parents:
753 756 # The first revision of the sequence (the last fetched one)
754 757 # has invalid parents if not a branch root. Find the parent
755 758 # revision now, if any.
756 759 try:
757 760 firstrevnum = self.revnum(firstcset.rev)
758 761 if firstrevnum > 1:
759 762 latest = self.latest(self.module, firstrevnum - 1)
760 763 if latest:
761 764 firstcset.parents.append(latest)
762 765 except util.Abort:
763 766 pass
764 767 except SubversionException, (inst, num):
765 768 if num == svn.core.SVN_ERR_FS_NO_SUCH_REVISION:
766 769 raise NoSuchRevision(branch=self,
767 770 revision="Revision number %d" % to_revnum)
768 771 raise
769 772
770 773 def _getfile(self, file, rev):
771 774 io = StringIO()
772 775 # TODO: ra.get_file transmits the whole file instead of diffs.
773 776 mode = ''
774 777 try:
775 778 new_module, revnum = self.revsplit(rev)[1:]
776 779 if self.module != new_module:
777 780 self.module = new_module
778 781 self.reparent(self.module)
779 782 info = svn.ra.get_file(self.ra, file, revnum, io)
780 783 if isinstance(info, list):
781 784 info = info[-1]
782 785 mode = ("svn:executable" in info) and 'x' or ''
783 786 mode = ("svn:special" in info) and 'l' or mode
784 787 except SubversionException, e:
785 788 notfound = (svn.core.SVN_ERR_FS_NOT_FOUND,
786 789 svn.core.SVN_ERR_RA_DAV_PATH_NOT_FOUND)
787 790 if e.apr_err in notfound: # File not found
788 791 raise IOError()
789 792 raise
790 793 data = io.getvalue()
791 794 if mode == 'l':
792 795 link_prefix = "link "
793 796 if data.startswith(link_prefix):
794 797 data = data[len(link_prefix):]
795 798 return data, mode
796 799
def _find_children(self, path, revnum):
    """Return the names found by a recursive listing of path at
    revnum, each prefixed with path stripped of its slashes.
    """
    path = path.strip('/')
    pool = Pool()
    rpath = '/'.join([self.base, path]).strip('/')
    listing = svn.client.ls(rpath, optrev(revnum), True, self.ctx, pool)
    return ['%s/%s' % (path, name) for name in listing.keys()]
802 805
# Shell script installed by svn_sink as the hooks/pre-revprop-change
# hook of a repository it creates. It allows modifying svn:log and
# adding hg:convert-branch and hg:convert-rev; any other revision
# property change is refused.
pre_revprop_change = '''#!/bin/sh

REPOS="$1"
REV="$2"
USER="$3"
PROPNAME="$4"
ACTION="$5"

if [ "$ACTION" = "M" -a "$PROPNAME" = "svn:log" ]; then exit 0; fi
if [ "$ACTION" = "A" -a "$PROPNAME" = "hg:convert-branch" ]; then exit 0; fi
if [ "$ACTION" = "A" -a "$PROPNAME" = "hg:convert-rev" ]; then exit 0; fi

echo "Changing prohibited revision property" >&2
exit 1
'''
818 821
819 822 class svn_sink(converter_sink, commandline):
820 823 commit_re = re.compile(r'Committed revision (\d+).', re.M)
821 824
def prerun(self):
    """Change to the working copy directory, if one is set."""
    wc = self.wc
    if wc:
        os.chdir(wc)
825 828
def postrun(self):
    """Change back to the directory saved in self.cwd, if a working
    copy is set.
    """
    if not self.wc:
        return
    os.chdir(self.cwd)
829 832
def join(self, name):
    """Return the path of name inside the working copy .svn directory."""
    admindir = os.path.join(self.wc, '.svn')
    return os.path.join(admindir, name)
832 835
def revmapfile(self):
    """Return the path of the revision map file, 'hg-shamap'."""
    name = 'hg-shamap'
    return self.join(name)
835 838
def authorfile(self):
    """Return the path of the author map file, 'hg-authormap'."""
    name = 'hg-authormap'
    return self.join(name)
838 841
def __init__(self, ui, path):
    """Set up a Subversion sink at path.

    If path is a working copy (it has .svn/entries) it is updated
    and used directly. Otherwise a working copy named
    "<basename of path>-wc" is checked out in the current directory,
    after creating a repository at path with svnadmin when path has
    no db/fs-type file and its parent directory exists.
    """
    converter_sink.__init__(self, ui, path)
    commandline.__init__(self, ui, 'svn')
    # Pending operations, filled by other methods of the sink
    self.delete = []
    self.setexec = []
    self.delexec = []
    self.copies = []
    self.wc = None
    # Directory to return to in postrun()
    self.cwd = os.getcwd()

    path = os.path.realpath(path)

    # Path of the repository created below, False if none was created
    created = False
    if os.path.isfile(os.path.join(path, '.svn', 'entries')):
        # self.wc must be set before run0() so that prerun() changes
        # to the working copy directory.
        self.wc = path
        self.run0('update')
    else:
        wcpath = os.path.join(os.getcwd(), os.path.basename(path) + '-wc')

        if os.path.isdir(os.path.dirname(path)):
            if not os.path.exists(os.path.join(path, 'db', 'fs-type')):
                ui.status(_('initializing svn repo %r\n') %
                          os.path.basename(path))
                commandline(ui, 'svnadmin').run0('create', path)
                created = path
            # Turn the local path into a file:// URL
            path = util.normpath(path)
            if not path.startswith('/'):
                path = '/' + path
            path = 'file://' + path

        ui.status(_('initializing svn wc %r\n') % os.path.basename(wcpath))
        # self.wc is still None here, so prerun() does not chdir
        self.run0('checkout', path, wcpath)

        self.wc = wcpath
    self.opener = util.opener(self.wc)
    self.wopener = util.opener(self.wc)
    self.childmap = mapfile(ui, self.join('hg-childmap'))
    # None when the working copy filesystem has no usable exec bit
    self.is_exec = util.checkexec(self.wc) and util.is_exec or None

    if created:
        # Install the hook script on the repository just created
        hook = os.path.join(created, 'hooks', 'pre-revprop-change')
        fp = open(hook, 'w')
        fp.write(pre_revprop_change)
        fp.close()
        util.set_flags(hook, "x")

    xport = transport.SvnRaTransport(url=geturl(path))
    self.uuid = svn.ra.get_uuid(xport.ra)
887 890
888 891 def wjoin(self, *names):
889 892 return os.path.join(self.wc, *names)
890 893
891 894 def putfile(self, filename, flags, data):
892 895 if 'l' in flags:
893 896 self.wopener.symlink(data, filename)
894 897 else:
895 898 try:
896 899 if os.path.islink(self.wjoin(filename)):
897 900 os.unlink(filename)
898 901 except OSError:
899 902 pass
900 903 self.wopener(filename, 'w').write(data)
901 904
902 905 if self.is_exec:
903 906 was_exec = self.is_exec(self.wjoin(filename))
904 907 else:
905 908 # On filesystems not supporting execute-bit, there is no way
906 909 # to know if it is set but asking subversion. Setting it
907 910 # systematically is just as expensive and much simpler.
908 911 was_exec = 'x' not in flags
909 912
910 913 util.set_flags(self.wjoin(filename), flags)
911 914 if was_exec:
912 915 if 'x' not in flags:
913 916 self.delexec.append(filename)
914 917 else:
915 918 if 'x' in flags:
916 919 self.setexec.append(filename)
917 920
918 921 def delfile(self, name):
919 922 self.delete.append(name)
920 923
921 924 def copyfile(self, source, dest):
922 925 self.copies.append([source, dest])
923 926
924 927 def _copyfile(self, source, dest):
925 928 # SVN's copy command pukes if the destination file exists, but
926 929 # our copyfile method expects to record a copy that has
927 930 # already occurred. Cross the semantic gap.
928 931 wdest = self.wjoin(dest)
929 932 exists = os.path.exists(wdest)
930 933 if exists:
931 934 fd, tempname = tempfile.mkstemp(
932 935 prefix='hg-copy-', dir=os.path.dirname(wdest))
933 936 os.close(fd)
934 937 os.unlink(tempname)
935 938 os.rename(wdest, tempname)
936 939 try:
937 940 self.run0('copy', source, dest)
938 941 finally:
939 942 if exists:
940 943 try:
941 944 os.unlink(wdest)
942 945 except OSError:
943 946 pass
944 947 os.rename(tempname, wdest)
945 948
946 949 def dirs_of(self, files):
947 950 dirs = set()
948 951 for f in files:
949 952 if os.path.isdir(self.wjoin(f)):
950 953 dirs.add(f)
951 954 for i in strutil.rfindall(f, '/'):
952 955 dirs.add(f[:i])
953 956 return dirs
954 957
955 958 def add_dirs(self, files):
956 959 add_dirs = [d for d in self.dirs_of(files)
957 960 if not os.path.exists(self.wjoin(d, '.svn', 'entries'))]
958 961 if add_dirs:
959 962 add_dirs.sort()
960 963 self.xargs(add_dirs, 'add', non_recursive=True, quiet=True)
961 964 return add_dirs
962 965
963 966 def add_files(self, files):
964 967 if files:
965 968 self.xargs(files, 'add', quiet=True)
966 969 return files
967 970
968 971 def tidy_dirs(self, names):
969 972 dirs = list(self.dirs_of(names))
970 973 dirs.sort(reverse=True)
971 974 deleted = []
972 975 for d in dirs:
973 976 wd = self.wjoin(d)
974 977 if os.listdir(wd) == '.svn':
975 978 self.run0('delete', d)
976 979 deleted.append(d)
977 980 return deleted
978 981
979 982 def addchild(self, parent, child):
980 983 self.childmap[parent] = child
981 984
982 985 def revid(self, rev):
983 986 return u"svn:%s@%s" % (self.uuid, rev)
984 987
985 988 def putcommit(self, files, parents, commit):
986 989 for parent in parents:
987 990 try:
988 991 return self.revid(self.childmap[parent])
989 992 except KeyError:
990 993 pass
991 994 entries = set(self.delete)
992 995 files = util.frozenset(files)
993 996 entries.update(self.add_dirs(files.difference(entries)))
994 997 if self.copies:
995 998 for s, d in self.copies:
996 999 self._copyfile(s, d)
997 1000 self.copies = []
998 1001 if self.delete:
999 1002 self.xargs(self.delete, 'delete')
1000 1003 self.delete = []
1001 1004 entries.update(self.add_files(files.difference(entries)))
1002 1005 entries.update(self.tidy_dirs(entries))
1003 1006 if self.delexec:
1004 1007 self.xargs(self.delexec, 'propdel', 'svn:executable')
1005 1008 self.delexec = []
1006 1009 if self.setexec:
1007 1010 self.xargs(self.setexec, 'propset', 'svn:executable', '*')
1008 1011 self.setexec = []
1009 1012
1010 1013 fd, messagefile = tempfile.mkstemp(prefix='hg-convert-')
1011 1014 fp = os.fdopen(fd, 'w')
1012 1015 fp.write(commit.desc)
1013 1016 fp.close()
1014 1017 try:
1015 1018 output = self.run0('commit',
1016 1019 username=util.shortuser(commit.author),
1017 1020 file=messagefile,
1018 1021 encoding='utf-8')
1019 1022 try:
1020 1023 rev = self.commit_re.search(output).group(1)
1021 1024 except AttributeError:
1022 1025 self.ui.warn(_('unexpected svn output:\n'))
1023 1026 self.ui.warn(output)
1024 1027 raise util.Abort(_('unable to cope with svn output'))
1025 1028 if commit.rev:
1026 1029 self.run('propset', 'hg:convert-rev', commit.rev,
1027 1030 revprop=True, revision=rev)
1028 1031 if commit.branch and commit.branch != 'default':
1029 1032 self.run('propset', 'hg:convert-branch', commit.branch,
1030 1033 revprop=True, revision=rev)
1031 1034 for parent in parents:
1032 1035 self.addchild(parent, rev)
1033 1036 return self.revid(rev)
1034 1037 finally:
1035 1038 os.unlink(messagefile)
1036 1039
1037 1040 def puttags(self, tags):
1038 1041 self.ui.warn(_('XXX TAGS NOT IMPLEMENTED YET\n'))
General Comments 0
You need to be logged in to leave comments. Login now