##// END OF EJS Templates
convert: cleanup svn file copy handling
Patrick Mezard -
r6544:3447c088 default
parent child Browse files
Show More
@@ -1,1118 +1,1120 b''
1 1 # Subversion 1.4/1.5 Python API backend
2 2 #
3 3 # Copyright(C) 2007 Daniel Holth et al
4 4 #
5 5 # Configuration options:
6 6 #
7 7 # convert.svn.trunk
8 8 # Relative path to the trunk (default: "trunk")
9 9 # convert.svn.branches
10 10 # Relative path to tree of branches (default: "branches")
11 11 # convert.svn.tags
12 12 # Relative path to tree of tags (default: "tags")
13 13 #
14 14 # Set these in a hgrc, or on the command line as follows:
15 15 #
16 16 # hg convert --config convert.svn.trunk=wackoname [...]
17 17
18 18 import locale
19 19 import os
20 20 import re
21 21 import sys
22 22 import cPickle as pickle
23 23 import tempfile
24 24
25 25 from mercurial import strutil, util
26 26 from mercurial.i18n import _
27 27
28 28 # Subversion stuff. Works best with very recent Python SVN bindings
29 29 # e.g. SVN 1.5 or backports. Thanks to the bzr folks for enhancing
30 30 # these bindings.
31 31
32 32 from cStringIO import StringIO
33 33
34 34 from common import NoRepo, commit, converter_source, encodeargs, decodeargs
35 35 from common import commandline, converter_sink, mapfile
36 36
37 37 try:
38 38 from svn.core import SubversionException, Pool
39 39 import svn
40 40 import svn.client
41 41 import svn.core
42 42 import svn.ra
43 43 import svn.delta
44 44 import transport
45 45 except ImportError:
46 46 pass
47 47
def geturl(path):
    """Return a Subversion URL for path.

    The canonicalized path is first handed to svn.client.url_from_path().
    When that raises SubversionException, an existing local directory is
    turned into a file:// URL and anything else is returned unchanged.
    """
    try:
        canonical = svn.core.svn_path_canonicalize(path)
        return svn.client.url_from_path(canonical)
    except SubversionException:
        pass
    if not os.path.isdir(path):
        return path
    path = os.path.normpath(os.path.abspath(path))
    if os.name == 'nt':
        # NOTE(review): presumably supplies the leading slash a drive-letter
        # path needs inside a file:// URL -- confirm.
        path = '/' + util.normpath(path)
    return 'file://%s' % path
59 59
def optrev(number):
    """Return an svn_opt_revision_t that designates revision `number`."""
    revision = svn.core.svn_opt_revision_t()
    revision.kind = svn.core.svn_opt_revision_number
    revision.value.number = number
    return revision
65 65
class changedpath(object):
    """Plain copy of the three fields read from a changed-path entry.

    Only copyfrom_path, copyfrom_rev and action are kept.
    """
    def __init__(self, p):
        for field in ('copyfrom_path', 'copyfrom_rev', 'action'):
            setattr(self, field, getattr(p, field))
71 71
def get_log_child(fp, url, paths, start, end, limit=0, discover_changed_paths=True,
                  strict_node_history=False):
    """Write the Subversion log for paths to fp as a stream of pickles.

    Every log entry is dumped as an (orig_paths, revnum, author, date,
    message) tuple in which each changed-path value has been replaced by a
    changedpath instance. One final pickle closes the stream: the second
    item of the exception arguments when SubversionException was raised,
    None otherwise. fp is then closed and the process is ended with
    os._exit(0), so this function never returns to its caller.
    """
    protocol = -1

    def receiver(orig_paths, revnum, author, date, message, pool):
        if orig_paths is not None:
            # Only existing keys are reassigned; the dictionary keeps its size.
            for name in list(orig_paths):
                orig_paths[name] = changedpath(orig_paths[name])
        record = (orig_paths, revnum, author, date, message)
        pickle.dump(record, fp, protocol)

    trailer = None
    try:
        # Use an ra of our own so that our parent can consume
        # our results without confusing the server.
        t = transport.SvnRaTransport(url=url)
        svn.ra.get_log(t.ra, paths, start, end, limit,
                       discover_changed_paths,
                       strict_node_history,
                       receiver)
    except SubversionException:
        # Fetched through sys.exc_info() so the handler parses on every
        # Python version; the arguments unpack exactly as before.
        inst, num = sys.exc_info()[1].args
        trailer = num
    except IOError:
        # Caller may interrupt the iteration
        pass
    pickle.dump(trailer, fp, protocol)
    fp.close()
    # With large history, cleanup process goes crazy and suddenly
    # consumes *huge* amount of memory. The output file being closed,
    # there is no need for clean termination.
    os._exit(0)
102 102
def debugsvnlog(ui, **opts):
    """Fetch SVN log in a subprocess and channel them back to parent to
    avoid memory collection issues.
    """
    # Both standard streams are switched to binary mode: the encoded
    # arguments are read from stdin, the log is written to stdout.
    for stream in (sys.stdin, sys.stdout):
        util.set_binary(stream)
    encoded = sys.stdin.read()
    get_log_child(sys.stdout, *decodeargs(encoded))
111 111
class logstream(object):
    """Interruptible revision log iterator.

    Wraps a file object from which pickled records are read one at a time.
    Iteration yields every 5-item record, stops at a None record, and
    raises SubversionException for any other record.
    """
    def __init__(self, stdout):
        self._stdout = stdout

    def __iter__(self):
        while True:
            # NOTE: pickle data is only safe to load from a trusted writer.
            entry = pickle.load(self._stdout)
            if entry is None:
                # End-of-stream marker.
                break
            try:
                orig_paths, revnum, author, date, message = entry
            except (TypeError, ValueError):
                # Not a log record: hand the payload to the caller as an
                # error. Only unpacking failures are caught here, so
                # unrelated exceptions are no longer swallowed.
                raise SubversionException("child raised exception", entry)
            yield entry

    def close(self):
        """Close the underlying stream; further calls do nothing."""
        if self._stdout:
            self._stdout.close()
            self._stdout = None
132 132
def get_log(url, paths, start, end, limit=0, discover_changed_paths=True,
            strict_node_history=False):
    """Run `hg debugsvnlog` in a child process and return a logstream
    reading its output.

    The arguments are encoded, written to the child's standard input, and
    that pipe is closed before the stream is returned.
    """
    payload = encodeargs([url, paths, start, end, limit,
                          discover_changed_paths, strict_node_history])
    cmd = '%s debugsvnlog' % util.shellquote(util.hgexecutable())
    stdin, stdout = os.popen2(cmd, 'b')
    stdin.write(payload)
    stdin.close()
    return logstream(stdout)
144 144
145 145 # SVN conversion code stolen from bzr-svn and tailor
146 146 #
147 147 # Subversion looks like a versioned filesystem, branches structures
148 148 # are defined by conventions and not enforced by the tool. First,
149 149 # we define the potential branches (modules) as "trunk" and "branches"
150 150 # children directories. Revisions are then identified by their
151 151 # module and revision number (and a repository identifier).
152 152 #
153 153 # The revision graph is really a tree (or a forest). By default, a
154 154 # revision parent is the previous revision in the same module. If the
155 155 # module directory is copied/moved from another module then the
156 156 # revision is the module root and its parent the source revision in
157 157 # the parent module. A revision has at most one parent.
158 158 #
159 159 class svn_source(converter_source):
def __init__(self, ui, url, rev=None):
    """Open the Subversion repository at url and locate its head revision.

    url may end with '@<number>', which selects the latest revision to
    consider; a non-empty rev argument overrides it. Raises NoRepo when
    the bindings are missing or the repository cannot be opened, and
    util.Abort when rev or convert.svn.startrev is not an integer or when
    no head revision is found.
    """
    super(svn_source, self).__init__(ui, url, rev=rev)

    # The name only exists when the guarded import at module level worked.
    try:
        SubversionException
    except NameError:
        raise NoRepo('Subversion python bindings could not be loaded')

    self.encoding = locale.getpreferredencoding()
    self.lastrevs = {}

    latest = None
    try:
        # Support file://path@rev syntax. Useful e.g. to convert
        # deleted branches.
        sep = url.rfind('@')
        if sep >= 0:
            latest = int(url[sep + 1:])
            url = url[:sep]
    except ValueError:
        # The text after '@' is not a number: keep the url untouched.
        pass
    self.url = geturl(url)
    self.encoding = 'UTF-8' # Subversion is always nominal UTF-8
    try:
        self.transport = transport.SvnRaTransport(url=self.url)
        self.ra = self.transport.ra
        self.ctx = self.transport.client
        self.base = svn.ra.get_repos_root(self.ra)
        # Module is either empty or a repository path starting with
        # a slash and not ending with a slash.
        self.module = self.url[len(self.base):]
        self.rootmodule = self.module
        self.commits = {}
        self.paths = {}
        self.uuid = svn.ra.get_uuid(self.ra).decode(self.encoding)
    except SubversionException:
        ui.print_exc()
        raise NoRepo("%s does not look like a Subversion repo" % self.url)

    if rev:
        try:
            latest = int(rev)
        except ValueError:
            raise util.Abort('svn: revision %s is not an integer' % rev)

    # Negative start revisions are clamped to 0.
    self.startrev = self.ui.config('convert', 'svn.startrev', default=0)
    try:
        self.startrev = int(self.startrev)
        if self.startrev < 0:
            self.startrev = 0
    except ValueError:
        raise util.Abort(_('svn: start revision %s is not an integer')
                         % self.startrev)

    # An unreadable blacklist file is not an error.
    try:
        self.get_blacklist()
    except IOError:
        pass

    self.head = self.latest(self.module, latest)
    if not self.head:
        raise util.Abort(_('no revision found in module %s') %
                         self.module.encode(self.encoding))
    self.last_changed = self.revnum(self.head)

    self._changescache = None

    # Remember the location when url is a directory holding .svn/entries.
    if os.path.exists(os.path.join(url, '.svn/entries')):
        self.wc = url
    else:
        self.wc = None
    self.convertfp = None
232 232
def setrevmap(self, revmap):
    """Record in self.lastrevs the highest revision number that the keys of
    revmap hold for each module.
    """
    highest = {}
    for revid in revmap:
        uuid, module, revnum = self.revsplit(revid)
        if module not in highest or revnum > highest[module]:
            highest[module] = revnum
    self.lastrevs = highest
241 241
def exists(self, path, optrev):
    """Return True when path can be listed below self.url at revision
    optrev, False when the listing raises SubversionException.
    """
    target = self.url.rstrip('/') + '/' + path
    try:
        svn.client.ls(target, optrev, False, self.ctx)
    except SubversionException:
        return False
    return True
249 249
def getheads(self):
    """Return the list of head revision ids, the module's own head first.

    The trunk, tags and branches locations come from the convert.svn.*
    options, falling back to the option name itself. When a trunk exists,
    self.module is moved to it; every directory below the branches
    location that has a revision adds one more head. self.tags ends up
    as the tags path or None. Raises util.Abort when a configured
    location is missing, when the module has no revision, or when a start
    revision is combined with several heads or lies after the head.
    """

    def isdir(path, revnum):
        kind = svn.ra.check_path(self.ra, path, revnum)
        return kind == svn.core.svn_node_dir

    def getcfgpath(name, rev):
        cfgpath = self.ui.config('convert', 'svn.' + name)
        # An explicitly blank setting disables the location.
        if cfgpath is not None and cfgpath.strip() == '':
            return None
        path = (cfgpath or name).strip('/')
        if not self.exists(path, rev):
            # Only an explicitly configured location has to exist.
            if cfgpath:
                raise util.Abort(_('expected %s to be at %r, but not found')
                                 % (name, path))
            return None
        self.ui.note(_('found %s at %r\n') % (name, path))
        return path

    rev = optrev(self.last_changed)
    oldmodule = ''
    trunk = getcfgpath('trunk', rev)
    self.tags = getcfgpath('tags', rev)
    branches = getcfgpath('branches', rev)

    # If the project has a trunk or branches, we will extract heads
    # from them. We keep the project root otherwise.
    if trunk:
        oldmodule = self.module or ''
        self.module += '/' + trunk
        self.head = self.latest(self.module, self.last_changed)
        if not self.head:
            raise util.Abort(_('no revision found in module %s') %
                             self.module.encode(self.encoding))

    # First head in the list is the module's head
    self.heads = [self.head]
    if self.tags is not None:
        self.tags = '%s/%s' % (oldmodule , (self.tags or 'tags'))

    # Check if branches bring a few more heads to the list
    if branches:
        rpath = self.url.strip('/')
        branchnames = svn.client.ls(rpath + '/' + branches, rev, False,
                                    self.ctx)
        for branch in branchnames.keys():
            module = '%s/%s/%s' % (oldmodule, branches, branch)
            if not isdir(module, self.last_changed):
                continue
            brevid = self.latest(module, self.last_changed)
            if not brevid:
                self.ui.note(_('ignoring empty branch %s\n') %
                             branch.encode(self.encoding))
                continue
            self.ui.note('found branch %s at %d\n' %
                         (branch, self.revnum(brevid)))
            self.heads.append(brevid)

    if self.startrev and self.heads:
        if len(self.heads) > 1:
            # The message used to read "supported with with more ...".
            raise util.Abort(_('svn: start revision is not supported with '
                               'more than one branch'))
        revnum = self.revnum(self.heads[0])
        if revnum < self.startrev:
            raise util.Abort(_('svn: no revision found after start revision %d')
                             % self.startrev)

    return self.heads
318 318
def getfile(self, file, rev):
    """Return the data self._getfile() yields for (file, rev) and store the
    accompanying mode in self.modecache under that key.
    """
    content, flags = self._getfile(file, rev)
    self.modecache[(file, rev)] = flags
    return content
323 323
def getmode(self, file, rev):
    """Return the mode stored in self.modecache for (file, rev).

    Raises KeyError when nothing was stored for that pair.
    """
    key = (file, rev)
    return self.modecache[key]
326 326
def getchanges(self, rev):
    """Return (files, copies) for rev.

    files is a sorted list of (name, rev) pairs. A result cached for the
    same rev is returned as is; otherwise the cache and self.modecache
    are reset and the entry of self.paths for rev is consumed.
    """
    cached = self._changescache
    if cached and cached[0] == rev:
        return cached[1]
    self._changescache = None
    self.modecache = {}
    paths, parents = self.paths[rev]
    if parents:
        files, copies = self.expandpaths(rev, paths, parents)
    else:
        # Perform a full checkout on roots
        uuid, module, revnum = self.revsplit(rev)
        entries = svn.client.ls(self.base + module, optrev(revnum),
                                True, self.ctx)
        files = [name for name, dirent in entries.iteritems()
                 if dirent.kind == svn.core.svn_node_file]
        copies = {}

    files.sort()
    files = [(name, rev) for name in files]

    # caller caches the result, so free it here to release memory
    del self.paths[rev]
    return (files, copies)
350 350
def getchangedfiles(self, rev, i):
    """Return the file names changed in rev.

    The full getchanges() result is kept in self._changescache. The i
    argument is not used.
    """
    changes = self.getchanges(rev)
    self._changescache = (rev, changes)
    files = changes[0]
    return [entry[0] for entry in files]
355 355
def getcommit(self, rev):
    """Return the commit stored for rev and remove it from self.commits.

    When rev is not in self.commits yet, self.module is switched to the
    module of rev and the missing revisions are fetched first.
    """
    if rev not in self.commits:
        uuid, module, revnum = self.revsplit(rev)
        self.module = module
        self.reparent(module)
        # We assume that:
        # - requests for revisions after "stop" come from the
        # revision graph backward traversal. Cache all of them
        # down to stop, they will be used eventually.
        # - requests for revisions before "stop" come to get
        # isolated branches parents. Just fetch what is needed.
        stop = self.lastrevs.get(module, 0)
        if revnum < stop:
            stop = revnum + 1
        self._fetch_revisions(revnum, stop)
    # caller caches the result, so free it here to release memory
    return self.commits.pop(rev)
375 375
def gettags(self):
    """Return a dictionary mapping tag names to revision ids.

    The result is empty when self.tags is None. A SubversionException
    raised while reading the log is reported with ui.note() and whatever
    was collected so far is returned.
    """
    tags = {}
    if self.tags is None:
        return tags

    # svn tags are just a convention, project branches left in a
    # 'tags' directory. There is no other relationship than
    # ancestry, which is expensive to discover and makes them hard
    # to update incrementally. Worse, past revisions may be
    # referenced by tags far away in the future, requiring a deep
    # history traversal on every calculation. Current code
    # performs a single backward traversal, tracking moves within
    # the tags directory (tag renaming) and recording a new tag
    # everytime a project is copied from outside the tags
    # directory. It also lists deleted tags, this behaviour may
    # change in the future.
    pendings = []
    tagspath = self.tags
    start = svn.ra.get_latest_revnum(self.ra)
    try:
        stream = get_log(self.url, [self.tags], start, self.startrev)
        # Close the stream whatever happens, as latest() and
        # _fetch_revisions() do.
        try:
            for entry in stream:
                origpaths, revnum, author, date, message = entry
                # The changed paths may be None (get_log_child allows
                # for it); treat that as "nothing changed".
                if origpaths is None:
                    origpaths = {}
                copies = [(e.copyfrom_path, e.copyfrom_rev, p) for p, e
                          in origpaths.iteritems() if e.copyfrom_path]
                copies.sort()
                # Apply moves/copies from more specific to general
                copies.reverse()

                srctagspath = tagspath
                if copies and copies[-1][2] == tagspath:
                    # Track tags directory moves
                    srctagspath = copies.pop()[0]

                for source, sourcerev, dest in copies:
                    if not dest.startswith(tagspath + '/'):
                        continue
                    for tag in pendings:
                        if tag[0].startswith(dest):
                            tagpath = source + tag[0][len(dest):]
                            tag[:2] = [tagpath, sourcerev]
                            break
                    else:
                        pendings.append([source, sourcerev,
                                         dest.split('/')[-1]])

                # Tell tag renamings from tag creations
                remainings = []
                for source, sourcerev, tagname in pendings:
                    if source.startswith(srctagspath):
                        remainings.append([source, sourcerev, tagname])
                        continue
                    # From revision may be fake, get one with changes
                    tagid = self.latest(source, sourcerev)
                    if tagid:
                        tags[tagname] = tagid
                pendings = remainings
                tagspath = srctagspath
        finally:
            stream.close()

    except SubversionException:
        self.ui.note('no tags found at revision %d\n' % start)
    return tags
436 436
def converted(self, rev, destrev):
    """Append a "<destrev> <revision number of rev>" line to
    .svn/hg-shamap below self.wc; do nothing when self.wc is not set.

    The file is opened in append mode on first use and kept open in
    self.convertfp; every line is flushed immediately.
    """
    if not self.wc:
        return
    if self.convertfp is None:
        mapname = os.path.join(self.wc, '.svn', 'hg-shamap')
        self.convertfp = open(mapname, 'a')
    line = '%s %d\n' % (destrev, self.revnum(rev))
    self.convertfp.write(line)
    self.convertfp.flush()
445 445
446 446 # -- helper functions --
447 447
def revid(self, revnum, module=None):
    """Return the unicode id "svn:<uuid><module>@<revnum>".

    self.module is used when module is empty or not given.
    """
    module = module or self.module
    decoded = module.decode(self.encoding)
    return u"svn:%s%s@%s" % (self.uuid, decoded, revnum)
453 453
def revnum(self, rev):
    """Return, as an integer, what follows the last '@' of rev (the whole
    string when it holds no '@').
    """
    tail = rev[rev.rfind('@') + 1:]
    return int(tail)
456 456
def revsplit(self, rev):
    """Split a revision id into (uuid, module, revision number).

    rev is encoded with self.encoding and cut at its first '@'. The first
    four characters of the left part are dropped and the remainder is
    divided at the first '/': uuid before it, module from the slash on
    (empty when there is no slash).
    """
    encoded = rev.encode(self.encoding)
    url, revnum = encoded.split('@', 1)
    revnum = int(revnum)
    slash = url.find('/')
    if slash < 0:
        return url[4:], '', revnum
    return url[4:slash], url[slash:], revnum
466 466
def latest(self, path, stop=0):
    """Return the id of the latest revision affecting path, up to stop.

    The id may name a different module, since a branch may be moved
    without a change being reported. None is returned when the path, as
    given or after following renames, does not start with
    self.rootmodule. Raises util.Abort when path cannot be found.
    """
    if not path.startswith(self.rootmodule):
        # Requests on foreign branches may be forbidden at server level
        self.ui.debug(_('ignoring foreign branch %r\n') % path)
        return None

    if not stop:
        stop = svn.ra.get_latest_revnum(self.ra)
    try:
        # stat() is issued from the repository root, then the session is
        # pointed back at the current module.
        self.reparent('')
        dirent = svn.ra.stat(self.ra, path.strip('/'), stop)
        self.reparent(self.module)
    except SubversionException:
        dirent = None
    if not dirent:
        raise util.Abort('%s not found up to revision %d' % (path, stop))

    # stat() gives us the previous revision on this line of development, but
    # it might be in *another module*. Fetch the log and detect renames down
    # to the latest revision.
    created = dirent.created_rev
    stream = get_log(self.url, [path], stop, created)
    try:
        for entry in stream:
            paths, revnum, author, date, message = entry
            if revnum <= created:
                break

            for p in paths:
                if path.startswith(p) and paths[p].copyfrom_path:
                    newpath = paths[p].copyfrom_path + path[len(p):]
                    self.ui.debug("branch renamed from %s to %s at %d\n" %
                                  (path, newpath, revnum))
                    path = newpath
                    break
    finally:
        stream.close()

    if not path.startswith(self.rootmodule):
        self.ui.debug(_('ignoring foreign branch %r\n') % path)
        return None
    return self.revid(created, path)
514 514
def get_blacklist(self):
    """Avoid certain revision numbers.
    It is not uncommon for two nearby revisions to cancel each other
    out, e.g. 'I copied trunk into a subdirectory of itself instead
    of making a branch'. The converted repository is significantly
    smaller if we ignore such revisions.

    The numbers are read, one per line, from "blacklist.txt" in the
    current directory; lines starting with '#' and lines that are not
    integers are skipped. Opening the file may raise IOError.
    """
    # Assigned before the file is opened so that self.blacklist exists
    # even when the open below fails.
    self.blacklist = util.set()
    blacklist = self.blacklist
    fp = open("blacklist.txt", "r")
    try:
        for line in fp:
            if line.startswith("#"):
                continue
            try:
                blacklist.add(int(line.strip()))
            except ValueError:
                pass # not an integer or a comment
    finally:
        # The handle used to be left for the garbage collector to close.
        fp.close()
530 530
def is_blacklisted(self, svn_rev):
    """Return True when svn_rev is a member of self.blacklist."""
    blacklist = self.blacklist
    return svn_rev in blacklist
533 533
def reparent(self, module):
    """Point the ra session at self.base + module, encoded with
    self.encoding, after reporting the new location with ui.debug().
    """
    encoded = (self.base + module).encode(self.encoding)
    self.ui.debug("reparent to %s\n" % encoded)
    svn.ra.reparent(self.ra, encoded)
538 538
def expandpaths(self, rev, paths, parents):
    """Expand the changed paths of rev into file level information.

    Returns (entries, copies): entries is the list of affected file names
    with duplicates removed by util.unique(), copies maps a destination
    file name to the name it was copied from. The session is moved to
    the module of rev when that differs from self.module.
    """
    entries = []
    copyfrom = {} # Map of entrypath, revision for finding source of deleted revisions.
    copies = {}

    new_module, revnum = self.revsplit(rev)[1:]
    if new_module != self.module:
        self.module = new_module
        self.reparent(self.module)

    def lookup_parts(p):
        # Return (prefix, entry) for the longest proper directory prefix
        # of p recorded in copyfrom, or None when there is none.
        rc = None
        parts = p.split("/")
        for i in range(len(parts)):
            part = "/".join(parts[:i])
            info = part, copyfrom.get(part, None)
            if info[1] is not None:
                self.ui.debug("Found parent directory %s\n" % info[1])
                rc = info
        return rc

    for path, ent in paths:
        entrypath = self.getrelpath(path)
        entry = entrypath.decode(self.encoding)

        kind = svn.ra.check_path(self.ra, entrypath, revnum)
        if kind == svn.core.svn_node_file:
            entries.append(self.recode(entry))

            if not ent.copyfrom_path:
                continue
            copyfrom_path = self.getrelpath(ent.copyfrom_path)
            if not copyfrom_path:
                continue
            self.ui.debug("copied to %s from %s@%s\n" %
                          (entrypath, copyfrom_path, ent.copyfrom_rev))
            # It's probably important for hg that the source
            # exists in the revision's parent, not just the
            # ent.copyfrom_rev
            fromkind = svn.ra.check_path(self.ra, copyfrom_path, ent.copyfrom_rev)
            if fromkind != 0:
                copies[self.recode(entry)] = self.recode(copyfrom_path)
        elif kind == 0: # gone, but had better be a deleted *file*
            self.ui.debug("gone from %s\n" % ent.copyfrom_rev)

            # if a branch is created but entries are removed in the same
            # changeset, get the right fromrev
            # parents cannot be empty here, you cannot remove things from
            # a root revision.
            uuid, old_module, fromrev = self.revsplit(parents[0])

            basepath = old_module + "/" + self.getrelpath(path)
            entrypath = basepath

            self.ui.debug("base, entry %s %s\n" % (basepath, entrypath))

            frompath, froment = lookup_parts(entrypath) or (None, revnum - 1)

            # need to remove fragment from lookup_parts and replace with copyfrom_path
            if frompath is not None:
                self.ui.debug("munge-o-matic\n")
                self.ui.debug(entrypath + '\n')
                self.ui.debug(entrypath[len(frompath):] + '\n')
                entrypath = froment.copyfrom_path + entrypath[len(frompath):]
                fromrev = froment.copyfrom_rev
                self.ui.debug("Info: %s %s %s %s\n" % (frompath, froment, ent, entrypath))

            # We can avoid the reparent calls if the module has not changed
            # but it probably does not worth the pain.
            self.reparent('')
            fromkind = svn.ra.check_path(self.ra, entrypath.strip('/'), fromrev)
            self.reparent(self.module)

            if fromkind == svn.core.svn_node_file: # a deleted file
                entries.append(self.recode(entry))
            elif fromkind == svn.core.svn_node_dir:
                # Sometimes this is tricky. For example: in
                # The Subversion Repository revision 6940 a dir
                # was copied and one of its files was deleted
                # from the new location in the same commit. This
                # code can't deal with that yet.
                if ent.action == 'C':
                    children = self._find_children(path, fromrev)
                else:
                    oroot = entrypath.strip('/')
                    nroot = path.strip('/')
                    children = self._find_children(oroot, fromrev)
                    children = [s.replace(oroot,nroot) for s in children]
                # Mark all [files, not directories] as deleted.
                for child in children:
                    # Can we move a child directory and its
                    # parent in the same commit? (probably can). Could
                    # cause problems if instead of revnum -1,
                    # we have to look in (copyfrom_path, revnum - 1)
                    entrypath = self.getrelpath("/" + child, module=old_module)
                    if entrypath:
                        entry = self.recode(entrypath.decode(self.encoding))
                        if entry in copies:
                            # deleted file within a copy
                            del copies[entry]
                        else:
                            entries.append(entry)
            else:
                self.ui.debug('unknown path in revision %d: %s\n' % \
                              (revnum, path))
        elif kind == svn.core.svn_node_dir:
            # Should probably synthesize normal file entries
            # and handle as above to clean up copy/rename handling.

            # If the directory just had a prop change,
            # then we shouldn't need to look for its children.
            if ent.action == 'M':
                continue

            # Also this could create duplicate entries. Not sure
            # whether this will matter. Maybe should make entries a set.
            # This will fail if a directory was copied
            # from another branch and then some of its files
            # were deleted in the same transaction.
            children = self._find_children(path, revnum)
            children.sort()
            for child in children:
                # Can we move a child directory and its
                # parent in the same commit? (probably can). Could
                # cause problems if instead of revnum -1,
                # we have to look in (copyfrom_path, revnum - 1)
                entrypath = self.getrelpath("/" + child)
                if entrypath:
                    # Need to filter out directories here...
                    childkind = svn.ra.check_path(self.ra, entrypath, revnum)
                    if childkind != svn.core.svn_node_dir:
                        entries.append(self.recode(entrypath))

            # Copies here (must copy all from source)
            # Probably not a real problem for us if
            # source does not exist
            if not ent.copyfrom_path or not parents:
                continue
            # Copy sources not in parent revisions cannot be represented,
            # ignore their origin for now
            pmodule, prevnum = self.revsplit(parents[0])[1:]
            if ent.copyfrom_rev < prevnum:
                continue
            copyfrompath = ent.copyfrom_path.decode(self.encoding)
            copyfrompath = self.getrelpath(copyfrompath, pmodule)
            if not copyfrompath:
                continue
            copyfrom[path] = ent
            self.ui.debug("mark %s came from %s:%d\n"
                          % (path, copyfrompath, ent.copyfrom_rev))
            children = self._find_children(ent.copyfrom_path, ent.copyfrom_rev)
            children.sort()
            for child in children:
                entrypath = self.getrelpath("/" + child, pmodule)
                if not entrypath:
                    continue
                entry = entrypath.decode(self.encoding)
                copytopath = path + entry[len(copyfrompath):]
                copytopath = self.getrelpath(copytopath)
                copies[self.recode(copytopath)] = self.recode(entry, pmodule)

    return (util.unique(entries), copies)
704 706
def _fetch_revisions(self, from_revnum, to_revnum):
    """Read the log of self.module between the two revisions and store a
    commit in self.commits, and its (paths, parents) in self.paths, for
    every revision kept.

    The bounds may be given in either order. Raises util.Abort when
    Subversion reports SVN_ERR_FS_NO_SUCH_REVISION.
    """
    if from_revnum < to_revnum:
        from_revnum, to_revnum = to_revnum, from_revnum

    self.child_cset = None

    def parselogentry(orig_paths, revnum, author, date, message):
        """Return the parsed commit object or None, and True if
        the revision is a branch root.
        """
        self.ui.debug("parsing revision %d (%d changes)\n" %
                      (revnum, len(orig_paths)))

        branched = False
        rev = self.revid(revnum)
        # branch log might return entries for a parent we already have

        if (rev in self.commits or revnum < to_revnum):
            return None, branched

        parents = []
        # check whether this revision is the start of a branch or part
        # of a branch renaming
        orig_paths = orig_paths.items()
        orig_paths.sort()
        root_paths = [(p, e) for p, e in orig_paths
                      if self.module.startswith(p)]
        if root_paths:
            path, ent = root_paths[-1]
            if ent.copyfrom_path:
                branched = True
                newpath = ent.copyfrom_path + self.module[len(path):]
                # ent.copyfrom_rev may not be the actual last revision
                previd = self.latest(newpath, ent.copyfrom_rev)
                if previd is not None:
                    prevmodule, prevnum = self.revsplit(previd)[1:]
                    if prevnum >= self.startrev:
                        parents = [previd]
                        self.ui.note('found parent of branch %s at %d: %s\n' %
                                     (self.module, prevnum, prevmodule))
            else:
                self.ui.debug("No copyfrom path, don't know what to do.\n")

        # filter out unrelated paths
        paths = [(path, ent) for path, ent in orig_paths
                 if self.getrelpath(path) is not None]

        # Example SVN datetime. Includes microseconds.
        # ISO-8601 conformant
        # '2007-01-04T17:35:00.902377Z'
        date = util.parsedate(date[:19] + " UTC", ["%Y-%m-%dT%H:%M:%S"])

        log = message and self.recode(message) or ''
        author = author and self.recode(author) or ''
        try:
            branch = self.module.split("/")[-1]
            if branch == 'trunk':
                branch = ''
        except IndexError:
            branch = None

        cset = commit(author=author,
                      date=util.datestr(date),
                      desc=log,
                      parents=parents,
                      branch=branch,
                      rev=rev.encode('utf-8'))

        self.commits[rev] = cset
        # The parents list is *shared* among self.paths and the
        # commit object. Both will be updated below.
        self.paths[rev] = (paths, cset.parents)
        if self.child_cset and not self.child_cset.parents:
            self.child_cset.parents[:] = [rev]
        self.child_cset = cset
        return cset, branched

    self.ui.note('fetching revision log for "%s" from %d to %d\n' %
                 (self.module, from_revnum, to_revnum))

    try:
        firstcset = None
        lastonbranch = False
        stream = get_log(self.url, [self.module], from_revnum, to_revnum)
        try:
            for entry in stream:
                paths, revnum, author, date, message = entry
                if revnum < self.startrev:
                    lastonbranch = True
                    break
                if self.is_blacklisted(revnum):
                    self.ui.note('skipping blacklisted revision %d\n'
                                 % revnum)
                    continue
                if paths is None:
                    self.ui.debug('revision %d has no entries\n' % revnum)
                    continue
                cset, lastonbranch = parselogentry(paths, revnum, author,
                                                   date, message)
                if cset:
                    firstcset = cset
                if lastonbranch:
                    break
        finally:
            stream.close()

        if not lastonbranch and firstcset and not firstcset.parents:
            # The first revision of the sequence (the last fetched one)
            # has invalid parents if not a branch root. Find the parent
            # revision now, if any.
            try:
                firstrevnum = self.revnum(firstcset.rev)
                if firstrevnum > 1:
                    latest = self.latest(self.module, firstrevnum - 1)
                    if latest:
                        firstcset.parents.append(latest)
            except util.Abort:
                pass
    except SubversionException:
        # Fetched through sys.exc_info() so the handler parses on every
        # Python version; the arguments unpack exactly as before.
        inst, num = sys.exc_info()[1].args
        if num == svn.core.SVN_ERR_FS_NO_SUCH_REVISION:
            raise util.Abort('svn: branch has no revision %s' % to_revnum)
        raise
828 830
def _getfile(self, file, rev):
    """Return (data, mode) for file at rev.

    mode is 'l' when the "svn:special" property is present, else 'x'
    when "svn:executable" is present, else ''. For mode 'l' a leading
    "link " is removed from the data. Raises IOError when Subversion
    reports the file as not found.
    """
    io = StringIO()
    # TODO: ra.get_file transmits the whole file instead of diffs.
    mode = ''
    try:
        new_module, revnum = self.revsplit(rev)[1:]
        if self.module != new_module:
            self.module = new_module
            self.reparent(self.module)
        info = svn.ra.get_file(self.ra, file, revnum, io)
        if isinstance(info, list):
            info = info[-1]
        if "svn:executable" in info:
            mode = 'x'
        if "svn:special" in info:
            mode = 'l'
    except SubversionException:
        # Fetched through sys.exc_info() so the handler parses on every
        # Python version.
        err = sys.exc_info()[1]
        notfound = (svn.core.SVN_ERR_FS_NOT_FOUND,
                    svn.core.SVN_ERR_RA_DAV_PATH_NOT_FOUND)
        if err.apr_err in notfound: # File not found
            raise IOError()
        raise
    data = io.getvalue()
    if mode == 'l':
        link_prefix = "link "
        if data.startswith(link_prefix):
            data = data[len(link_prefix):]
    return data, mode
855 857
def _find_children(self, path, revnum):
    """List the entries found under path at revision revnum.

    path is stripped of surrounding slashes, joined onto self.base and
    listed with svn.client.ls(). Each returned name is prefixed with the
    stripped path, giving a list of 'path/name' strings.
    """
    path = path.strip('/')
    pool = Pool()
    target = '/'.join([self.base, path]).strip('/')
    # NOTE(review): the True argument is presumably the "recurse" flag of
    # svn.client.ls -- confirm against the bindings.
    listing = svn.client.ls(target, optrev(revnum), True, self.ctx, pool)
    children = []
    for name in listing.keys():
        children.append('%s/%s' % (path, name))
    return children
861 863
862 864 def getrelpath(self, path, module=None):
863 865 if module is None:
864 866 module = self.module
865 867 # Given the repository url of this wc, say
866 868 # "http://server/plone/CMFPlone/branches/Plone-2_0-branch"
867 869 # extract the "entry" portion (a relative path) from what
868 870 # svn log --xml says, ie
869 871 # "/CMFPlone/branches/Plone-2_0-branch/tests/PloneTestCase.py"
870 872 # that is to say "tests/PloneTestCase.py"
871 873 if path.startswith(module):
872 874 relative = path.rstrip('/')[len(module):]
873 875 if relative.startswith('/'):
874 876 return relative[1:]
875 877 elif relative == '':
876 878 return relative
877 879
878 880 # The path is outside our tracked tree...
879 881 self.ui.debug('%r is not under %r, ignoring\n' % (path, module))
880 882 return None
881 883
# Shell script written to hooks/pre-revprop-change of repositories created
# by svn_sink. It allows modifying svn:log and adding the hg:convert-branch
# and hg:convert-rev revision properties; any other revision property
# change is rejected with exit status 1.
pre_revprop_change = '''#!/bin/sh

REPOS="$1"
REV="$2"
USER="$3"
PROPNAME="$4"
ACTION="$5"

if [ "$ACTION" = "M" -a "$PROPNAME" = "svn:log" ]; then exit 0; fi
if [ "$ACTION" = "A" -a "$PROPNAME" = "hg:convert-branch" ]; then exit 0; fi
if [ "$ACTION" = "A" -a "$PROPNAME" = "hg:convert-rev" ]; then exit 0; fi

echo "Changing prohibited revision property" >&2
exit 1
'''
897 899
class svn_sink(converter_sink, commandline):
    """Conversion sink that commits changesets through an svn working copy.

    File writes go straight into the working copy; deletions, copies and
    executable-bit changes are queued on the instance and replayed with
    the svn command line client when putcommit() is called.
    """
    commit_re = re.compile(r'Committed revision (\d+).', re.M)

    def prerun(self):
        """Change into the working copy directory, when one is set."""
        if self.wc:
            os.chdir(self.wc)

    def postrun(self):
        """Return to the directory that was current at construction time."""
        if self.wc:
            os.chdir(self.cwd)

    def join(self, name):
        """Return the path of name inside the working copy's .svn dir."""
        return os.path.join(self.wc, '.svn', name)

    def revmapfile(self):
        """Return the path of the revision map file."""
        return self.join('hg-shamap')

    def authorfile(self):
        """Return the path of the author map file."""
        return self.join('hg-authormap')

    def __init__(self, ui, path):
        """Open, or create, the working copy used as conversion target.

        If path is already a working copy (it has .svn/entries) it is
        updated and used directly. Otherwise a working copy named
        '<basename(path)>-wc' is checked out in the current directory;
        when path's parent directory exists locally, path is treated as a
        local repository, created with svnadmin if it has no db/fs-type
        file, and addressed through a file:// URL.
        """
        converter_sink.__init__(self, ui, path)
        commandline.__init__(self, ui, 'svn')
        # Operations queued until the next putcommit().
        self.delete = []
        self.setexec = []
        self.delexec = []
        self.copies = []
        self.wc = None
        self.cwd = os.getcwd()

        path = os.path.realpath(path)

        created = False
        if os.path.isfile(os.path.join(path, '.svn', 'entries')):
            self.wc = path
            self.run0('update')
        else:
            wcpath = os.path.join(os.getcwd(), os.path.basename(path) + '-wc')

            if os.path.isdir(os.path.dirname(path)):
                if not os.path.exists(os.path.join(path, 'db', 'fs-type')):
                    ui.status(_('initializing svn repo %r\n') %
                              os.path.basename(path))
                    commandline(ui, 'svnadmin').run0('create', path)
                    created = path
                path = util.normpath(path)
                if not path.startswith('/'):
                    path = '/' + path
                path = 'file://' + path

            ui.status(_('initializing svn wc %r\n') % os.path.basename(wcpath))
            self.run0('checkout', path, wcpath)

            self.wc = wcpath
        self.opener = util.opener(self.wc)
        self.wopener = util.opener(self.wc)
        self.childmap = mapfile(ui, self.join('hg-childmap'))
        self.is_exec = util.checkexec(self.wc) and util.is_exec or None

        if created:
            # A freshly created repository needs a hook allowing the
            # revision property changes made by putcommit().
            hook = os.path.join(created, 'hooks', 'pre-revprop-change')
            fp = open(hook, 'w')
            try:
                fp.write(pre_revprop_change)
            finally:
                fp.close()
            util.set_flags(hook, "x")

        xport = transport.SvnRaTransport(url=geturl(path))
        self.uuid = svn.ra.get_uuid(xport.ra)

    def wjoin(self, *names):
        """Join names onto the working copy root."""
        return os.path.join(self.wc, *names)

    def putfile(self, filename, flags, data):
        """Write filename into the working copy with the given flags.

        With 'l' in flags, a symlink pointing at data is created.
        Otherwise data is written as a regular file (replacing any symlink
        previously at that location) and a change of the executable bit is
        queued in self.setexec / self.delexec for the next commit.
        """
        if 'l' in flags:
            self.wopener.symlink(data, filename)
        else:
            wpath = self.wjoin(filename)
            try:
                # Remove the link itself, located inside the working copy,
                # so the data is not written through it to its target.
                if os.path.islink(wpath):
                    os.unlink(wpath)
            except OSError:
                pass
            fp = self.wopener(filename, 'w')
            try:
                fp.write(data)
            finally:
                fp.close()

            if self.is_exec:
                was_exec = self.is_exec(wpath)
            else:
                # On filesystems not supporting execute-bit, there is no way
                # to know if it is set but asking subversion. Setting it
                # systematically is just as expensive and much simpler.
                was_exec = 'x' not in flags

            util.set_flags(wpath, flags)
            if was_exec:
                if 'x' not in flags:
                    self.delexec.append(filename)
            else:
                if 'x' in flags:
                    self.setexec.append(filename)

    def delfile(self, name):
        """Queue name for deletion at the next commit."""
        self.delete.append(name)

    def copyfile(self, source, dest):
        """Queue a copy of source to dest to be recorded at the next commit."""
        self.copies.append([source, dest])

    def _copyfile(self, source, dest):
        """Record with svn that dest is a copy of source, keeping dest's data."""
        # SVN's copy command pukes if the destination file exists, but
        # our copyfile method expects to record a copy that has
        # already occurred.  Cross the semantic gap.
        wdest = self.wjoin(dest)
        exists = os.path.exists(wdest)
        if exists:
            # Reserve a unique name next to dest, then move dest aside.
            fd, tempname = tempfile.mkstemp(
                prefix='hg-copy-', dir=os.path.dirname(wdest))
            os.close(fd)
            os.unlink(tempname)
            os.rename(wdest, tempname)
        try:
            self.run0('copy', source, dest)
        finally:
            if exists:
                # Drop whatever svn copy created and restore our content.
                try:
                    os.unlink(wdest)
                except OSError:
                    pass
                os.rename(tempname, wdest)

    def dirs_of(self, files):
        """Return the set of directories among, or containing, files."""
        dirs = util.set()
        for f in files:
            if os.path.isdir(self.wjoin(f)):
                dirs.add(f)
            for i in strutil.rfindall(f, '/'):
                dirs.add(f[:i])
        return dirs

    def add_dirs(self, files):
        """svn-add the directories of files not yet versioned.

        Returns the sorted list of directories that were added.
        """
        add_dirs = [d for d in self.dirs_of(files)
                    if not os.path.exists(self.wjoin(d, '.svn', 'entries'))]
        if add_dirs:
            add_dirs.sort()
            self.xargs(add_dirs, 'add', non_recursive=True, quiet=True)
        return add_dirs

    def add_files(self, files):
        """svn-add files (if any) and return them."""
        if files:
            self.xargs(files, 'add', quiet=True)
        return files

    def tidy_dirs(self, names):
        """svn-delete directories of names that only hold a .svn entry.

        Directories are visited in reverse sorted order, so children come
        before their parents. Returns the list of deleted directories.
        """
        dirs = list(self.dirs_of(names))
        dirs.sort()
        dirs.reverse()
        deleted = []
        for d in dirs:
            wd = self.wjoin(d)
            # os.listdir() returns a list: compare against a list, not the
            # bare string, or the test can never succeed.
            if os.listdir(wd) == ['.svn']:
                self.run0('delete', d)
                deleted.append(d)
        return deleted

    def addchild(self, parent, child):
        """Remember child as the revision committed on top of parent."""
        self.childmap[parent] = child

    def revid(self, rev):
        """Return the identifier of svn revision rev in this repository."""
        return u"svn:%s@%s" % (self.uuid, rev)

    def putcommit(self, files, parents, commit):
        """Commit the pending changes and return the new revision id.

        If one of parents already has a recorded child, that child's id is
        returned and nothing is committed. Otherwise queued copies,
        deletions, additions and executable-bit changes are applied, the
        working copy is committed with commit.desc as message, and the
        hg:convert-rev / hg:convert-branch revision properties are set.

        Raises util.Abort when the svn commit output cannot be parsed.
        """
        for parent in parents:
            try:
                return self.revid(self.childmap[parent])
            except KeyError:
                pass
        entries = util.set(self.delete)
        files = util.frozenset(files)
        entries.update(self.add_dirs(files.difference(entries)))
        if self.copies:
            for s, d in self.copies:
                self._copyfile(s, d)
            self.copies = []
        if self.delete:
            self.xargs(self.delete, 'delete')
            self.delete = []
        entries.update(self.add_files(files.difference(entries)))
        entries.update(self.tidy_dirs(entries))
        if self.delexec:
            self.xargs(self.delexec, 'propdel', 'svn:executable')
            self.delexec = []
        if self.setexec:
            self.xargs(self.setexec, 'propset', 'svn:executable', '*')
            self.setexec = []

        fd, messagefile = tempfile.mkstemp(prefix='hg-convert-')
        try:
            # Written inside the try block so the temporary file is
            # removed even when writing the message fails.
            fp = os.fdopen(fd, 'w')
            try:
                fp.write(commit.desc)
            finally:
                fp.close()
            output = self.run0('commit',
                               username=util.shortuser(commit.author),
                               file=messagefile,
                               encoding='utf-8')
            try:
                rev = self.commit_re.search(output).group(1)
            except AttributeError:
                self.ui.warn(_('unexpected svn output:\n'))
                self.ui.warn(output)
                raise util.Abort(_('unable to cope with svn output'))
            if commit.rev:
                self.run('propset', 'hg:convert-rev', commit.rev,
                         revprop=True, revision=rev)
            if commit.branch and commit.branch != 'default':
                self.run('propset', 'hg:convert-branch', commit.branch,
                         revprop=True, revision=rev)
            for parent in parents:
                self.addchild(parent, rev)
            return self.revid(rev)
        finally:
            os.unlink(messagefile)

    def puttags(self, tags):
        """Tags are not supported yet: only emit a warning."""
        self.ui.warn(_('XXX TAGS NOT IMPLEMENTED YET\n'))
General Comments 0
You need to be logged in to leave comments. Login now