##// END OF EJS Templates
convert: restore previous svn transport parent correctly
Patrick Mezard -
r6847:9d6d1fdb default
parent child Browse files
Show More
@@ -1,1137 +1,1146
1 1 # Subversion 1.4/1.5 Python API backend
2 2 #
3 3 # Copyright(C) 2007 Daniel Holth et al
4 4 #
5 5 # Configuration options:
6 6 #
7 7 # convert.svn.trunk
8 8 # Relative path to the trunk (default: "trunk")
9 9 # convert.svn.branches
10 10 # Relative path to tree of branches (default: "branches")
11 11 # convert.svn.tags
12 12 # Relative path to tree of tags (default: "tags")
13 13 #
14 14 # Set these in a hgrc, or on the command line as follows:
15 15 #
16 16 # hg convert --config convert.svn.trunk=wackoname [...]
17 17
18 18 import locale
19 19 import os
20 20 import re
21 21 import sys
22 22 import cPickle as pickle
23 23 import tempfile
24 24
25 25 from mercurial import strutil, util
26 26 from mercurial.i18n import _
27 27
28 28 # Subversion stuff. Works best with very recent Python SVN bindings
29 29 # e.g. SVN 1.5 or backports. Thanks to the bzr folks for enhancing
30 30 # these bindings.
31 31
32 32 from cStringIO import StringIO
33 33
34 34 from common import NoRepo, commit, converter_source, encodeargs, decodeargs
35 35 from common import commandline, converter_sink, mapfile
36 36
37 37 try:
38 38 from svn.core import SubversionException, Pool
39 39 import svn
40 40 import svn.client
41 41 import svn.core
42 42 import svn.ra
43 43 import svn.delta
44 44 import transport
45 45 except ImportError:
46 46 pass
47 47
def geturl(path):
    """Turn a working copy path or local directory into a repository URL.

    The Subversion client is asked first. When it raises
    SubversionException, an existing local directory is converted to a
    file:// URL and anything else is returned unchanged.
    """
    try:
        return svn.client.url_from_path(svn.core.svn_path_canonicalize(path))
    except SubversionException:
        pass
    if not os.path.isdir(path):
        return path
    path = os.path.normpath(os.path.abspath(path))
    if os.name == 'nt':
        # Prefix the normalized path with a slash on Windows.
        path = '/' + util.normpath(path)
    return 'file://%s' % path
59 59
def optrev(number):
    """Build an svn_opt_revision_t designating revision *number*."""
    rev = svn.core.svn_opt_revision_t()
    rev.kind = svn.core.svn_opt_revision_number
    rev.value.number = number
    return rev
65 65
class changedpath(object):
    """Plain copy of the three fields read from a log changed-path entry.

    get_log_child() substitutes these objects for the original entries
    before pickling a log record.
    """
    def __init__(self, p):
        for field in ('copyfrom_path', 'copyfrom_rev', 'action'):
            setattr(self, field, getattr(p, field))
71 71
def get_log_child(fp, url, paths, start, end, limit=0, discover_changed_paths=True,
                  strict_node_history=False):
    """Fetch the svn log of *paths* and pickle every entry to *fp*.

    Each log entry is written as a (paths, revnum, author, date, message)
    tuple. The stream is terminated by a pickled None, or by the
    Subversion error number when the log call raised
    SubversionException. The process then exits through os._exit(0).
    """
    protocol = -1

    def receiver(orig_paths, revnum, author, date, message, pool):
        if orig_paths is not None:
            # Replace the entries in place by picklable changedpath copies.
            for key in orig_paths.keys():
                orig_paths[key] = changedpath(orig_paths[key])
        record = (orig_paths, revnum, author, date, message)
        pickle.dump(record, fp, protocol)

    try:
        # Use an ra of our own so that our parent can consume
        # our results without confusing the server.
        t = transport.SvnRaTransport(url=url)
        svn.ra.get_log(t.ra, paths, start, end, limit,
                       discover_changed_paths,
                       strict_node_history,
                       receiver)
    except SubversionException:
        # The exception arguments are (instance/message, error number).
        inst, num = sys.exc_info()[1].args
        pickle.dump(num, fp, protocol)
    except IOError:
        # Caller may interrupt the iteration
        pickle.dump(None, fp, protocol)
    else:
        pickle.dump(None, fp, protocol)
    fp.close()
    # With large history, cleanup process goes crazy and suddenly
    # consumes *huge* amount of memory. The output file being closed,
    # there is no need for clean termination.
    os._exit(0)
102 102
def debugsvnlog(ui, **opts):
    """Fetch SVN log in a subprocess and channel them back to parent to
    avoid memory collection issues.

    The encoded arguments are read from stdin and the pickled log
    entries are written to stdout; both streams are set to binary mode.
    """
    for stream in (sys.stdin, sys.stdout):
        util.set_binary(stream)
    encoded = sys.stdin.read()
    get_log_child(sys.stdout, *decodeargs(encoded))
111 111
class logstream:
    """Interruptible revision log iterator.

    Wraps the pipe on which a child process pickles log records.
    Iteration yields each (paths, revnum, author, date, message) tuple
    until a pickled None is read. Any other non-record value is treated
    as an error reported by the child and raised as SubversionException.
    """
    def __init__(self, stdout):
        self._stdout = stdout

    def __iter__(self):
        while True:
            entry = pickle.load(self._stdout)
            try:
                orig_paths, revnum, author, date, message = entry
            except (TypeError, ValueError):
                # Not a 5-item record: None marks the end of the stream,
                # anything else is an error value sent by the child.
                # Only unpacking failures are caught here so that
                # interrupts are not swallowed.
                if entry is None:
                    break
                raise SubversionException("child raised exception", entry)
            yield entry

    def close(self):
        """Close the underlying stream; calling it again is a no-op."""
        if self._stdout:
            self._stdout.close()
            self._stdout = None
132 132
def get_log(url, paths, start, end, limit=0, discover_changed_paths=True,
            strict_node_history=False):
    """Run "hg debugsvnlog" in a child process and return its log stream.

    The arguments are encoded and written to the child's stdin; the
    child's stdout is wrapped in a logstream for the caller to iterate.
    """
    payload = encodeargs([url, paths, start, end, limit,
                          discover_changed_paths, strict_node_history])
    cmd = '%s debugsvnlog' % util.shellquote(util.hgexecutable())
    stdin, stdout = os.popen2(cmd, 'b')
    stdin.write(payload)
    stdin.close()
    return logstream(stdout)
144 144
145 145 # SVN conversion code stolen from bzr-svn and tailor
146 146 #
147 147 # Subversion looks like a versioned filesystem, branches structures
148 148 # are defined by conventions and not enforced by the tool. First,
149 149 # we define the potential branches (modules) as "trunk" and "branches"
150 150 # children directories. Revisions are then identified by their
151 151 # module and revision number (and a repository identifier).
152 152 #
153 153 # The revision graph is really a tree (or a forest). By default, a
154 154 # revision parent is the previous revision in the same module. If the
155 155 # module directory is copied/moved from another module then the
156 156 # revision is the module root and its parent the source revision in
157 157 # the parent module. A revision has at most one parent.
158 158 #
159 159 class svn_source(converter_source):
def __init__(self, ui, url, rev=None):
    """Open the Subversion repository at *url* and locate its head.

    *url* may carry an "@rev" suffix and *rev* may name a revision;
    either one bounds the search for the head revision. Raises NoRepo
    when the bindings are missing or the URL cannot be opened, and
    util.Abort on a non-integer revision or when no revision is found.
    """
    super(svn_source, self).__init__(ui, url, rev=rev)

    # The name only exists if the svn bindings were imported.
    try:
        SubversionException
    except NameError:
        raise NoRepo('Subversion python bindings could not be loaded')

    self.encoding = locale.getpreferredencoding()
    self.lastrevs = {}

    latest = None
    try:
        # Support file://path@rev syntax. Useful e.g. to convert
        # deleted branches.
        at = url.rfind('@')
        if at >= 0:
            latest = int(url[at+1:])
            url = url[:at]
    except ValueError:
        # Not a revision suffix, keep the URL untouched.
        pass
    self.url = geturl(url)
    self.encoding = 'UTF-8' # Subversion is always nominal UTF-8
    try:
        self.transport = transport.SvnRaTransport(url=self.url)
        self.ra = self.transport.ra
        self.ctx = self.transport.client
        self.base = svn.ra.get_repos_root(self.ra)
        # Module is either empty or a repository path starting with
        # a slash and not ending with a slash.
        self.module = self.url[len(self.base):]
        # Last module passed to reparent(), None until it is called.
        self.prevmodule = None
        self.rootmodule = self.module
        self.commits = {}
        self.paths = {}
        self.uuid = svn.ra.get_uuid(self.ra).decode(self.encoding)
    except SubversionException:
        ui.print_exc()
        raise NoRepo("%s does not look like a Subversion repo" % self.url)

    if rev:
        try:
            latest = int(rev)
        except ValueError:
            raise util.Abort('svn: revision %s is not an integer' % rev)

    self.startrev = self.ui.config('convert', 'svn.startrev', default=0)
    try:
        self.startrev = int(self.startrev)
        if self.startrev < 0:
            self.startrev = 0
    except ValueError:
        raise util.Abort(_('svn: start revision %s is not an integer')
                         % self.startrev)

    # A missing or unreadable blacklist file is not an error.
    try:
        self.get_blacklist()
    except IOError:
        pass

    self.head = self.latest(self.module, latest)
    if not self.head:
        raise util.Abort(_('no revision found in module %s') %
                         self.module.encode(self.encoding))
    self.last_changed = self.revnum(self.head)

    self._changescache = None

    # Remember the working copy, if the source is one.
    if os.path.exists(os.path.join(url, '.svn/entries')):
        self.wc = url
    else:
        self.wc = None
    self.convertfp = None
232 233
def setrevmap(self, revmap):
    """Record in self.lastrevs the highest mapped revnum of each module."""
    newest = {}
    for revid in revmap.iterkeys():
        module, revnum = self.revsplit(revid)[1:]
        if module not in newest or revnum > newest[module]:
            newest[module] = revnum
    self.lastrevs = newest
241 242
def exists(self, path, optrev):
    """Return True if *path*, relative to self.url, can be listed at
    revision *optrev*, and False when the listing raises
    SubversionException.
    """
    target = self.url.rstrip('/') + '/' + path
    try:
        svn.client.ls(target, optrev, False, self.ctx)
    except SubversionException:
        return False
    return True
249 250
def getheads(self):
    """Return the list of head revision ids to convert.

    Looks for the trunk, tags and branches directories (configurable
    through convert.svn.trunk/tags/branches). The first head is the one
    of the module (trunk when present), followed by one head per
    non-empty branch directory. Aborts when a start revision is
    combined with several heads or lies beyond the only head.
    """

    def isdir(path, revnum):
        kind = svn.ra.check_path(self.ra, path, revnum)
        return kind == svn.core.svn_node_dir

    def getcfgpath(name, rev):
        # An explicitly empty setting disables the layout element.
        cfgpath = self.ui.config('convert', 'svn.' + name)
        if cfgpath is not None and cfgpath.strip() == '':
            return None
        path = (cfgpath or name).strip('/')
        if not self.exists(path, rev):
            # Only complain when the user asked for this location.
            if cfgpath:
                raise util.Abort(_('expected %s to be at %r, but not found')
                                 % (name, path))
            return None
        self.ui.note(_('found %s at %r\n') % (name, path))
        return path

    rev = optrev(self.last_changed)
    oldmodule = ''
    trunk = getcfgpath('trunk', rev)
    self.tags = getcfgpath('tags', rev)
    branches = getcfgpath('branches', rev)

    # If the project has a trunk or branches, we will extract heads
    # from them. We keep the project root otherwise.
    if trunk:
        oldmodule = self.module or ''
        self.module += '/' + trunk
        self.head = self.latest(self.module, self.last_changed)
        if not self.head:
            raise util.Abort(_('no revision found in module %s') %
                             self.module.encode(self.encoding))

    # First head in the list is the module's head
    self.heads = [self.head]
    if self.tags is not None:
        self.tags = '%s/%s' % (oldmodule , (self.tags or 'tags'))

    # Check if branches bring a few more heads to the list
    if branches:
        rpath = self.url.strip('/')
        branchnames = svn.client.ls(rpath + '/' + branches, rev, False,
                                    self.ctx)
        for branch in branchnames.keys():
            module = '%s/%s/%s' % (oldmodule, branches, branch)
            if not isdir(module, self.last_changed):
                continue
            brevid = self.latest(module, self.last_changed)
            if not brevid:
                self.ui.note(_('ignoring empty branch %s\n') %
                             branch.encode(self.encoding))
                continue
            self.ui.note('found branch %s at %d\n' %
                         (branch, self.revnum(brevid)))
            self.heads.append(brevid)

    if self.startrev and self.heads:
        if len(self.heads) > 1:
            # The message used to read "... supported with with more ...".
            raise util.Abort(_('svn: start revision is not supported '
                               'with more than one branch'))
        revnum = self.revnum(self.heads[0])
        if revnum < self.startrev:
            raise util.Abort(_('svn: no revision found after start revision %d')
                             % self.startrev)

    return self.heads
318 319
def getfile(self, file, rev):
    """Return the content of *file* at *rev*, caching its mode for
    a later getmode() call.
    """
    content, mode = self._getfile(file, rev)
    self.modecache[(file, rev)] = mode
    return content
323 324
def getmode(self, file, rev):
    """Return the mode cached by getfile() for *file* at *rev*."""
    key = (file, rev)
    return self.modecache[key]
326 327
def getchanges(self, rev):
    """Return (files, copies) for *rev*.

    *files* is a sorted list of (filename, rev) pairs and *copies* a
    dictionary of copy information. Revisions without parents are
    treated as a full checkout of their module.
    """
    cached = self._changescache
    if cached and cached[0] == rev:
        return cached[1]
    self._changescache = None
    self.modecache = {}
    paths, parents = self.paths[rev]
    if parents:
        files, copies = self.expandpaths(rev, paths, parents)
    else:
        # Perform a full checkout on roots
        uuid, module, revnum = self.revsplit(rev)
        entries = svn.client.ls(self.base + module, optrev(revnum),
                                True, self.ctx)
        files = [name for name, ent in entries.iteritems()
                 if ent.kind == svn.core.svn_node_file]
        copies = {}

    files.sort()
    files = [(name, rev) for name in files]

    # caller caches the result, so free it here to release memory
    del self.paths[rev]
    return (files, copies)
350 351
def getchangedfiles(self, rev, i):
    """Return the names of the files changed in *rev*.

    The full getchanges() result is kept in self._changescache so the
    next getchanges() call for the same revision can reuse it.
    """
    changes = self.getchanges(rev)
    self._changescache = (rev, changes)
    files = changes[0]
    return [entry[0] for entry in files]
355 356
def getcommit(self, rev):
    """Return the commit object of *rev*, fetching its log if needed.

    The entry is removed from self.commits before being returned.
    """
    if rev not in self.commits:
        uuid, module, revnum = self.revsplit(rev)
        self.module = module
        self.reparent(module)
        # We assume that:
        # - requests for revisions after "stop" come from the
        # revision graph backward traversal. Cache all of them
        # down to stop, they will be used eventually.
        # - requests for revisions before "stop" come to get
        # isolated branches parents. Just fetch what is needed.
        stop = self.lastrevs.get(module, 0)
        if revnum < stop:
            stop = revnum + 1
        self._fetch_revisions(revnum, stop)
    # caller caches the result, so free it here to release memory
    return self.commits.pop(rev)
375 376
def gettags(self):
    """Return a dictionary mapping tag names to revision ids.

    An empty dictionary is returned when no tags directory is known or
    when the log cannot be read.
    """
    tags = {}
    if self.tags is None:
        return tags

    # svn tags are just a convention, project branches left in a
    # 'tags' directory. There is no other relationship than
    # ancestry, which is expensive to discover and makes them hard
    # to update incrementally. Worse, past revisions may be
    # referenced by tags far away in the future, requiring a deep
    # history traversal on every calculation. Current code
    # performs a single backward traversal, tracking moves within
    # the tags directory (tag renaming) and recording a new tag
    # everytime a project is copied from outside the tags
    # directory. It also lists deleted tags, this behaviour may
    # change in the future.
    pendings = []
    tagspath = self.tags
    start = svn.ra.get_latest_revnum(self.ra)
    try:
        stream = get_log(self.url, [self.tags], start, self.startrev)
        try:
            for entry in stream:
                origpaths, revnum, author, date, message = entry
                copies = [(e.copyfrom_path, e.copyfrom_rev, p) for p, e
                          in origpaths.iteritems() if e.copyfrom_path]
                copies.sort()
                # Apply moves/copies from more specific to general
                copies.reverse()

                srctagspath = tagspath
                if copies and copies[-1][2] == tagspath:
                    # Track tags directory moves
                    srctagspath = copies.pop()[0]

                for source, sourcerev, dest in copies:
                    if not dest.startswith(tagspath + '/'):
                        continue
                    for tag in pendings:
                        if tag[0].startswith(dest):
                            tagpath = source + tag[0][len(dest):]
                            tag[:2] = [tagpath, sourcerev]
                            break
                    else:
                        pendings.append([source, sourcerev,
                                         dest.split('/')[-1]])

                # Tell tag renamings from tag creations
                remainings = []
                for source, sourcerev, tagname in pendings:
                    if source.startswith(srctagspath):
                        remainings.append([source, sourcerev, tagname])
                        continue
                    # From revision may be fake, get one with changes
                    tagid = self.latest(source, sourcerev)
                    if tagid:
                        tags[tagname] = tagid
                pendings = remainings
                tagspath = srctagspath
        finally:
            # Release the child process pipe, as the other log
            # consumers in this class do.
            stream.close()

    except SubversionException:
        self.ui.note('no tags found at revision %d\n' % start)
    return tags
436 437
def converted(self, rev, destrev):
    """Append "destrev revnum" to .svn/hg-shamap in the working copy.

    Does nothing when the source is not a working copy. The map file
    is opened on first use and flushed after each line.
    """
    if not self.wc:
        return
    if self.convertfp is None:
        mappath = os.path.join(self.wc, '.svn', 'hg-shamap')
        self.convertfp = open(mappath, 'a')
    line = '%s %d\n' % (destrev, self.revnum(rev))
    self.convertfp.write(line)
    self.convertfp.flush()
445 446
446 447 # -- helper functions --
447 448
def revid(self, revnum, module=None):
    """Return the unicode revision id "svn:<uuid><module>@<revnum>".

    self.module is used when *module* is empty or not given.
    """
    module = module or self.module
    decoded = module.decode(self.encoding)
    return u"svn:%s%s@%s" % (self.uuid, decoded, revnum)
453 454
def revnum(self, rev):
    """Return the integer following the last '@' of revision id *rev*."""
    # rfind() yields -1 without any '@', so the whole string is parsed.
    return int(rev[rev.rfind('@') + 1:])
456 457
def revsplit(self, rev):
    """Split revision id *rev* into (uuid, module, revnum).

    The module is empty or starts with a slash; the first four
    characters of the leading part (the "svn:" prefix written by
    revid()) are dropped to obtain the uuid.
    """
    url, revnum = rev.encode(self.encoding).split('@', 1)
    revnum = int(revnum)
    slash = url.find('/')
    if slash < 0:
        head, mod = url, ''
    else:
        head, mod = url[:slash], url[slash:]
    return head[4:], mod, revnum
466 467
def latest(self, path, stop=0):
    """Find the latest revid affecting path, up to stop. It may return
    a revision in a different module, since a branch may be moved without
    a change being reported. Return None if computed module does not
    belong to rootmodule subtree.

    Raises util.Abort when path cannot be found up to revision stop.
    """
    if not path.startswith(self.rootmodule):
        # Requests on foreign branches may be forbidden at server level
        self.ui.debug(_('ignoring foreign branch %r\n') % path)
        return None

    if not stop:
        stop = svn.ra.get_latest_revnum(self.ra)
    try:
        # stat() wants a path relative to the repository root. Move the
        # transport there and put it back where it was afterwards, even
        # when the lookup fails.
        prevmodule = self.reparent('')
        try:
            dirent = svn.ra.stat(self.ra, path.strip('/'), stop)
        finally:
            self.reparent(prevmodule)
    except SubversionException:
        dirent = None
    if not dirent:
        raise util.Abort('%s not found up to revision %d' % (path, stop))

    # stat() gives us the previous revision on this line of development, but
    # it might be in *another module*. Fetch the log and detect renames down
    # to the latest revision.
    stream = get_log(self.url, [path], stop, dirent.created_rev)
    try:
        for entry in stream:
            paths, revnum, author, date, message = entry
            if revnum <= dirent.created_rev:
                break

            for p in paths:
                if not path.startswith(p) or not paths[p].copyfrom_path:
                    continue
                newpath = paths[p].copyfrom_path + path[len(p):]
                self.ui.debug("branch renamed from %s to %s at %d\n" %
                              (path, newpath, revnum))
                path = newpath
                break
    finally:
        stream.close()

    if not path.startswith(self.rootmodule):
        self.ui.debug(_('ignoring foreign branch %r\n') % path)
        return None
    return self.revid(dirent.created_rev, path)
514 515
def get_blacklist(self):
    """Avoid certain revision numbers.
    It is not uncommon for two nearby revisions to cancel each other
    out, e.g. 'I copied trunk into a subdirectory of itself instead
    of making a branch'. The converted repository is significantly
    smaller if we ignore such revisions.

    Revision numbers are read one per line from "blacklist.txt" in the
    current directory; lines starting with '#' and lines that are not
    integers are ignored. self.blacklist is set to an empty set before
    the file is opened, so it exists even when opening raises IOError.
    """
    self.blacklist = util.set()
    blacklist = self.blacklist
    fp = open("blacklist.txt", "r")
    try:
        for line in fp:
            if line.startswith("#"):
                continue
            try:
                blacklist.add(int(line.strip()))
            except ValueError:
                pass # not an integer or a comment
    finally:
        # Do not rely on garbage collection to release the handle.
        fp.close()
530 531
def is_blacklisted(self, svn_rev):
    """Tell whether revision number *svn_rev* is in self.blacklist."""
    skipped = self.blacklist
    return svn_rev in skipped
533 534
def reparent(self, module):
    """Reparent the svn transport and return the previous parent.

    Nothing is done, and *module* itself is returned, when the
    transport was already reparented to *module* by the last call.
    Before any reparent call the previous parent is reported as ''.
    """
    if self.prevmodule == module:
        return module
    svn_url = (self.base + module).encode(self.encoding)
    prevmodule = self.prevmodule
    if prevmodule is None:
        prevmodule = ''
    self.ui.debug("reparent to %s\n" % svn_url)
    svn.ra.reparent(self.ra, svn_url)
    self.prevmodule = module
    return prevmodule
538 547
def expandpaths(self, rev, paths, parents):
    """Expand the changed paths of *rev* into files and copies.

    *paths* is a list of (path, entry) pairs from the revision log and
    *parents* the list of parent revision ids. Returns a tuple
    (entries, copies): the unique recoded names of the touched files,
    and a dictionary mapping copy destinations to their sources.
    """
    entries = []
    copyfrom = {} # Map of entrypath, revision for finding source of deleted revisions.
    copies = {}

    new_module, revnum = self.revsplit(rev)[1:]
    if new_module != self.module:
        self.module = new_module
        self.reparent(self.module)

    for path, ent in paths:
        entrypath = self.getrelpath(path)
        entry = entrypath.decode(self.encoding)

        kind = svn.ra.check_path(self.ra, entrypath, revnum)
        if kind == svn.core.svn_node_file:
            entries.append(self.recode(entry))
            if not ent.copyfrom_path or not parents:
                continue
            # Copy sources not in parent revisions cannot be represented,
            # ignore their origin for now
            pmodule, prevnum = self.revsplit(parents[0])[1:]
            if ent.copyfrom_rev < prevnum:
                continue
            copyfrom_path = self.getrelpath(ent.copyfrom_path, pmodule)
            if not copyfrom_path:
                continue
            self.ui.debug("copied to %s from %s@%s\n" %
                          (entrypath, copyfrom_path, ent.copyfrom_rev))
            copies[self.recode(entry)] = self.recode(copyfrom_path)
        elif kind == 0: # gone, but had better be a deleted *file*
            self.ui.debug("gone from %s\n" % ent.copyfrom_rev)

            # if a branch is created but entries are removed in the same
            # changeset, get the right fromrev
            # parents cannot be empty here, you cannot remove things from
            # a root revision.
            uuid, old_module, fromrev = self.revsplit(parents[0])

            basepath = old_module + "/" + self.getrelpath(path)
            entrypath = basepath

            def lookup_parts(p):
                # Return (prefix, entry) for the longest proper prefix
                # of p recorded in copyfrom, or None.
                rc = None
                parts = p.split("/")
                for i in range(len(parts)):
                    part = "/".join(parts[:i])
                    info = part, copyfrom.get(part, None)
                    if info[1] is not None:
                        self.ui.debug("Found parent directory %s\n" % info[1])
                        rc = info
                return rc

            self.ui.debug("base, entry %s %s\n" % (basepath, entrypath))

            frompath, froment = lookup_parts(entrypath) or (None, revnum - 1)

            # need to remove fragment from lookup_parts and replace with copyfrom_path
            if frompath is not None:
                self.ui.debug("munge-o-matic\n")
                self.ui.debug(entrypath + '\n')
                self.ui.debug(entrypath[len(frompath):] + '\n')
                entrypath = froment.copyfrom_path + entrypath[len(frompath):]
                fromrev = froment.copyfrom_rev
                self.ui.debug("Info: %s %s %s %s\n" % (frompath, froment, ent, entrypath))

            # We could avoid the reparent calls if the module has not
            # changed, but it is probably not worth the pain. The path
            # checked below is relative to the repository root, so go
            # there and then restore whatever parent was set before.
            prevmodule = self.reparent('')
            fromkind = svn.ra.check_path(self.ra, entrypath.strip('/'), fromrev)
            self.reparent(prevmodule)

            if fromkind == svn.core.svn_node_file: # a deleted file
                entries.append(self.recode(entry))
            elif fromkind == svn.core.svn_node_dir:
                # Sometimes this is tricky. For example: in
                # The Subversion Repository revision 6940 a dir
                # was copied and one of its files was deleted
                # from the new location in the same commit. This
                # code can't deal with that yet.
                if ent.action == 'C':
                    children = self._find_children(path, fromrev)
                else:
                    oroot = entrypath.strip('/')
                    nroot = path.strip('/')
                    children = self._find_children(oroot, fromrev)
                    children = [s.replace(oroot,nroot) for s in children]
                # Mark all [files, not directories] as deleted.
                for child in children:
                    # Can we move a child directory and its
                    # parent in the same commit? (probably can). Could
                    # cause problems if instead of revnum -1,
                    # we have to look in (copyfrom_path, revnum - 1)
                    entrypath = self.getrelpath("/" + child, module=old_module)
                    if entrypath:
                        entry = self.recode(entrypath.decode(self.encoding))
                        if entry in copies:
                            # deleted file within a copy
                            del copies[entry]
                        else:
                            entries.append(entry)
            else:
                self.ui.debug('unknown path in revision %d: %s\n' % \
                              (revnum, path))
        elif kind == svn.core.svn_node_dir:
            # Should probably synthesize normal file entries
            # and handle as above to clean up copy/rename handling.

            # If the directory just had a prop change,
            # then we shouldn't need to look for its children.
            if ent.action == 'M':
                continue

            # Also this could create duplicate entries. Not sure
            # whether this will matter. Maybe should make entries a set.
            # This will fail if a directory was copied
            # from another branch and then some of its files
            # were deleted in the same transaction.
            children = self._find_children(path, revnum)
            children.sort()
            for child in children:
                # Can we move a child directory and its
                # parent in the same commit? (probably can). Could
                # cause problems if instead of revnum -1,
                # we have to look in (copyfrom_path, revnum - 1)
                entrypath = self.getrelpath("/" + child)
                if entrypath:
                    # Need to filter out directories here...
                    kind = svn.ra.check_path(self.ra, entrypath, revnum)
                    if kind != svn.core.svn_node_dir:
                        entries.append(self.recode(entrypath))

            # Copies here (must copy all from source)
            # Probably not a real problem for us if
            # source does not exist
            if not ent.copyfrom_path or not parents:
                continue
            # Copy sources not in parent revisions cannot be represented,
            # ignore their origin for now
            pmodule, prevnum = self.revsplit(parents[0])[1:]
            if ent.copyfrom_rev < prevnum:
                continue
            copyfrompath = ent.copyfrom_path.decode(self.encoding)
            copyfrompath = self.getrelpath(copyfrompath, pmodule)
            if not copyfrompath:
                continue
            copyfrom[path] = ent
            self.ui.debug("mark %s came from %s:%d\n"
                          % (path, copyfrompath, ent.copyfrom_rev))
            children = self._find_children(ent.copyfrom_path, ent.copyfrom_rev)
            children.sort()
            for child in children:
                entrypath = self.getrelpath("/" + child, pmodule)
                if not entrypath:
                    continue
                entry = entrypath.decode(self.encoding)
                copytopath = path + entry[len(copyfrompath):]
                copytopath = self.getrelpath(copytopath)
                # NOTE(review): pmodule is passed as the second recode()
                # argument here, unlike the other recode() calls in this
                # method - confirm that this is intended.
                copies[self.recode(copytopath)] = self.recode(entry, pmodule)

    return (util.unique(entries), copies)
705 714
def _fetch_revisions(self, from_revnum, to_revnum):
    """Read the log of self.module between two revisions, newest first,
    filling self.commits and self.paths.

    Reading stops at a branch root or below self.startrev. Raises
    util.Abort when the repository reports that the revision does not
    exist.
    """
    if from_revnum < to_revnum:
        from_revnum, to_revnum = to_revnum, from_revnum

    # Commit parsed just before the current one, i.e. its child.
    self.child_cset = None

    def isdescendantof(parent, child):
        if not child or not parent or not child.startswith(parent):
            return False
        subpath = child[len(parent):]
        return len(subpath) > 1 and subpath[0] == '/'

    def parselogentry(orig_paths, revnum, author, date, message):
        """Return the parsed commit object or None, and True if
        the revision is a branch root.
        """
        self.ui.debug("parsing revision %d (%d changes)\n" %
                      (revnum, len(orig_paths)))

        branched = False
        rev = self.revid(revnum)
        # branch log might return entries for a parent we already have

        if rev in self.commits or revnum < to_revnum:
            return None, branched

        parents = []
        # check whether this revision is the start of a branch or part
        # of a branch renaming
        orig_paths = orig_paths.items()
        orig_paths.sort()
        root_paths = [(p, e) for p, e in orig_paths
                      if self.module.startswith(p)]
        if root_paths:
            path, ent = root_paths[-1]
            if ent.copyfrom_path:
                # If dir was moved while one of its file was removed
                # the log may look like:
                # A /dir (from /dir:x)
                # A /dir/a (from /dir/a:y)
                # A /dir/b (from /dir/b:z)
                # ...
                # for all remaining children.
                # Let's take the highest child element from rev as source.
                copies = [(p, e) for p, e in orig_paths[:-1]
                          if isdescendantof(ent.copyfrom_path, e.copyfrom_path)]
                fromrev = max([e.copyfrom_rev for p, e in copies]
                              + [ent.copyfrom_rev])
                branched = True
                newpath = ent.copyfrom_path + self.module[len(path):]
                # ent.copyfrom_rev may not be the actual last revision
                previd = self.latest(newpath, fromrev)
                if previd is not None:
                    prevmodule, prevnum = self.revsplit(previd)[1:]
                    if prevnum >= self.startrev:
                        parents = [previd]
                        self.ui.note('found parent of branch %s at %d: %s\n' %
                                     (self.module, prevnum, prevmodule))
            else:
                self.ui.debug("No copyfrom path, don't know what to do.\n")

        # filter out unrelated paths
        paths = [(p, e) for p, e in orig_paths
                 if self.getrelpath(p) is not None]

        # Example SVN datetime. Includes microseconds.
        # ISO-8601 conformant
        # '2007-01-04T17:35:00.902377Z'
        date = util.parsedate(date[:19] + " UTC", ["%Y-%m-%dT%H:%M:%S"])

        log = message and self.recode(message) or ''
        author = author and self.recode(author) or ''
        try:
            branch = self.module.split("/")[-1]
            if branch == 'trunk':
                branch = ''
        except IndexError:
            branch = None

        cset = commit(author=author,
                      date=util.datestr(date),
                      desc=log,
                      parents=parents,
                      branch=branch,
                      rev=rev.encode('utf-8'))

        self.commits[rev] = cset
        # The parents list is *shared* among self.paths and the
        # commit object. Both will be updated below.
        self.paths[rev] = (paths, cset.parents)
        if self.child_cset and not self.child_cset.parents:
            self.child_cset.parents[:] = [rev]
        self.child_cset = cset
        return cset, branched

    self.ui.note('fetching revision log for "%s" from %d to %d\n' %
                 (self.module, from_revnum, to_revnum))

    try:
        firstcset = None
        lastonbranch = False
        stream = get_log(self.url, [self.module], from_revnum, to_revnum)
        try:
            for entry in stream:
                paths, revnum, author, date, message = entry
                if revnum < self.startrev:
                    lastonbranch = True
                    break
                if self.is_blacklisted(revnum):
                    self.ui.note('skipping blacklisted revision %d\n'
                                 % revnum)
                    continue
                if paths is None:
                    self.ui.debug('revision %d has no entries\n' % revnum)
                    continue
                cset, lastonbranch = parselogentry(paths, revnum, author,
                                                   date, message)
                if cset:
                    firstcset = cset
                if lastonbranch:
                    break
        finally:
            stream.close()

        if not lastonbranch and firstcset and not firstcset.parents:
            # The first revision of the sequence (the last fetched one)
            # has invalid parents if not a branch root. Find the parent
            # revision now, if any.
            try:
                firstrevnum = self.revnum(firstcset.rev)
                if firstrevnum > 1:
                    latest = self.latest(self.module, firstrevnum - 1)
                    if latest:
                        firstcset.parents.append(latest)
            except util.Abort:
                pass
    except SubversionException:
        # The exception arguments are (instance/message, error number).
        inst, num = sys.exc_info()[1].args
        if num == svn.core.SVN_ERR_FS_NO_SUCH_REVISION:
            raise util.Abort('svn: branch has no revision %s' % to_revnum)
        raise
847 856
def _getfile(self, file, rev):
    """Fetch the content and flags of file at revision rev.

    Returns a (data, mode) pair where mode is 'l' when the file carries
    the svn:special property, 'x' when it only carries svn:executable,
    and '' otherwise.  Raises IOError when Subversion reports that the
    file does not exist.
    """
    buf = StringIO()
    # TODO: ra.get_file transmits the whole file instead of diffs.
    mode = ''
    try:
        module, revnum = self.revsplit(rev)[1:]
        if module != self.module:
            # The revision lives in another module: point the RA session
            # at it before asking for the file.
            self.module = module
            self.reparent(self.module)
        props = svn.ra.get_file(self.ra, file, revnum, buf)
        if isinstance(props, list):
            props = props[-1]
        # svn:special takes precedence over svn:executable.
        if "svn:special" in props:
            mode = 'l'
        elif "svn:executable" in props:
            mode = 'x'
    except SubversionException:
        # Fetch the exception object without version-specific syntax.
        err = sys.exc_info()[1]
        missing = (svn.core.SVN_ERR_FS_NOT_FOUND,
                   svn.core.SVN_ERR_RA_DAV_PATH_NOT_FOUND)
        if err.apr_err in missing: # File not found
            raise IOError()
        raise
    data = buf.getvalue()
    marker = "link "
    if mode == 'l' and data.startswith(marker):
        # Drop the "link " marker so that only the link target remains.
        data = data[len(marker):]
    return data, mode
874 883
def _find_children(self, path, revnum):
    """List the entries svn.client.ls reports below path at revnum.

    Each returned name is prefixed with path (stripped of leading and
    trailing slashes).
    """
    path = path.strip('/')
    pool = Pool()
    target = '/'.join([self.base, path]).strip('/')
    # The third argument (True) requests a recursive listing.
    listing = svn.client.ls(target, optrev(revnum), True, self.ctx, pool)
    children = []
    for name in listing.keys():
        children.append('%s/%s' % (path, name))
    return children
880 889
def getrelpath(self, path, module=None):
    """Return path relative to module, or None if it lies outside of it.

    module defaults to self.module.  For instance, with the module
    "/CMFPlone/branches/Plone-2_0-branch", the log entry
    "/CMFPlone/branches/Plone-2_0-branch/tests/PloneTestCase.py"
    yields "tests/PloneTestCase.py".  The module itself maps to ''.
    """
    if module is None:
        module = self.module
    if path.startswith(module):
        tail = path.rstrip('/')[len(module):]
        # Accept either the module itself (empty tail) or something
        # below it (tail starting at a path separator).  A longer
        # sibling name sharing the prefix is rejected below.
        if not tail or tail.startswith('/'):
            return tail[1:]

    # The path is outside our tracked tree...
    self.ui.debug('%r is not under %r, ignoring\n' % (path, module))
    return None
900 909
# Shell script written by svn_sink.__init__ to the hooks/pre-revprop-change
# file of the repositories it creates.  It accepts modifications of svn:log
# and additions of the hg:convert-branch and hg:convert-rev revision
# properties, and rejects every other revision property change.
pre_revprop_change = '''#!/bin/sh

REPOS="$1"
REV="$2"
USER="$3"
PROPNAME="$4"
ACTION="$5"

if [ "$ACTION" = "M" -a "$PROPNAME" = "svn:log" ]; then exit 0; fi
if [ "$ACTION" = "A" -a "$PROPNAME" = "hg:convert-branch" ]; then exit 0; fi
if [ "$ACTION" = "A" -a "$PROPNAME" = "hg:convert-rev" ]; then exit 0; fi

echo "Changing prohibited revision property" >&2
exit 1
'''
916 925
class svn_sink(converter_sink, commandline):
    """Conversion sink committing converted revisions to Subversion.

    Changes are applied to a Subversion working copy through the "svn"
    command line client.  File deletions, copies and executable-bit
    changes are queued on the instance and flushed by putcommit().
    """

    # Extracts the new revision number from the output of "svn commit".
    commit_re = re.compile(r'Committed revision (\d+).', re.M)

    def prerun(self):
        """Change into the working copy, if one has been set up."""
        if self.wc:
            os.chdir(self.wc)

    def postrun(self):
        """Change back to the directory recorded when the sink was created."""
        if self.wc:
            os.chdir(self.cwd)

    def join(self, name):
        """Return the path of name inside the working copy .svn directory."""
        return os.path.join(self.wc, '.svn', name)

    def revmapfile(self):
        """Return the path of the revision map file."""
        return self.join('hg-shamap')

    def authorfile(self):
        """Return the path of the author map file."""
        return self.join('hg-authormap')

    def __init__(self, ui, path):
        """Prepare the working copy revisions will be committed from.

        If path is an existing Subversion working copy it is updated and
        used directly.  Otherwise path is taken as a repository location:
        when its parent directory exists, the repository is created with
        svnadmin if needed and addressed through a file:// URL.  A working
        copy named after path with a "-wc" suffix is then checked out in
        the current directory.
        """
        converter_sink.__init__(self, ui, path)
        commandline.__init__(self, ui, 'svn')
        # Operations queued until the next putcommit().
        self.delete = []
        self.setexec = []
        self.delexec = []
        self.copies = []
        self.wc = None
        self.cwd = os.getcwd()

        path = os.path.realpath(path)

        created = False
        if os.path.isfile(os.path.join(path, '.svn', 'entries')):
            self.wc = path
            self.run0('update')
        else:
            wcpath = os.path.join(os.getcwd(), os.path.basename(path) + '-wc')

            if os.path.isdir(os.path.dirname(path)):
                if not os.path.exists(os.path.join(path, 'db', 'fs-type')):
                    ui.status(_('initializing svn repo %r\n') %
                              os.path.basename(path))
                    commandline(ui, 'svnadmin').run0('create', path)
                    created = path
                path = util.normpath(path)
                if not path.startswith('/'):
                    path = '/' + path
                path = 'file://' + path

            ui.status(_('initializing svn wc %r\n') % os.path.basename(wcpath))
            self.run0('checkout', path, wcpath)

            self.wc = wcpath
        self.opener = util.opener(self.wc)
        self.wopener = util.opener(self.wc)
        self.childmap = mapfile(ui, self.join('hg-childmap'))
        self.is_exec = util.checkexec(self.wc) and util.is_exec or None

        if created:
            # Freshly created repositories need a hook allowing the
            # revision property changes made by putcommit().
            hook = os.path.join(created, 'hooks', 'pre-revprop-change')
            fp = open(hook, 'w')
            try:
                fp.write(pre_revprop_change)
            finally:
                fp.close()
            util.set_flags(hook, "x")

        xport = transport.SvnRaTransport(url=geturl(path))
        self.uuid = svn.ra.get_uuid(xport.ra)

    def wjoin(self, *names):
        """Join names onto the working copy root."""
        return os.path.join(self.wc, *names)

    def putfile(self, filename, flags, data):
        """Write data to filename in the working copy.

        A file flagged 'l' is created as a symlink pointing to data.
        Otherwise the content is written, the flags are applied, and the
        svn:executable change needed to match flags is queued.
        """
        if 'l' in flags:
            self.wopener.symlink(data, filename)
        else:
            wpath = self.wjoin(filename)
            try:
                if os.path.islink(wpath):
                    # Remove the link itself, using the working copy path:
                    # a bare relative name would be resolved against the
                    # current directory instead.
                    os.unlink(wpath)
            except OSError:
                pass
            fp = self.wopener(filename, 'w')
            try:
                fp.write(data)
            finally:
                # Close explicitly so the content is on disk before the
                # flags are inspected and changed below.
                fp.close()

            if self.is_exec:
                was_exec = self.is_exec(wpath)
            else:
                # On filesystems not supporting execute-bit, there is no way
                # to know if it is set but asking subversion. Setting it
                # systematically is just as expensive and much simpler.
                was_exec = 'x' not in flags

            util.set_flags(wpath, flags)
            if was_exec:
                if 'x' not in flags:
                    self.delexec.append(filename)
            else:
                if 'x' in flags:
                    self.setexec.append(filename)

    def delfile(self, name):
        """Queue name for deletion at the next putcommit()."""
        self.delete.append(name)

    def copyfile(self, source, dest):
        """Queue a copy of source to dest for the next putcommit()."""
        self.copies.append([source, dest])

    def _copyfile(self, source, dest):
        """Record with "svn copy" that dest was copied from source."""
        # SVN's copy command pukes if the destination file exists, but
        # our copyfile method expects to record a copy that has
        # already occurred.  Cross the semantic gap.
        wdest = self.wjoin(dest)
        exists = os.path.exists(wdest)
        if exists:
            # Move the existing destination aside under a unique name in
            # the same directory; it is moved back once svn is done.
            fd, tempname = tempfile.mkstemp(
                prefix='hg-copy-', dir=os.path.dirname(wdest))
            os.close(fd)
            os.unlink(tempname)
            os.rename(wdest, tempname)
        try:
            self.run0('copy', source, dest)
        finally:
            if exists:
                try:
                    os.unlink(wdest)
                except OSError:
                    pass
                os.rename(tempname, wdest)

    def dirs_of(self, files):
        """Return the set of directories involved by files.

        This contains every parent directory of each name, plus the names
        which are themselves directories in the working copy.
        """
        dirs = util.set()
        for f in files:
            if os.path.isdir(self.wjoin(f)):
                dirs.add(f)
            for i in strutil.rfindall(f, '/'):
                dirs.add(f[:i])
        return dirs

    def add_dirs(self, files):
        """Add the directories of files lacking a .svn/entries file.

        Returns the sorted list of directories passed to "svn add".
        """
        add_dirs = [d for d in self.dirs_of(files)
                    if not os.path.exists(self.wjoin(d, '.svn', 'entries'))]
        if add_dirs:
            # Sorting puts parent directories before their children.
            add_dirs.sort()
            self.xargs(add_dirs, 'add', non_recursive=True, quiet=True)
        return add_dirs

    def add_files(self, files):
        """Run "svn add" on files, if any, and return them."""
        if files:
            self.xargs(files, 'add', quiet=True)
        return files

    def tidy_dirs(self, names):
        """Delete the directories of names left with only a .svn entry.

        Returns the list of deleted directories.
        """
        dirs = list(self.dirs_of(names))
        dirs.sort()
        # Visit children before their parents.
        dirs.reverse()
        deleted = []
        for d in dirs:
            wd = self.wjoin(d)
            # os.listdir() returns a list: compare against a list, a
            # comparison with the bare string can never be true.
            if os.listdir(wd) == ['.svn']:
                self.run0('delete', d)
                deleted.append(d)
        return deleted

    def addchild(self, parent, child):
        """Record child as the revision committed for parent."""
        self.childmap[parent] = child

    def revid(self, rev):
        """Return the identifier of rev, qualified by the repository uuid."""
        return u"svn:%s@%s" % (self.uuid, rev)

    def putcommit(self, files, parents, commit):
        """Commit the pending changes and return the new revision id.

        If one of parents already has a recorded child, the identifier of
        that child is returned and nothing is committed.  Raises
        util.Abort when the output of "svn commit" cannot be parsed.
        """
        for parent in parents:
            try:
                return self.revid(self.childmap[parent])
            except KeyError:
                pass
        entries = util.set(self.delete)
        files = util.frozenset(files)
        entries.update(self.add_dirs(files.difference(entries)))
        if self.copies:
            for s, d in self.copies:
                self._copyfile(s, d)
            self.copies = []
        if self.delete:
            self.xargs(self.delete, 'delete')
            self.delete = []
        entries.update(self.add_files(files.difference(entries)))
        entries.update(self.tidy_dirs(entries))
        if self.delexec:
            self.xargs(self.delexec, 'propdel', 'svn:executable')
            self.delexec = []
        if self.setexec:
            self.xargs(self.setexec, 'propset', 'svn:executable', '*')
            self.setexec = []

        fd, messagefile = tempfile.mkstemp(prefix='hg-convert-')
        try:
            # Write the message inside the try block so that the temporary
            # file is removed even when writing it fails.
            fp = os.fdopen(fd, 'w')
            try:
                fp.write(commit.desc)
            finally:
                fp.close()
            output = self.run0('commit',
                               username=util.shortuser(commit.author),
                               file=messagefile,
                               encoding='utf-8')
            match = self.commit_re.search(output)
            if match is None:
                self.ui.warn(_('unexpected svn output:\n'))
                self.ui.warn(output)
                raise util.Abort(_('unable to cope with svn output'))
            rev = match.group(1)
            if commit.rev:
                self.run('propset', 'hg:convert-rev', commit.rev,
                         revprop=True, revision=rev)
            if commit.branch and commit.branch != 'default':
                self.run('propset', 'hg:convert-branch', commit.branch,
                         revprop=True, revision=rev)
            for parent in parents:
                self.addchild(parent, rev)
            return self.revid(rev)
        finally:
            os.unlink(messagefile)

    def puttags(self, tags):
        """Tags are not supported yet: only emit a warning."""
        self.ui.warn(_('XXX TAGS NOT IMPLEMENTED YET\n'))
General Comments 0
You need to be logged in to leave comments. Login now