##// END OF EJS Templates
convert: fix trailing space introduced in 5efd447a9b8d
Dirkjan Ochtman -
r6418:593a598a default
parent child Browse files
Show More
@@ -1,1118 +1,1118 b''
1 1 # Subversion 1.4/1.5 Python API backend
2 2 #
3 3 # Copyright(C) 2007 Daniel Holth et al
4 4 #
5 5 # Configuration options:
6 6 #
7 7 # convert.svn.trunk
8 8 # Relative path to the trunk (default: "trunk")
9 9 # convert.svn.branches
10 10 # Relative path to tree of branches (default: "branches")
11 11 # convert.svn.tags
12 12 # Relative path to tree of tags (default: "tags")
13 13 #
14 14 # Set these in a hgrc, or on the command line as follows:
15 15 #
16 16 # hg convert --config convert.svn.trunk=wackoname [...]
17 17
18 18 import locale
19 19 import os
20 20 import re
21 21 import sys
22 22 import cPickle as pickle
23 23 import tempfile
24 24
25 25 from mercurial import strutil, util
26 26 from mercurial.i18n import _
27 27
28 28 # Subversion stuff. Works best with very recent Python SVN bindings
29 29 # e.g. SVN 1.5 or backports. Thanks to the bzr folks for enhancing
30 30 # these bindings.
31 31
32 32 from cStringIO import StringIO
33 33
34 34 from common import NoRepo, commit, converter_source, encodeargs, decodeargs
35 35 from common import commandline, converter_sink, mapfile
36 36
37 37 try:
38 38 from svn.core import SubversionException, Pool
39 39 import svn
40 40 import svn.client
41 41 import svn.core
42 42 import svn.ra
43 43 import svn.delta
44 44 import transport
45 45 except ImportError:
46 46 pass
47 47
def geturl(path):
    """Return a Subversion URL for path.

    The Subversion client is asked first (this resolves working copies).
    If it refuses, an existing local directory is turned into a file://
    URL, and anything else is returned unchanged.
    """
    try:
        canonical = svn.core.svn_path_canonicalize(path)
        return svn.client.url_from_path(canonical)
    except SubversionException:
        pass
    if not os.path.isdir(path):
        return path
    path = os.path.normpath(os.path.abspath(path))
    if os.name == 'nt':
        # Windows paths need a leading slash and forward slashes
        path = '/' + util.normpath(path)
    return 'file://%s' % path
59 59
def optrev(number):
    """Return an svn_opt_revision_t designating revision number."""
    revision = svn.core.svn_opt_revision_t()
    revision.kind = svn.core.svn_opt_revision_number
    revision.value.number = number
    return revision
65 65
class changedpath(object):
    """Plain copy of the copyfrom_path, copyfrom_rev and action
    attributes of a changed-path object.
    """
    def __init__(self, p):
        for name in ('copyfrom_path', 'copyfrom_rev', 'action'):
            setattr(self, name, getattr(p, name))
71 71
72 72 def get_log_child(fp, url, paths, start, end, limit=0, discover_changed_paths=True,
73 73 strict_node_history=False):
74 74 protocol = -1
75 75 def receiver(orig_paths, revnum, author, date, message, pool):
76 76 if orig_paths is not None:
77 77 for k, v in orig_paths.iteritems():
78 78 orig_paths[k] = changedpath(v)
79 79 pickle.dump((orig_paths, revnum, author, date, message),
80 80 fp, protocol)
81 81
82 82 try:
83 83 # Use an ra of our own so that our parent can consume
84 84 # our results without confusing the server.
85 85 t = transport.SvnRaTransport(url=url)
86 86 svn.ra.get_log(t.ra, paths, start, end, limit,
87 87 discover_changed_paths,
88 88 strict_node_history,
89 89 receiver)
90 90 except SubversionException, (inst, num):
91 91 pickle.dump(num, fp, protocol)
92 92 except IOError:
93 93 # Caller may interrupt the iteration
94 94 pickle.dump(None, fp, protocol)
95 95 else:
96 96 pickle.dump(None, fp, protocol)
97 97 fp.close()
98 98 # With large history, cleanup process goes crazy and suddenly
99 99 # consumes *huge* amount of memory. The output file being closed,
100 100 # there is no need for clean termination.
101 101 os._exit(0)
102 102
def debugsvnlog(ui, **opts):
    """Fetch SVN log in a subprocess and channel them back to parent to
    avoid memory collection issues.

    The encoded arguments are read from stdin and the pickled log
    entries are written to stdout.
    """
    for stream in (sys.stdin, sys.stdout):
        util.set_binary(stream)
    encoded = sys.stdin.read()
    get_log_child(sys.stdout, *decodeargs(encoded))
111 111
class logstream(object):
    """Interruptible revision log iterator.

    Wraps a file-like object from which pickled log entries are read.
    Each entry is a 5-tuple (orig_paths, revnum, author, date, message).
    A pickled None marks the end of the stream; any other value that is
    not a 5-tuple is reported as a SubversionException carrying it.
    """
    def __init__(self, stdout):
        self._stdout = stdout

    def __iter__(self):
        while True:
            entry = pickle.load(self._stdout)
            try:
                orig_paths, revnum, author, date, message = entry
            except (TypeError, ValueError):
                # Not a 5-tuple: either the end marker or an error value.
                # Only unpacking errors are caught so that interrupts and
                # unrelated failures propagate untouched.
                if entry is None:
                    break
                raise SubversionException("child raised exception", entry)
            yield entry

    def close(self):
        """Close the underlying stream; calling it again is a no-op."""
        if self._stdout:
            self._stdout.close()
            self._stdout = None
132 132
def get_log(url, paths, start, end, limit=0, discover_changed_paths=True,
            strict_node_history=False):
    """Run 'hg debugsvnlog' in a child process and return a logstream
    reading the log entries from its standard output.
    """
    payload = encodeargs([url, paths, start, end, limit,
                          discover_changed_paths, strict_node_history])
    cmd = '%s debugsvnlog' % util.shellquote(util.hgexecutable())
    child_in, child_out = os.popen2(cmd, 'b')
    # The child reads its arguments from stdin until EOF
    child_in.write(payload)
    child_in.close()
    return logstream(child_out)
144 144
145 145 # SVN conversion code stolen from bzr-svn and tailor
146 146 #
147 147 # Subversion looks like a versioned filesystem, branches structures
148 148 # are defined by conventions and not enforced by the tool. First,
149 149 # we define the potential branches (modules) as "trunk" and "branches"
150 150 # children directories. Revisions are then identified by their
151 151 # module and revision number (and a repository identifier).
152 152 #
153 153 # The revision graph is really a tree (or a forest). By default, a
154 154 # revision parent is the previous revision in the same module. If the
155 155 # module directory is copied/moved from another module then the
156 156 # revision is the module root and its parent the source revision in
157 157 # the parent module. A revision has at most one parent.
158 158 #
159 159 class svn_source(converter_source):
def __init__(self, ui, url, rev=None):
    """Open the Subversion repository at url and locate its head.

    url may carry a trailing '@rev' suffix; an explicit rev argument
    takes precedence over it. Raises NoRepo when the bindings are
    missing or url is not a Subversion repository, and util.Abort on
    a non-integer revision or when no revision is found in the module.
    """
    super(svn_source, self).__init__(ui, url, rev=rev)

    try:
        SubversionException
    except NameError:
        raise NoRepo('Subversion python bindings could not be loaded')

    self.lastrevs = {}

    latest = None
    try:
        # Support file://path@rev syntax. Useful e.g. to convert
        # deleted branches.
        at = url.rfind('@')
        if at >= 0:
            latest = int(url[at+1:])
            url = url[:at]
    except ValueError:
        # Text after '@' is not a revision number, keep url as is
        pass
    self.url = geturl(url)
    self.encoding = 'UTF-8' # Subversion is always nominal UTF-8
    try:
        self.transport = transport.SvnRaTransport(url=self.url)
        self.ra = self.transport.ra
        self.ctx = self.transport.client
        self.base = svn.ra.get_repos_root(self.ra)
        self.module = self.url[len(self.base):]
        self.rootmodule = self.module
        self.commits = {}
        self.paths = {}
        self.uuid = svn.ra.get_uuid(self.ra).decode(self.encoding)
    except SubversionException:
        ui.print_exc()
        raise NoRepo("%s does not look like a Subversion repo" % self.url)

    if rev:
        try:
            latest = int(rev)
        except ValueError:
            raise util.Abort(_('svn: revision %s is not an integer') % rev)

    self.startrev = self.ui.config('convert', 'svn.startrev', default=0)
    try:
        self.startrev = int(self.startrev)
        if self.startrev < 0:
            self.startrev = 0
    except ValueError:
        raise util.Abort(_('svn: start revision %s is not an integer')
                         % self.startrev)

    try:
        self.get_blacklist()
    except IOError:
        # The blacklist file is optional
        pass

    self.head = self.latest(self.module, latest)
    if not self.head:
        raise util.Abort(_('no revision found in module %s') %
                         self.module.encode(self.encoding))
    self.last_changed = self.revnum(self.head)

    self._changescache = None

    # Remember the working copy, if url points at one
    if os.path.exists(os.path.join(url, '.svn/entries')):
        self.wc = url
    else:
        self.wc = None
    self.convertfp = None
230 230
def setrevmap(self, revmap):
    """Record in self.lastrevs the highest revision number found in
    revmap for each module.
    """
    newest = {}
    for revid in revmap.iterkeys():
        uuid, module, revnum = self.revsplit(revid)
        if module not in newest or revnum > newest[module]:
            newest[module] = revnum
    self.lastrevs = newest
239 239
def exists(self, path, optrev):
    """Return True if path, relative to self.url, can be listed at
    revision optrev, and False if Subversion reports an error.
    """
    target = '%s/%s' % (self.url.rstrip('/'), path)
    try:
        svn.client.ls(target, optrev, False, self.ctx)
    except SubversionException:
        return False
    return True
247 247
def getheads(self):
    """Return the list of head revision ids to convert.

    The trunk, tags and branches locations are read from the
    convert.svn.* configuration, falling back to the conventional names
    when those exist in the repository. The first head is that of the
    module (the trunk when there is one), followed by one head per
    non-empty branch. Raises util.Abort when a configured location is
    missing, when the module has no revision, or when a start revision
    cannot be honoured.
    """

    def getcfgpath(name, rev):
        cfgpath = self.ui.config('convert', 'svn.' + name)
        # An explicitly empty setting disables the location
        if cfgpath is not None and cfgpath.strip() == '':
            return None
        path = (cfgpath or name).strip('/')
        if not self.exists(path, rev):
            # Only an explicitly configured path is required to exist
            if cfgpath:
                raise util.Abort(_('expected %s to be at %r, but not found')
                                 % (name, path))
            return None
        self.ui.note(_('found %s at %r\n') % (name, path))
        return path

    rev = optrev(self.last_changed)
    oldmodule = ''
    trunk = getcfgpath('trunk', rev)
    self.tags = getcfgpath('tags', rev)
    branches = getcfgpath('branches', rev)

    # If the project has a trunk or branches, we will extract heads
    # from them. We keep the project root otherwise.
    if trunk:
        oldmodule = self.module or ''
        self.module += '/' + trunk
        self.head = self.latest(self.module, self.last_changed)
        if not self.head:
            raise util.Abort(_('no revision found in module %s') %
                             self.module.encode(self.encoding))

    # First head in the list is the module's head
    self.heads = [self.head]
    if self.tags is not None:
        self.tags = '%s/%s' % (oldmodule , (self.tags or 'tags'))

    # Check if branches bring a few more heads to the list
    if branches:
        rpath = self.url.strip('/')
        branchnames = svn.client.ls(rpath + '/' + branches, rev, False,
                                    self.ctx)
        for branch in branchnames.keys():
            module = '%s/%s/%s' % (oldmodule, branches, branch)
            brevid = self.latest(module, self.last_changed)
            if not brevid:
                self.ui.note(_('ignoring empty branch %s\n') %
                             branch.encode(self.encoding))
                continue
            self.ui.note('found branch %s at %d\n' %
                         (branch, self.revnum(brevid)))
            self.heads.append(brevid)

    if self.startrev and self.heads:
        if len(self.heads) > 1:
            raise util.Abort(_('svn: start revision is not supported '
                               'with more than one branch'))
        revnum = self.revnum(self.heads[0])
        if revnum < self.startrev:
            raise util.Abort(_('svn: no revision found after start revision %d')
                             % self.startrev)

    return self.heads
310 310
def getfile(self, file, rev):
    """Return the content of file at rev and remember its mode in
    self.modecache for a later getmode() call.
    """
    content, mode = self._getfile(file, rev)
    key = (file, rev)
    self.modecache[key] = mode
    return content
315 315
def getmode(self, file, rev):
    """Return the mode cached for (file, rev); KeyError if absent."""
    key = (file, rev)
    return self.modecache[key]
318 318
def getchanges(self, rev):
    """Return (files, copies) for rev.

    files is a sorted list of (name, rev) pairs and copies maps copy
    destinations to their sources. A result cached by getchangedfiles()
    for the same rev is returned as is. The entry for rev is removed
    from self.paths and self.modecache is reset.
    """
    cached = self._changescache
    if cached and cached[0] == rev:
        return cached[1]
    self._changescache = None
    self.modecache = {}
    paths, parents = self.paths[rev]
    if not parents:
        # Perform a full checkout on roots
        module, revnum = self.revsplit(rev)[1:]
        listing = svn.client.ls(self.base + module, optrev(revnum),
                                True, self.ctx)
        files = [name for name, dirent in listing.iteritems()
                 if dirent.kind == svn.core.svn_node_file]
        copies = {}
    else:
        files, copies = self.expandpaths(rev, paths, parents)

    files.sort()
    files = [(name, rev) for name in files]

    # caller caches the result, so free it here to release memory
    del self.paths[rev]
    return (files, copies)
342 342
def getchangedfiles(self, rev, i):
    """Return the names of the files changed in rev and cache the full
    getchanges() result for the next getchanges(rev) call.
    """
    changes = self.getchanges(rev)
    self._changescache = (rev, changes)
    names = []
    for item in changes[0]:
        names.append(item[0])
    return names
347 347
def getcommit(self, rev):
    """Return the commit object for rev, fetching its log on demand.

    The commit is removed from self.commits before being returned.
    """
    if rev not in self.commits:
        uuid, module, revnum = self.revsplit(rev)
        self.module = module
        self.reparent(module)
        # We assume that:
        # - requests for revisions after "stop" come from the
        # revision graph backward traversal. Cache all of them
        # down to stop, they will be used eventually.
        # - requests for revisions before "stop" come to get
        # isolated branches parents. Just fetch what is needed.
        stop = self.lastrevs.get(module, 0)
        if revnum < stop:
            stop = revnum + 1
        self._fetch_revisions(revnum, stop)
    # caller caches the result, so free it here to release memory
    return self.commits.pop(rev)
367 367
def gettags(self):
    """Return a dictionary mapping tag names to revision ids.

    An empty dictionary is returned when no tags directory is known
    (self.tags is None) or when Subversion fails to provide its log.
    """
    tags = {}
    if self.tags is None:
        return tags

    # svn tags are just a convention, project branches left in a
    # 'tags' directory. There is no other relationship than
    # ancestry, which is expensive to discover and makes them hard
    # to update incrementally. Worse, past revisions may be
    # referenced by tags far away in the future, requiring a deep
    # history traversal on every calculation. Current code
    # performs a single backward traversal, tracking moves within
    # the tags directory (tag renaming) and recording a new tag
    # everytime a project is copied from outside the tags
    # directory. It also lists deleted tags, this behaviour may
    # change in the future.
    pendings = []
    tagspath = self.tags
    start = svn.ra.get_latest_revnum(self.ra)
    try:
        stream = get_log(self.url, [self.tags], start, self.startrev)
        try:
            for entry in stream:
                origpaths, revnum, author, date, message = entry
                if origpaths is None:
                    # No changed paths reported for this revision, so
                    # there is no copy to analyze
                    continue
                copies = [(e.copyfrom_path, e.copyfrom_rev, p) for p, e
                          in origpaths.iteritems() if e.copyfrom_path]
                copies.sort()
                # Apply moves/copies from more specific to general
                copies.reverse()

                srctagspath = tagspath
                if copies and copies[-1][2] == tagspath:
                    # Track tags directory moves
                    srctagspath = copies.pop()[0]

                for source, sourcerev, dest in copies:
                    if not dest.startswith(tagspath + '/'):
                        continue
                    for tag in pendings:
                        if tag[0].startswith(dest):
                            tagpath = source + tag[0][len(dest):]
                            tag[:2] = [tagpath, sourcerev]
                            break
                    else:
                        pendings.append([source, sourcerev,
                                         dest.split('/')[-1]])

                # Tell tag renamings from tag creations
                remainings = []
                for source, sourcerev, tagname in pendings:
                    if source.startswith(srctagspath):
                        remainings.append([source, sourcerev, tagname])
                        continue
                    # From revision may be fake, get one with changes
                    tagid = self.latest(source, sourcerev)
                    if tagid:
                        tags[tagname] = tagid
                pendings = remainings
                tagspath = srctagspath
        finally:
            # Always release the pipe to the log child process
            stream.close()

    except SubversionException:
        self.ui.note('no tags found at revision %d\n' % start)
    return tags
428 428
def converted(self, rev, destrev):
    """Append 'destrev revnum' to the .svn/hg-shamap file of the working
    copy. Does nothing when the source is not a working copy.
    """
    if not self.wc:
        return
    fp = self.convertfp
    if fp is None:
        # Opened on first use and kept open for later calls
        mapname = os.path.join(self.wc, '.svn', 'hg-shamap')
        fp = self.convertfp = open(mapname, 'a')
    fp.write('%s %d\n' % (destrev, self.revnum(rev)))
    fp.flush()
437 437
438 438 # -- helper functions --
439 439
def revid(self, revnum, module=None):
    """Return the unicode revision id 'svn:<uuid><module>@<revnum>'.

    self.module is used when module is empty or omitted.
    """
    module = module or self.module
    decoded = module.decode(self.encoding)
    return u"svn:%s%s@%s" % (self.uuid, decoded, revnum)
445 445
def revnum(self, rev):
    """Return the integer that follows the last '@' in rev."""
    tail = rev[rev.rfind('@') + 1:]
    return int(tail)
448 448
def revsplit(self, rev):
    """Split a revision id into (uuid, module, revnum).

    The id is encoded with self.encoding first. The module keeps its
    leading slash and is '' when the id has no path component.
    """
    encoded = rev.encode(self.encoding)
    url, number = encoded.split('@', 1)
    number = int(number)
    slash = url.find('/')
    # The first four characters of the id are skipped to get the uuid
    if slash < 0:
        return url[4:], '', number
    return url[4:slash], url[slash:], number
458 458
def latest(self, path, stop=0):
    """Find the latest revid affecting path, up to stop. It may return
    a revision in a different module, since a branch may be moved without
    a change being reported. Return None if computed module does not
    belong to rootmodule subtree.

    When stop is 0 the latest repository revision is used. Raises
    util.Abort if path cannot be found up to stop.
    """
    if not path.startswith(self.rootmodule):
        # Requests on foreign branches may be forbidden at server level
        self.ui.debug(_('ignoring foreign branch %r\n') % path)
        return None

    if not stop:
        stop = svn.ra.get_latest_revnum(self.ra)
    try:
        self.reparent('')
        try:
            dirent = svn.ra.stat(self.ra, path.strip('/'), stop)
        finally:
            # Restore the session parent even when stat() fails: callers
            # may catch the Abort raised below and keep using the
            # session, expecting it to point at self.module.
            self.reparent(self.module)
    except SubversionException:
        dirent = None
    if not dirent:
        raise util.Abort('%s not found up to revision %d' % (path, stop))

    # stat() gives us the previous revision on this line of development, but
    # it might be in *another module*. Fetch the log and detect renames down
    # to the latest revision.
    stream = get_log(self.url, [path], stop, dirent.created_rev)
    try:
        for entry in stream:
            paths, revnum, author, date, message = entry
            if revnum <= dirent.created_rev:
                break

            for p in paths:
                if not path.startswith(p) or not paths[p].copyfrom_path:
                    continue
                newpath = paths[p].copyfrom_path + path[len(p):]
                self.ui.debug("branch renamed from %s to %s at %d\n" %
                              (path, newpath, revnum))
                path = newpath
                break
    finally:
        stream.close()

    if not path.startswith(self.rootmodule):
        self.ui.debug(_('ignoring foreign branch %r\n') % path)
        return None
    return self.revid(dirent.created_rev, path)
506 506
def get_blacklist(self):
    """Avoid certain revision numbers.
    It is not uncommon for two nearby revisions to cancel each other
    out, e.g. 'I copied trunk into a subdirectory of itself instead
    of making a branch'. The converted repository is significantly
    smaller if we ignore such revisions.

    Revision numbers are read, one per line, from "blacklist.txt" in
    the current directory into self.blacklist. Lines starting with '#'
    and lines that are not integers are ignored. IOError propagates if
    the file cannot be opened; self.blacklist is then an empty set.
    """
    self.blacklist = util.set()
    blacklist = self.blacklist
    fp = open("blacklist.txt", "r")
    try:
        for line in fp:
            if line.startswith("#"):
                continue
            try:
                blacklist.add(int(line.strip()))
            except ValueError:
                pass # not an integer or a comment
    finally:
        fp.close()
522 522
def is_blacklisted(self, svn_rev):
    """Return True if svn_rev is in self.blacklist."""
    skipped = self.blacklist
    return svn_rev in skipped
525 525
def reparent(self, module):
    """Point the RA session at module, a path below the repository
    root (self.base).
    """
    target = (self.base + module).encode(self.encoding)
    self.ui.debug("reparent to %s\n" % target)
    svn.ra.reparent(self.ra, target)
530 530
def expandpaths(self, rev, paths, parents):
    """Expand the changed paths of rev into file entries and copies.

    paths is a sequence of (path, entry) pairs where entry carries the
    action, copyfrom_path and copyfrom_rev attributes; parents is the
    list of parent revision ids. Directories are expanded to the files
    they contain. Returns (entries, copies): the list of affected file
    names relative to the module, without duplicates, and a dictionary
    mapping copy destinations to their sources.

    As a side effect, the session is reparented to the module of rev
    when it differs from self.module.
    """
    def get_entry_from_path(path, module=None):
        # Given the repository url of this wc, say
        #   "http://server/plone/CMFPlone/branches/Plone-2_0-branch"
        # extract the "entry" portion (a relative path) from what
        # svn log --xml says, ie
        #   "/CMFPlone/branches/Plone-2_0-branch/tests/PloneTestCase.py"
        # that is to say "tests/PloneTestCase.py"
        if module is None:
            # Resolved at call time: self.module may be switched to the
            # module of rev after this helper has been defined.
            module = self.module
        if path.startswith(module):
            relative = path[len(module):]
            if relative.startswith('/'):
                return relative[1:]
            else:
                return relative

        # The path is outside our tracked tree...
        self.ui.debug('%r is not under %r, ignoring\n' % (path, module))
        return None

    entries = []
    copyfrom = {} # Map of entrypath, revision for finding source of deleted revisions.
    copies = {}

    new_module, revnum = self.revsplit(rev)[1:]
    if new_module != self.module:
        self.module = new_module
        self.reparent(self.module)

    for path, ent in paths:
        entrypath = get_entry_from_path(path, module=self.module)
        entry = entrypath.decode(self.encoding)

        kind = svn.ra.check_path(self.ra, entrypath, revnum)
        if kind == svn.core.svn_node_file:
            if ent.copyfrom_path:
                copyfrom_path = get_entry_from_path(ent.copyfrom_path)
                if copyfrom_path:
                    self.ui.debug("Copied to %s from %s@%s\n" %
                                  (entrypath, copyfrom_path,
                                   ent.copyfrom_rev))
                    # It's probably important for hg that the source
                    # exists in the revision's parent, not just the
                    # ent.copyfrom_rev
                    fromkind = svn.ra.check_path(self.ra, copyfrom_path, ent.copyfrom_rev)
                    if fromkind != 0:
                        copies[self.recode(entry)] = self.recode(copyfrom_path)
            entries.append(self.recode(entry))
        elif kind == 0: # gone, but had better be a deleted *file*
            self.ui.debug("gone from %s\n" % ent.copyfrom_rev)

            # if a branch is created but entries are removed in the same
            # changeset, get the right fromrev
            # parents cannot be empty here, you cannot remove things from
            # a root revision.
            uuid, old_module, fromrev = self.revsplit(parents[0])

            basepath = old_module + "/" + get_entry_from_path(path, module=self.module)
            entrypath = basepath

            def lookup_parts(p):
                # Return (prefix, entry) for the longest proper prefix
                # of p recorded in copyfrom, or None
                rc = None
                parts = p.split("/")
                for i in range(len(parts)):
                    part = "/".join(parts[:i])
                    info = part, copyfrom.get(part, None)
                    if info[1] is not None:
                        self.ui.debug("Found parent directory %s\n" % info[1])
                        rc = info
                return rc

            self.ui.debug("base, entry %s %s\n" % (basepath, entrypath))

            frompath, froment = lookup_parts(entrypath) or (None, revnum - 1)

            # need to remove fragment from lookup_parts and replace with copyfrom_path
            if frompath is not None:
                self.ui.debug("munge-o-matic\n")
                self.ui.debug(entrypath + '\n')
                self.ui.debug(entrypath[len(frompath):] + '\n')
                entrypath = froment.copyfrom_path + entrypath[len(frompath):]
                fromrev = froment.copyfrom_rev
                self.ui.debug("Info: %s %s %s %s\n" % (frompath, froment, ent, entrypath))

            # We can avoid the reparent calls if the module has not changed
            # but it probably does not worth the pain.
            self.reparent('')
            fromkind = svn.ra.check_path(self.ra, entrypath.strip('/'), fromrev)
            self.reparent(self.module)

            if fromkind == svn.core.svn_node_file:   # a deleted file
                entries.append(self.recode(entry))
            elif fromkind == svn.core.svn_node_dir:
                # Sometimes this is tricky. For example: in
                # The Subversion Repository revision 6940 a dir
                # was copied and one of its files was deleted
                # from the new location in the same commit. This
                # code can't deal with that yet.
                if ent.action == 'C':
                    children = self._find_children(path, fromrev)
                else:
                    oroot = entrypath.strip('/')
                    nroot = path.strip('/')
                    children = self._find_children(oroot, fromrev)
                    children = [s.replace(oroot,nroot) for s in children]
                # Mark all [files, not directories] as deleted.
                for child in children:
                    # Can we move a child directory and its
                    # parent in the same commit? (probably can). Could
                    # cause problems if instead of revnum -1,
                    # we have to look in (copyfrom_path, revnum - 1)
                    entrypath = get_entry_from_path("/" + child, module=old_module)
                    if entrypath:
                        entry = self.recode(entrypath.decode(self.encoding))
                        if entry in copies:
                            # deleted file within a copy
                            del copies[entry]
                        else:
                            entries.append(entry)
            else:
                self.ui.debug('unknown path in revision %d: %s\n' % \
                              (revnum, path))
        elif kind == svn.core.svn_node_dir:
            # Should probably synthesize normal file entries
            # and handle as above to clean up copy/rename handling.

            # If the directory just had a prop change,
            # then we shouldn't need to look for its children.
            if ent.action == 'M':
                continue

            # Also this could create duplicate entries. Not sure
            # whether this will matter. Maybe should make entries a set.
            # This will fail if a directory was copied
            # from another branch and then some of its files
            # were deleted in the same transaction.
            children = self._find_children(path, revnum)
            children.sort()
            for child in children:
                # Can we move a child directory and its
                # parent in the same commit? (probably can). Could
                # cause problems if instead of revnum -1,
                # we have to look in (copyfrom_path, revnum - 1)
                entrypath = get_entry_from_path("/" + child, module=self.module)
                if entrypath:
                    # Need to filter out directories here...
                    kind = svn.ra.check_path(self.ra, entrypath, revnum)
                    if kind != svn.core.svn_node_dir:
                        entries.append(self.recode(entrypath))

            # Copies here (must copy all from source)
            # Probably not a real problem for us if
            # source does not exist
            if ent.copyfrom_path:
                copyfrom_path = ent.copyfrom_path.decode(self.encoding)
                copyfrom_entry = get_entry_from_path(copyfrom_path, module=self.module)
                if copyfrom_entry:
                    copyfrom[path] = ent
                    self.ui.debug("mark %s came from %s\n" % (path, copyfrom[path]))

                    # Good, /probably/ a regular copy. Really should check
                    # to see whether the parent revision actually contains
                    # the directory in question.
                    children = self._find_children(self.recode(copyfrom_path), ent.copyfrom_rev)
                    children.sort()
                    for child in children:
                        entrypath = get_entry_from_path("/" + child, module=self.module)
                        if entrypath:
                            entry = entrypath.decode(self.encoding)
                            copyto_path = path + entry[len(copyfrom_entry):]
                            copyto_entry = get_entry_from_path(copyto_path, module=self.module)
                            copies[self.recode(copyto_entry)] = self.recode(entry)

    return (util.unique(entries), copies)
723 723
724 724 def _fetch_revisions(self, from_revnum, to_revnum):
725 725 if from_revnum < to_revnum:
726 726 from_revnum, to_revnum = to_revnum, from_revnum
727 727
728 728 self.child_cset = None
729 729 def parselogentry(orig_paths, revnum, author, date, message):
730 730 """Return the parsed commit object or None, and True if
731 731 the revision is a branch root.
732 732 """
733 733 self.ui.debug("parsing revision %d (%d changes)\n" %
734 734 (revnum, len(orig_paths)))
735 735
736 736 branched = False
737 737 rev = self.revid(revnum)
738 738 # branch log might return entries for a parent we already have
739 739
740 740 if (rev in self.commits or revnum < to_revnum):
741 741 return None, branched
742 742
743 743 parents = []
744 744 # check whether this revision is the start of a branch or part
745 745 # of a branch renaming
746 746 orig_paths = orig_paths.items()
747 747 orig_paths.sort()
748 748 root_paths = [(p,e) for p,e in orig_paths if self.module.startswith(p)]
749 749 if root_paths:
750 750 path, ent = root_paths[-1]
751 751 if ent.copyfrom_path:
752 752 branched = True
753 753 newpath = ent.copyfrom_path + self.module[len(path):]
754 754 # ent.copyfrom_rev may not be the actual last revision
755 755 previd = self.latest(newpath, ent.copyfrom_rev)
756 756 if previd is not None:
757 757 prevmodule, prevnum = self.revsplit(previd)[1:]
758 758 if prevnum >= self.startrev:
759 759 parents = [previd]
760 760 self.ui.note('found parent of branch %s at %d: %s\n' %
761 761 (self.module, prevnum, prevmodule))
762 762 else:
763 763 self.ui.debug("No copyfrom path, don't know what to do.\n")
764 764
765 765 paths = []
766 766 # filter out unrelated paths
767 767 for path, ent in orig_paths:
768 768 if not path.startswith(self.module):
769 769 self.ui.debug("boring@%s: %s\n" % (revnum, path))
770 770 continue
771 771 paths.append((path, ent))
772 772
773 773 # Example SVN datetime. Includes microseconds.
774 774 # ISO-8601 conformant
775 775 # '2007-01-04T17:35:00.902377Z'
776 776 date = util.parsedate(date[:19] + " UTC", ["%Y-%m-%dT%H:%M:%S"])
777 777
778 778 log = message and self.recode(message) or ''
779 779 author = author and self.recode(author) or ''
780 780 try:
781 781 branch = self.module.split("/")[-1]
782 782 if branch == 'trunk':
783 783 branch = ''
784 784 except IndexError:
785 785 branch = None
786 786
787 787 cset = commit(author=author,
788 788 date=util.datestr(date),
789 789 desc=log,
790 790 parents=parents,
791 791 branch=branch,
792 792 rev=rev.encode('utf-8'))
793 793
794 794 self.commits[rev] = cset
795 795 # The parents list is *shared* among self.paths and the
796 796 # commit object. Both will be updated below.
797 797 self.paths[rev] = (paths, cset.parents)
798 798 if self.child_cset and not self.child_cset.parents:
799 799 self.child_cset.parents[:] = [rev]
800 800 self.child_cset = cset
801 801 return cset, branched
802 802
803 803 self.ui.note('fetching revision log for "%s" from %d to %d\n' %
804 804 (self.module, from_revnum, to_revnum))
805 805
806 806 try:
807 807 firstcset = None
808 808 lastonbranch = False
809 809 stream = get_log(self.url, [self.module], from_revnum, to_revnum)
810 810 try:
811 811 for entry in stream:
812 812 paths, revnum, author, date, message = entry
813 813 if revnum < self.startrev:
814 814 lastonbranch = True
815 815 break
816 816 if self.is_blacklisted(revnum):
817 817 self.ui.note('skipping blacklisted revision %d\n'
818 818 % revnum)
819 819 continue
820 820 if paths is None:
821 821 self.ui.debug('revision %d has no entries\n' % revnum)
822 822 continue
823 823 cset, lastonbranch = parselogentry(paths, revnum, author,
824 824 date, message)
825 825 if cset:
826 826 firstcset = cset
827 827 if lastonbranch:
828 828 break
829 829 finally:
830 830 stream.close()
831 831
832 832 if not lastonbranch and firstcset and not firstcset.parents:
833 833 # The first revision of the sequence (the last fetched one)
834 834 # has invalid parents if not a branch root. Find the parent
835 835 # revision now, if any.
836 836 try:
837 837 firstrevnum = self.revnum(firstcset.rev)
838 838 if firstrevnum > 1:
839 839 latest = self.latest(self.module, firstrevnum - 1)
840 840 if latest:
841 841 firstcset.parents.append(latest)
842 842 except util.Abort:
843 843 pass
844 844 except SubversionException, (inst, num):
845 845 if num == svn.core.SVN_ERR_FS_NO_SUCH_REVISION:
846 846 raise util.Abort('svn: branch has no revision %s' % to_revnum)
847 847 raise
848 848
def _getfile(self, file, rev):
    """Fetch the contents and mode flag of ``file`` at revision ``rev``.

    ``rev`` is split with ``self.revsplit``; when the module it names
    differs from ``self.module`` the session is reparented first.

    Returns a ``(data, mode)`` tuple.  ``mode`` is ``'l'`` when the file
    carries the ``svn:special`` property, ``'x'`` when it carries
    ``svn:executable`` (and not ``svn:special``), and ``''`` otherwise.
    For ``'l'`` entries a leading ``"link "`` is stripped from the data.

    Raises IOError when Subversion reports that the path was not found;
    any other SubversionException propagates unchanged.
    """
    # TODO: ra.get_file transmits the whole file instead of diffs.
    buf = StringIO()
    mode = ''
    try:
        new_module, revnum = self.revsplit(rev)[1:]
        if new_module != self.module:
            self.module = new_module
            self.reparent(self.module)
        props = svn.ra.get_file(self.ra, file, revnum, buf)
        # Some bindings hand back a list; the properties are its last item.
        if isinstance(props, list):
            props = props[-1]
        # svn:special takes precedence over svn:executable.
        if "svn:special" in props:
            mode = 'l'
        elif "svn:executable" in props:
            mode = 'x'
    except SubversionException:
        # Pull the active exception from sys.exc_info() so the except
        # clause itself needs no version-specific binding syntax.
        err = sys.exc_info()[1]
        missing = (svn.core.SVN_ERR_FS_NOT_FOUND,
                   svn.core.SVN_ERR_RA_DAV_PATH_NOT_FOUND)
        if err.apr_err not in missing:
            raise
        # File not found
        raise IOError()
    data = buf.getvalue()
    prefix = "link "
    if mode == 'l' and data.startswith(prefix):
        data = data[len(prefix):]
    return data, mode
875 875
def _find_children(self, path, revnum):
    """List everything below ``path`` at revision ``revnum``.

    ``path`` is stripped of surrounding slashes and joined onto
    ``self.base`` to form the location handed to ``svn.client.ls``
    (called with ``True`` as its third argument).  Each name returned by
    that call is prefixed with the stripped ``path`` and a slash.
    """
    path = path.strip('/')
    pool = Pool()
    target = '/'.join([self.base, path]).strip('/')
    listing = svn.client.ls(target, optrev(revnum), True, self.ctx, pool)
    children = []
    for name in listing.keys():
        children.append('%s/%s' % (path, name))
    return children
881 881
# Shell script that svn_sink.__init__ writes to hooks/pre-revprop-change in a
# repository it has just created.  It exits 0 (allowing the change) only for
# action "M" on svn:log and action "A" on hg:convert-branch or
# hg:convert-rev; every other revision property change is rejected with
# exit status 1 and a message on stderr.
pre_revprop_change = '''#!/bin/sh

REPOS="$1"
REV="$2"
USER="$3"
PROPNAME="$4"
ACTION="$5"

if [ "$ACTION" = "M" -a "$PROPNAME" = "svn:log" ]; then exit 0; fi
if [ "$ACTION" = "A" -a "$PROPNAME" = "hg:convert-branch" ]; then exit 0; fi
if [ "$ACTION" = "A" -a "$PROPNAME" = "hg:convert-rev" ]; then exit 0; fi

echo "Changing prohibited revision property" >&2
exit 1
'''
897 897
class svn_sink(converter_sink, commandline):
    """Conversion sink that records changesets through a Subversion
    working copy driven by the ``svn`` command line client.

    File contents are written straight into the working copy by
    ``putfile``; deletions, copies and executable-bit changes are queued
    on the instance (``self.delete``, ``self.copies``, ``self.setexec``,
    ``self.delexec``) and applied when ``putcommit`` runs.
    """

    # Extracts the revision number from the output of "svn commit".
    commit_re = re.compile(r'Committed revision (\d+).', re.M)

    def prerun(self):
        """Change into the working copy, if one has been set."""
        if self.wc:
            os.chdir(self.wc)

    def postrun(self):
        """Change back to the directory recorded at construction time."""
        if self.wc:
            os.chdir(self.cwd)

    def join(self, name):
        """Return the path of ``name`` inside the working copy's .svn dir."""
        return os.path.join(self.wc, '.svn', name)

    def revmapfile(self):
        """Return the path of the revision map file."""
        return self.join('hg-shamap')

    def authorfile(self):
        """Return the path of the author map file."""
        return self.join('hg-authormap')

    def __init__(self, ui, path):
        """Prepare a working copy for ``path``.

        If ``path`` is already a working copy (it has ``.svn/entries``) it
        is updated and used directly.  Otherwise a working copy named
        ``<basename>-wc`` is checked out into the current directory; when
        the parent directory of ``path`` exists, ``path`` is treated as a
        local repository (created with ``svnadmin create`` if it has no
        ``db/fs-type``) and addressed through a ``file://`` URL.
        """
        converter_sink.__init__(self, ui, path)
        commandline.__init__(self, ui, 'svn')
        self.delete = []
        self.setexec = []
        self.delexec = []
        self.copies = []
        self.wc = None
        self.cwd = os.getcwd()

        path = os.path.realpath(path)

        # Holds the repository path when this call created the repository.
        created = False
        if os.path.isfile(os.path.join(path, '.svn', 'entries')):
            self.wc = path
            self.run0('update')
        else:
            wcpath = os.path.join(os.getcwd(), os.path.basename(path) + '-wc')

            if os.path.isdir(os.path.dirname(path)):
                if not os.path.exists(os.path.join(path, 'db', 'fs-type')):
                    ui.status(_('initializing svn repo %r\n') %
                              os.path.basename(path))
                    commandline(ui, 'svnadmin').run0('create', path)
                    created = path
                path = util.normpath(path)
                if not path.startswith('/'):
                    path = '/' + path
                path = 'file://' + path

            ui.status(_('initializing svn wc %r\n') % os.path.basename(wcpath))
            self.run0('checkout', path, wcpath)

            self.wc = wcpath
        self.opener = util.opener(self.wc)
        self.wopener = util.opener(self.wc)
        self.childmap = mapfile(ui, self.join('hg-childmap'))
        # util.is_exec when util.checkexec() is true for the working copy,
        # None otherwise.
        self.is_exec = util.checkexec(self.wc) and util.is_exec or None

        if created:
            # Install the hook that lets putcommit set revision properties
            # on the freshly created repository.
            hook = os.path.join(created, 'hooks', 'pre-revprop-change')
            fp = open(hook, 'w')
            try:
                fp.write(pre_revprop_change)
            finally:
                fp.close()
            util.set_flags(hook, "x")

        xport = transport.SvnRaTransport(url=geturl(path))
        self.uuid = svn.ra.get_uuid(xport.ra)

    def wjoin(self, *names):
        """Return ``names`` joined onto the working copy path."""
        return os.path.join(self.wc, *names)

    def putfile(self, filename, flags, data):
        """Write ``data`` to ``filename`` in the working copy.

        With ``'l'`` in ``flags`` a symlink pointing at ``data`` is created
        instead.  For regular files, the executable-bit change implied by
        ``flags`` is queued in ``self.setexec`` / ``self.delexec``.
        """
        if 'l' in flags:
            self.wopener.symlink(data, filename)
        else:
            wpath = self.wjoin(filename)
            try:
                # Remove a symlink left at this path.  The working-copy
                # path must be used: the bare name would be resolved
                # against the current directory.
                if os.path.islink(wpath):
                    os.unlink(wpath)
            except OSError:
                pass
            fp = self.wopener(filename, 'w')
            try:
                fp.write(data)
            finally:
                fp.close()

            if self.is_exec:
                was_exec = self.is_exec(wpath)
            else:
                # On filesystems not supporting execute-bit, there is no way
                # to know if it is set but asking subversion. Setting it
                # systematically is just as expensive and much simpler.
                was_exec = 'x' not in flags

            util.set_flags(wpath, flags)
            if was_exec:
                if 'x' not in flags:
                    self.delexec.append(filename)
            else:
                if 'x' in flags:
                    self.setexec.append(filename)

    def delfile(self, name):
        """Queue ``name`` for deletion at the next ``putcommit``."""
        self.delete.append(name)

    def copyfile(self, source, dest):
        """Queue a copy of ``source`` to ``dest`` for the next ``putcommit``."""
        self.copies.append([source, dest])

    def _copyfile(self, source, dest):
        """Run ``svn copy source dest`` even when ``dest`` already exists."""
        # SVN's copy command pukes if the destination file exists, but
        # our copyfile method expects to record a copy that has
        # already occurred.  Cross the semantic gap.
        wdest = self.wjoin(dest)
        exists = os.path.exists(wdest)
        if exists:
            # Reserve a unique name next to the destination and move the
            # existing file there while svn performs the copy.
            fd, tempname = tempfile.mkstemp(
                prefix='hg-copy-', dir=os.path.dirname(wdest))
            os.close(fd)
            os.unlink(tempname)
            os.rename(wdest, tempname)
        try:
            self.run0('copy', source, dest)
        finally:
            if exists:
                # Put the saved content back in place of whatever svn
                # wrote at the destination.
                try:
                    os.unlink(wdest)
                except OSError:
                    pass
                os.rename(tempname, wdest)

    def dirs_of(self, files):
        """Return the set of directories implied by ``files``.

        That is every entry that is itself a directory in the working copy
        plus every ancestor directory named in an entry's path.
        """
        dirs = util.set()
        for f in files:
            if os.path.isdir(self.wjoin(f)):
                dirs.add(f)
            for i in strutil.rfindall(f, '/'):
                dirs.add(f[:i])
        return dirs

    def add_dirs(self, files):
        """``svn add`` the directories of ``files`` that lack .svn/entries.

        Returns the sorted list of directories that were added.
        """
        add_dirs = [d for d in self.dirs_of(files)
                    if not os.path.exists(self.wjoin(d, '.svn', 'entries'))]
        if add_dirs:
            add_dirs.sort()
            self.xargs(add_dirs, 'add', non_recursive=True, quiet=True)
        return add_dirs

    def add_files(self, files):
        """``svn add`` ``files`` (if any) and return them."""
        if files:
            self.xargs(files, 'add', quiet=True)
        return files

    def tidy_dirs(self, names):
        """``svn delete`` directories of ``names`` that hold only ``.svn``.

        Directories are visited in reverse sorted order so that children
        are examined before their parents.  Returns the list of deleted
        directories.
        """
        dirs = list(self.dirs_of(names))
        dirs.sort()
        dirs.reverse()
        deleted = []
        for d in dirs:
            wd = self.wjoin(d)
            # os.listdir() returns a list, so compare against a list; a
            # comparison with the bare string '.svn' can never be true.
            if os.listdir(wd) == ['.svn']:
                self.run0('delete', d)
                deleted.append(d)
        return deleted

    def addchild(self, parent, child):
        """Record ``child`` as the converted child of ``parent``."""
        self.childmap[parent] = child

    def revid(self, rev):
        """Return the ``svn:<uuid>@<rev>`` identifier for ``rev``."""
        return u"svn:%s@%s" % (self.uuid, rev)

    def putcommit(self, files, parents, commit):
        """Apply the queued changes, commit them and return the revision id.

        If any of ``parents`` already has an entry in the child map, that
        child's identifier is returned and nothing is committed.  Raises
        util.Abort when the output of ``svn commit`` contains no revision
        number.
        """
        for parent in parents:
            try:
                return self.revid(self.childmap[parent])
            except KeyError:
                pass
        entries = util.set(self.delete)
        files = util.frozenset(files)
        entries.update(self.add_dirs(files.difference(entries)))
        if self.copies:
            for s, d in self.copies:
                self._copyfile(s, d)
            self.copies = []
        if self.delete:
            self.xargs(self.delete, 'delete')
            self.delete = []
        entries.update(self.add_files(files.difference(entries)))
        entries.update(self.tidy_dirs(entries))
        if self.delexec:
            self.xargs(self.delexec, 'propdel', 'svn:executable')
            self.delexec = []
        if self.setexec:
            self.xargs(self.setexec, 'propset', 'svn:executable', '*')
            self.setexec = []

        fd, messagefile = tempfile.mkstemp(prefix='hg-convert-')
        try:
            # The message is written inside the try block so the temporary
            # file is removed even if writing it fails.
            fp = os.fdopen(fd, 'w')
            try:
                fp.write(commit.desc)
            finally:
                fp.close()
            output = self.run0('commit',
                               username=util.shortuser(commit.author),
                               file=messagefile,
                               encoding='utf-8')
            try:
                rev = self.commit_re.search(output).group(1)
            except AttributeError:
                # search() returned None: no "Committed revision" line.
                self.ui.warn(_('unexpected svn output:\n'))
                self.ui.warn(output)
                raise util.Abort(_('unable to cope with svn output'))
            if commit.rev:
                self.run('propset', 'hg:convert-rev', commit.rev,
                         revprop=True, revision=rev)
            if commit.branch and commit.branch != 'default':
                self.run('propset', 'hg:convert-branch', commit.branch,
                         revprop=True, revision=rev)
            for parent in parents:
                self.addchild(parent, rev)
            return self.revid(rev)
        finally:
            os.unlink(messagefile)

    def puttags(self, tags):
        """Tags are not supported yet; only a warning is emitted."""
        self.ui.warn(_('XXX TAGS NOT IMPLEMENTED YET\n'))
General Comments 0
You need to be logged in to leave comments. Login now