##// END OF EJS Templates
convert/svn: drop undocumented and untested revision 'blacklist'
Patrick Mezard -
r8888:bd93d0e0 default
parent child Browse files
Show More
@@ -1,1165 +1,1136 b''
1 1 # Subversion 1.4/1.5 Python API backend
2 2 #
3 3 # Copyright(C) 2007 Daniel Holth et al
4 4
5 5 import locale
6 6 import os
7 7 import re
8 8 import sys
9 9 import cPickle as pickle
10 10 import tempfile
11 11 import urllib
12 12
13 13 from mercurial import strutil, util, encoding
14 14 from mercurial.i18n import _
15 15
16 16 # Subversion stuff. Works best with very recent Python SVN bindings
17 17 # e.g. SVN 1.5 or backports. Thanks to the bzr folks for enhancing
18 18 # these bindings.
19 19
20 20 from cStringIO import StringIO
21 21
22 22 from common import NoRepo, MissingTool, commit, encodeargs, decodeargs
23 23 from common import commandline, converter_source, converter_sink, mapfile
24 24
25 25 try:
26 26 from svn.core import SubversionException, Pool
27 27 import svn
28 28 import svn.client
29 29 import svn.core
30 30 import svn.ra
31 31 import svn.delta
32 32 import transport
33 33 import warnings
34 34 warnings.filterwarnings('ignore',
35 35 module='svn.core',
36 36 category=DeprecationWarning)
37 37
38 38 except ImportError:
39 39 pass
40 40
class SvnPathNotFound(Exception):
    """Raised when a path cannot be found in the Subversion repository."""
43 43
def geturl(path):
    """Return the Subversion URL designated by 'path'.

    The svn client is asked first. If it raises SubversionException and
    'path' is an existing directory, a file:// URL is built from its
    absolute path. Anything else is returned unchanged.
    """
    try:
        canonical = svn.core.svn_path_canonicalize(path)
        return svn.client.url_from_path(canonical)
    except SubversionException:
        # svn could not map the path to a URL, try by hand below
        pass
    if not os.path.isdir(path):
        return path
    localpath = os.path.normpath(os.path.abspath(path))
    if os.name == 'nt':
        localpath = '/' + util.normpath(localpath)
    # Module URL is later compared with the repository URL returned
    # by svn API, which is UTF-8.
    localpath = encoding.tolocal(localpath)
    return 'file://%s' % urllib.quote(localpath)
58 58
def optrev(number):
    """Return an svn_opt_revision_t of kind 'number' set to 'number'."""
    revision = svn.core.svn_opt_revision_t()
    revision.kind = svn.core.svn_opt_revision_number
    revision.value.number = number
    return revision
64 64
class changedpath(object):
    """Plain copy of the copyfrom_path, copyfrom_rev and action
    attributes of a log entry changed path.
    """
    def __init__(self, p):
        for name in ('copyfrom_path', 'copyfrom_rev', 'action'):
            setattr(self, name, getattr(p, name))
70 70
71 71 def get_log_child(fp, url, paths, start, end, limit=0, discover_changed_paths=True,
72 72 strict_node_history=False):
73 73 protocol = -1
74 74 def receiver(orig_paths, revnum, author, date, message, pool):
75 75 if orig_paths is not None:
76 76 for k, v in orig_paths.iteritems():
77 77 orig_paths[k] = changedpath(v)
78 78 pickle.dump((orig_paths, revnum, author, date, message),
79 79 fp, protocol)
80 80
81 81 try:
82 82 # Use an ra of our own so that our parent can consume
83 83 # our results without confusing the server.
84 84 t = transport.SvnRaTransport(url=url)
85 85 svn.ra.get_log(t.ra, paths, start, end, limit,
86 86 discover_changed_paths,
87 87 strict_node_history,
88 88 receiver)
89 89 except SubversionException, (inst, num):
90 90 pickle.dump(num, fp, protocol)
91 91 except IOError:
92 92 # Caller may interrupt the iteration
93 93 pickle.dump(None, fp, protocol)
94 94 else:
95 95 pickle.dump(None, fp, protocol)
96 96 fp.close()
97 97 # With large history, cleanup process goes crazy and suddenly
98 98 # consumes *huge* amount of memory. The output file being closed,
99 99 # there is no need for clean termination.
100 100 os._exit(0)
101 101
def debugsvnlog(ui, **opts):
    """Fetch SVN log in a subprocess and channel them back to parent to
    avoid memory collection issues.

    The encoded get_log_child() arguments are read from stdin and the
    log entries are written to stdout, both switched to binary mode.
    """
    for stream in (sys.stdin, sys.stdout):
        util.set_binary(stream)
    childargs = decodeargs(sys.stdin.read())
    get_log_child(sys.stdout, *childargs)
110 110
class logstream(object):
    """Interruptible revision log iterator.

    Wraps a file-like object holding a sequence of pickled objects.
    Iterating yields every (orig_paths, revnum, author, date, message)
    tuple until a None terminator is read. Any other object which is
    not such a 5-tuple is reported as a SubversionException carrying
    it.

    NOTE(review): entries are unpickled, so the stream must only ever
    come from a trusted producer.
    """
    def __init__(self, stdout):
        self._stdout = stdout

    def __iter__(self):
        while True:
            entry = pickle.load(self._stdout)
            try:
                # Only checks that entry is a 5-item sequence
                orig_paths, revnum, author, date, message = entry
            except (TypeError, ValueError):
                # TypeError: not iterable (None or an error number),
                # ValueError: wrong number of items. Anything else,
                # like KeyboardInterrupt, must propagate untouched.
                if entry is None:
                    break
                raise SubversionException("child raised exception", entry)
            yield entry

    def close(self):
        """Close the underlying stream. Can be called more than once."""
        if self._stdout:
            self._stdout.close()
            self._stdout = None
131 131
132 132
def filecheck(path, proto):
    """Check to see if the given path is a local Subversion repo.

    Verify this by looking for several svn-specific files and
    directories in the given directory. 'proto' is not used here.
    """
    required = ('locks', 'hooks', 'format', 'db')
    missing = [name for name in required
               if not os.path.exists(os.path.join(path, name))]
    return not missing
141 141
def httpcheck(path, proto):
    """Check to see if a given path is the root of an svn repo over http.

    We verify this by requesting a version-controlled URL we know can't
    exist and looking for the svn-specific "not found" XML in the
    response. Errors raised by urllib while fetching are not caught.
    """
    response = urllib.urlopen('%s://%s/!svn/ver/0/.svn' % (proto, path))
    try:
        body = response.read()
    finally:
        # Do not leak the connection, issvnurl() may probe many paths
        response.close()
    return '<m:human-readable errcode="160013">' in body
148 148
# Repository detection function for each supported URL scheme
protomap = {
    'http': httpcheck,
    'https': httpcheck,
    'file': filecheck,
}

def issvnurl(url):
    """Tell whether 'url', or one of its parents, is an svn repository.

    A 'url' without a scheme is handled as a local path. The path is
    checked with the function registered for its scheme in protomap,
    then shortened by one trailing component at a time until a check
    succeeds or no slash is left. Unknown schemes never match.
    """
    def nocheck(path, proto):
        return False

    try:
        proto, path = url.split('://', 1)
        path = urllib.url2pathname(path)
    except ValueError:
        proto, path = 'file', os.path.abspath(url)
    candidate = path.replace(os.sep, '/')
    check = protomap.get(proto, nocheck)
    while '/' in candidate:
        if check(candidate, proto):
            return True
        # Drop the last path component and try again
        candidate = candidate[:candidate.rindex('/')]
    return False
167 167
168 168 # SVN conversion code stolen from bzr-svn and tailor
169 169 #
170 170 # Subversion looks like a versioned filesystem, branches structures
171 171 # are defined by conventions and not enforced by the tool. First,
172 172 # we define the potential branches (modules) as "trunk" and "branches"
173 173 # children directories. Revisions are then identified by their
174 174 # module and revision number (and a repository identifier).
175 175 #
176 176 # The revision graph is really a tree (or a forest). By default, a
177 177 # revision parent is the previous revision in the same module. If the
178 178 # module directory is copied/moved from another module then the
179 179 # revision is the module root and its parent the source revision in
180 180 # the parent module. A revision has at most one parent.
181 181 #
182 182 class svn_source(converter_source):
def __init__(self, ui, url, rev=None):
    """Open the Subversion repository or working copy at 'url'.

    'url' may end with '@REV' to select the revision to start from;
    a 'rev' argument, when set, takes precedence over it.

    Raises NoRepo if 'url' does not look like a Subversion repository
    or cannot be opened, MissingTool if the Subversion python bindings
    are missing or older than 1.4, and util.Abort if 'rev' or the
    convert.svn.startrev setting is not an integer or if no revision
    is found in the module.
    """
    super(svn_source, self).__init__(ui, url, rev=rev)

    if not (url.startswith('svn://') or url.startswith('svn+ssh://') or
            (os.path.exists(url) and
             os.path.exists(os.path.join(url, '.svn'))) or
            issvnurl(url)):
        raise NoRepo("%s does not look like a Subversion repo" % url)

    # The name is only defined if the bindings import at the top of
    # the file succeeded.
    try:
        SubversionException
    except NameError:
        raise MissingTool(_('Subversion python bindings could not be loaded'))

    try:
        version = svn.core.SVN_VER_MAJOR, svn.core.SVN_VER_MINOR
        if version < (1, 4):
            raise MissingTool(_('Subversion python bindings %d.%d found, '
                                '1.4 or later required') % version)
    except AttributeError:
        raise MissingTool(_('Subversion python bindings are too old, 1.4 '
                            'or later required'))

    # Highest revision number seen per module, see setrevmap()
    self.lastrevs = {}

    latest = None
    try:
        # Support file://path@rev syntax. Useful e.g. to convert
        # deleted branches.
        at = url.rfind('@')
        if at >= 0:
            latest = int(url[at+1:])
            url = url[:at]
    except ValueError:
        # What follows '@' is not a revision number, keep url as is
        pass
    self.url = geturl(url)
    self.encoding = 'UTF-8' # Subversion is always nominal UTF-8
    try:
        self.transport = transport.SvnRaTransport(url=self.url)
        self.ra = self.transport.ra
        self.ctx = self.transport.client
        self.baseurl = svn.ra.get_repos_root(self.ra)
        # Module is either empty or a repository path starting with
        # a slash and not ending with a slash.
        self.module = urllib.unquote(self.url[len(self.baseurl):])
        self.prevmodule = None
        self.rootmodule = self.module
        self.commits = {}
        self.paths = {}
        self.uuid = svn.ra.get_uuid(self.ra)
    except SubversionException:
        ui.traceback()
        raise NoRepo("%s does not look like a Subversion repo" % self.url)

    # An explicit revision overrides the one found in the url
    if rev:
        try:
            latest = int(rev)
        except ValueError:
            raise util.Abort(_('svn: revision %s is not an integer') % rev)

    # Negative start revisions are handled as 0
    self.startrev = self.ui.config('convert', 'svn.startrev', default=0)
    try:
        self.startrev = int(self.startrev)
        if self.startrev < 0:
            self.startrev = 0
    except ValueError:
        raise util.Abort(_('svn: start revision %s is not an integer')
                         % self.startrev)

    self.head = self.latest(self.module, latest)
    if not self.head:
        raise util.Abort(_('no revision found in module %s')
                         % self.module)
    self.last_changed = self.revnum(self.head)

    self._changescache = None

    # Remember the working copy directory, if any, see converted()
    if os.path.exists(os.path.join(url, '.svn/entries')):
        self.wc = url
    else:
        self.wc = None
    self.convertfp = None
270 265
def setrevmap(self, revmap):
    """Record in self.lastrevs the highest revision number found in
    'revmap' keys for every module.
    """
    newest = {}
    for revid in revmap.iterkeys():
        module, revnum = self.revsplit(revid)[1:]
        if module not in newest or revnum > newest[module]:
            newest[module] = revnum
    self.lastrevs = newest
279 274
def exists(self, path, optrev):
    """Return True if 'path', relative to the source URL, can be
    listed at revision 'optrev', False if svn raises an error.
    """
    target = self.url.rstrip('/') + '/' + urllib.quote(path)
    try:
        svn.client.ls(target, optrev, False, self.ctx)
    except SubversionException:
        return False
    return True
287 282
def getheads(self):
    """Return the list of head revision ids to convert.

    Looks for trunk, tags and branches directories, at the locations
    given by the convert.svn.trunk, convert.svn.tags and
    convert.svn.branches settings or at their default names. Updates
    self.module, self.head, self.tags and self.heads on the way.

    Raises util.Abort if a configured location does not exist, if the
    selected module has no revision, or if a start revision is set
    and cannot be honored.
    """

    def isdir(path, revnum):
        kind = self._checkpath(path, revnum)
        return kind == svn.core.svn_node_dir

    def getcfgpath(name, rev):
        # Return the location of the 'name' directory, None if it is
        # disabled by an empty setting or if the default one is missing
        cfgpath = self.ui.config('convert', 'svn.' + name)
        if cfgpath is not None and cfgpath.strip() == '':
            return None
        path = (cfgpath or name).strip('/')
        if not self.exists(path, rev):
            # Only explicitly configured locations have to exist
            if cfgpath:
                raise util.Abort(_('expected %s to be at %r, but not found')
                                 % (name, path))
            return None
        self.ui.note(_('found %s at %r\n') % (name, path))
        return path

    rev = optrev(self.last_changed)
    oldmodule = ''
    trunk = getcfgpath('trunk', rev)
    self.tags = getcfgpath('tags', rev)
    branches = getcfgpath('branches', rev)

    # If the project has a trunk or branches, we will extract heads
    # from them. We keep the project root otherwise.
    if trunk:
        oldmodule = self.module or ''
        self.module += '/' + trunk
        self.head = self.latest(self.module, self.last_changed)
        if not self.head:
            raise util.Abort(_('no revision found in module %s')
                             % self.module)

    # First head in the list is the module's head
    self.heads = [self.head]
    if self.tags is not None:
        # Turn the tags location into a path starting at oldmodule
        self.tags = '%s/%s' % (oldmodule , (self.tags or 'tags'))

    # Check if branches bring a few more heads to the list
    if branches:
        rpath = self.url.strip('/')
        branchnames = svn.client.ls(rpath + '/' + urllib.quote(branches),
                                    rev, False, self.ctx)
        for branch in branchnames.keys():
            module = '%s/%s/%s' % (oldmodule, branches, branch)
            if not isdir(module, self.last_changed):
                continue
            brevid = self.latest(module, self.last_changed)
            if not brevid:
                self.ui.note(_('ignoring empty branch %s\n') % branch)
                continue
            self.ui.note(_('found branch %s at %d\n') %
                         (branch, self.revnum(brevid)))
            self.heads.append(brevid)

    if self.startrev and self.heads:
        if len(self.heads) > 1:
            raise util.Abort(_('svn: start revision is not supported '
                               'with more than one branch'))
        revnum = self.revnum(self.heads[0])
        if revnum < self.startrev:
            raise util.Abort(_('svn: no revision found after start revision %d')
                             % self.startrev)

    return self.heads
355 350
def getfile(self, file, rev):
    """Return the content of 'file' at 'rev' and remember its mode
    in self.modecache.
    """
    content, flags = self._getfile(file, rev)
    self.modecache[(file, rev)] = flags
    return content
360 355
def getmode(self, file, rev):
    """Return the mode recorded in self.modecache for 'file' at 'rev'."""
    key = (file, rev)
    return self.modecache[key]
363 358
def getchanges(self, rev):
    """Return the (files, copies) changed by 'rev'.

    'files' is a sorted list of (path, rev) pairs. The result stored by
    getchangedfiles() is returned as is when it matches 'rev'. Otherwise
    the mode cache is reset and the entry of 'rev' is removed from
    self.paths once used.
    """
    cached = self._changescache
    if cached and cached[0] == rev:
        return cached[1]
    self._changescache = None
    self.modecache = {}
    paths, parents = self.paths[rev]
    if not parents:
        # Perform a full checkout on roots
        module, revnum = self.revsplit(rev)[1:]
        listing = svn.client.ls(self.baseurl + urllib.quote(module),
                                optrev(revnum), True, self.ctx)
        files = [name for name, entry in listing.iteritems()
                 if entry.kind == svn.core.svn_node_file]
        copies = {}
    else:
        files, copies = self.expandpaths(rev, paths, parents)

    files.sort()
    files = [(name, rev) for name in files]

    # caller caches the result, so free it here to release memory
    del self.paths[rev]
    return (files, copies)
387 382
def getchangedfiles(self, rev, i):
    """Return the paths of the files changed by 'rev'.

    The full getchanges() result is kept in self._changescache. 'i' is
    not used here.
    """
    result = self.getchanges(rev)
    self._changescache = (rev, result)
    files = result[0]
    return [entry[0] for entry in files]
392 387
def getcommit(self, rev):
    """Return the commit object of 'rev', fetching the revision log
    first if it is not available yet.
    """
    if rev not in self.commits:
        module, revnum = self.revsplit(rev)[1:]
        self.module = module
        self.reparent(module)
        # We assume that:
        # - requests for revisions after "stop" come from the
        # revision graph backward traversal. Cache all of them
        # down to stop, they will be used eventually.
        # - requests for revisions before "stop" come to get
        # isolated branches parents. Just fetch what is needed.
        stop = self.lastrevs.get(module, 0)
        if revnum < stop:
            stop = revnum + 1
        self._fetch_revisions(revnum, stop)
    # caller caches the result, so free it here to release memory
    return self.commits.pop(rev)
412 407
def gettags(self):
    """Return a dictionary mapping tag names to revision ids.

    The dictionary is empty if no tags directory was found by
    getheads(). A SubversionException raised while reading the log
    stops the traversal, tags found so far are returned.
    """
    tags = {}
    if self.tags is None:
        return tags

    # svn tags are just a convention, project branches left in a
    # 'tags' directory. There is no other relationship than
    # ancestry, which is expensive to discover and makes them hard
    # to update incrementally. Worse, past revisions may be
    # referenced by tags far away in the future, requiring a deep
    # history traversal on every calculation. Current code
    # performs a single backward traversal, tracking moves within
    # the tags directory (tag renaming) and recording a new tag
    # every time a project is copied from outside the tags
    # directory. It also lists deleted tags, this behaviour may
    # change in the future.
    pendings = []
    tagspath = self.tags
    start = svn.ra.get_latest_revnum(self.ra)
    try:
        for entry in self._getlog([self.tags], start, self.startrev):
            origpaths, revnum, author, date, message = entry
            copies = [(e.copyfrom_path, e.copyfrom_rev, p) for p, e
                      in origpaths.iteritems() if e.copyfrom_path]
            # Apply moves/copies from more specific to general
            copies.sort(reverse=True)

            srctagspath = tagspath
            if copies and copies[-1][2] == tagspath:
                # Track tags directory moves
                srctagspath = copies.pop()[0]

            for source, sourcerev, dest in copies:
                if not dest.startswith(tagspath + '/'):
                    continue
                for tag in pendings:
                    if tag[0].startswith(dest):
                        # A pending tag comes from this copy
                        # destination, follow it to the copy source
                        tagpath = source + tag[0][len(dest):]
                        tag[:2] = [tagpath, sourcerev]
                        break
                else:
                    pendings.append([source, sourcerev, dest])

            # Filter out tags with children coming from different
            # parts of the repository like:
            # /tags/tag.1 (from /trunk:10)
            # /tags/tag.1/foo (from /branches/foo:12)
            # Here /tags/tag.1 is discarded as well as its children.
            # It happens with tools like cvs2svn. Such tags cannot
            # be represented in mercurial.
            #
            # Only paths added by a copy are considered: plain adds
            # have no copy source (None) to compare with, and
            # concatenating it with '/' below would raise a
            # TypeError, for instance when the tags directory is
            # created in the same revision as its first tag.
            addeds = dict((p, e.copyfrom_path) for p, e
                          in origpaths.iteritems()
                          if e.action == 'A' and e.copyfrom_path)
            badroots = set()
            for destroot in addeds:
                for source, sourcerev, dest in pendings:
                    if (not dest.startswith(destroot + '/')
                        or source.startswith(addeds[destroot] + '/')):
                        continue
                    badroots.add(destroot)
                    break

            for badroot in badroots:
                pendings = [p for p in pendings if p[2] != badroot
                            and not p[2].startswith(badroot + '/')]

            # Tell tag renamings from tag creations
            remainings = []
            for source, sourcerev, dest in pendings:
                tagname = dest.split('/')[-1]
                if source.startswith(srctagspath):
                    # Renamed within the tags directory, keep
                    # following it in older revisions
                    remainings.append([source, sourcerev, tagname])
                    continue
                if tagname in tags:
                    # Keep the latest tag value
                    continue
                # From revision may be fake, get one with changes
                try:
                    tagid = self.latest(source, sourcerev)
                    if tagid and tagname not in tags:
                        tags[tagname] = tagid
                except SvnPathNotFound:
                    # It happens when we are following directories
                    # we assumed were copied with their parents
                    # but were really created in the tag
                    # directory.
                    pass
            pendings = remainings
            tagspath = srctagspath

    except SubversionException:
        self.ui.note(_('no tags found at revision %d\n') % start)
    return tags
505 500
def converted(self, rev, destrev):
    """Append a 'destrev revnum' line to the .svn/hg-shamap file of
    the working copy. Do nothing if the source is not a working copy.
    """
    if self.wc:
        fp = self.convertfp
        if fp is None:
            # Opened on first use and kept for the next calls
            shamap = os.path.join(self.wc, '.svn', 'hg-shamap')
            fp = self.convertfp = open(shamap, 'a')
        fp.write('%s %d\n' % (destrev, self.revnum(rev)))
        fp.flush()
514 509
def revid(self, revnum, module=None):
    """Return the 'svn:UUID/MODULE@REVNUM' revision id of 'revnum' in
    'module', or in the current module if 'module' is empty or None.
    """
    if not module:
        module = self.module
    return 'svn:%s%s@%s' % (self.uuid, module, revnum)
517 512
def revnum(self, rev):
    """Return the revision number following the last '@' of 'rev'."""
    return int(rev[rev.rfind('@') + 1:])
520 515
def revsplit(self, rev):
    """Split a 'svn:UUID/MODULE@REVNUM' revision id into a
    (uuid, module, revnum) tuple.

    The module is either empty or starts with a slash, the revision
    number is an integer.
    """
    url, number = rev.rsplit('@', 1)
    number = int(number)
    slash = url.find('/')
    if slash < 0:
        head, module = url, ''
    else:
        head, module = url[:slash], url[slash:]
    # Strip the leading 'svn:'
    return head[4:], module, number
530 525
def latest(self, path, stop=0):
    """Find the latest revid affecting path, up to stop. It may return
    a revision in a different module, since a branch may be moved without
    a change being reported. Return None if computed module does not
    belong to rootmodule subtree.

    The latest repository revision is used if 'stop' is 0. Raises
    SvnPathNotFound if 'path' cannot be found at revision 'stop'.
    """
    if not path.startswith(self.rootmodule):
        # Requests on foreign branches may be forbidden at server level
        self.ui.debug(_('ignoring foreign branch %r\n') % path)
        return None

    if not stop:
        stop = svn.ra.get_latest_revnum(self.ra)
    try:
        prevmodule = self.reparent('')
        try:
            dirent = svn.ra.stat(self.ra, path.strip('/'), stop)
        finally:
            # Always move back to the previous module, even if stat()
            # failed. Other methods only reparent when self.module
            # changes and would otherwise work from the repository
            # root.
            self.reparent(prevmodule)
    except SubversionException:
        dirent = None
    if not dirent:
        raise SvnPathNotFound(_('%s not found up to revision %d') % (path, stop))

    # stat() gives us the previous revision on this line of
    # development, but it might be in *another module*. Fetch the
    # log and detect renames down to the latest revision.
    stream = self._getlog([path], stop, dirent.created_rev)
    try:
        for entry in stream:
            paths, revnum, author, date, message = entry
            if revnum <= dirent.created_rev:
                break

            for p in paths:
                if not path.startswith(p) or not paths[p].copyfrom_path:
                    continue
                # path or one of its parents was copied, continue
                # with its location in the copy source
                newpath = paths[p].copyfrom_path + path[len(p):]
                self.ui.debug(_("branch renamed from %s to %s at %d\n") %
                              (path, newpath, revnum))
                path = newpath
                break
    finally:
        stream.close()

    if not path.startswith(self.rootmodule):
        self.ui.debug(_('ignoring foreign branch %r\n') % path)
        return None
    return self.revid(dirent.created_rev, path)
578 573
579 def get_blacklist(self):
580 """Avoid certain revision numbers.
581 It is not uncommon for two nearby revisions to cancel each other
582 out, e.g. 'I copied trunk into a subdirectory of itself instead
583 of making a branch'. The converted repository is significantly
584 smaller if we ignore such revisions.
585 """
586 self.blacklist = set()
587 blacklist = self.blacklist
588 for line in file("blacklist.txt", "r"):
589 if not line.startswith("#"):
590 try:
591 svn_rev = int(line.strip())
592 blacklist.add(svn_rev)
593 except ValueError:
594 pass # not an integer or a comment
595
596 def is_blacklisted(self, svn_rev):
597 return svn_rev in self.blacklist
598
def reparent(self, module):
    """Reparent the svn transport and return the previous parent."""
    previous = self.prevmodule
    if previous == module:
        # Already there, nothing to do
        return module
    target = self.baseurl + urllib.quote(module)
    self.ui.debug(_("reparent to %s\n") % target)
    svn.ra.reparent(self.ra, target)
    self.prevmodule = module
    if previous is None:
        return ''
    return previous
611 586
def expandpaths(self, rev, paths, parents):
    """Expand the changed paths of 'rev' into the files they affect.

    'paths' is a sequence of (path, entry) pairs and 'parents' the
    list of parent revision ids, only the first one is used. Return
    a (files, copies) tuple where 'files' is a list of paths relative
    to the module, without duplicates and in no particular order, and
    'copies' a dictionary mapping copy destinations to their sources.

    The transport is reparented to the module of 'rev' if necessary.
    """
    entries = []
    # Map of entrypath, revision for finding source of deleted
    # revisions.
    # NOTE(review): filled below but never read in this method
    copyfrom = {}
    copies = {}

    new_module, revnum = self.revsplit(rev)[1:]
    if new_module != self.module:
        self.module = new_module
        self.reparent(self.module)

    for path, ent in paths:
        entrypath = self.getrelpath(path)

        kind = self._checkpath(entrypath, revnum)
        if kind == svn.core.svn_node_file:
            entries.append(self.recode(entrypath))
            if not ent.copyfrom_path or not parents:
                continue
            # Copy sources not in parent revisions cannot be
            # represented, ignore their origin for now
            pmodule, prevnum = self.revsplit(parents[0])[1:]
            if ent.copyfrom_rev < prevnum:
                continue
            copyfrom_path = self.getrelpath(ent.copyfrom_path, pmodule)
            if not copyfrom_path:
                continue
            self.ui.debug(_("copied to %s from %s@%s\n") %
                          (entrypath, copyfrom_path, ent.copyfrom_rev))
            copies[self.recode(entrypath)] = self.recode(copyfrom_path)
        elif kind == 0: # gone, but had better be a deleted *file*
            self.ui.debug(_("gone from %s\n") % ent.copyfrom_rev)
            pmodule, prevnum = self.revsplit(parents[0])[1:]
            parentpath = pmodule + "/" + entrypath
            self.ui.debug(_("entry %s\n") % parentpath)

            # We can avoid the reparent calls if the module has
            # not changed but it is probably not worth the pain.
            prevmodule = self.reparent('')
            fromkind = svn.ra.check_path(self.ra, parentpath.strip('/'), prevnum)
            self.reparent(prevmodule)

            if fromkind == svn.core.svn_node_file:
                entries.append(self.recode(entrypath))
            elif fromkind == svn.core.svn_node_dir:
                # The path was a directory in the parent revision,
                # list what it contained there
                if ent.action == 'C':
                    children = self._find_children(path, prevnum)
                else:
                    oroot = parentpath.strip('/')
                    nroot = path.strip('/')
                    children = self._find_children(oroot, prevnum)
                    # NOTE(review): replaces every occurrence of
                    # oroot, not only the leading one
                    children = [s.replace(oroot,nroot) for s in children]

                for child in children:
                    childpath = self.getrelpath("/" + child, pmodule)
                    if not childpath:
                        continue
                    if childpath in copies:
                        del copies[childpath]
                    # NOTE(review): unlike the other branches, the
                    # path is not passed to recode() here
                    entries.append(childpath)
            else:
                self.ui.debug(_('unknown path in revision %d: %s\n') % \
                              (revnum, path))
        elif kind == svn.core.svn_node_dir:
            # If the directory just had a prop change,
            # then we shouldn't need to look for its children.
            if ent.action == 'M':
                continue

            children = sorted(self._find_children(path, revnum))
            for child in children:
                # Can we move a child directory and its
                # parent in the same commit? (probably can). Could
                # cause problems if instead of revnum -1,
                # we have to look in (copyfrom_path, revnum - 1)
                entrypath = self.getrelpath("/" + child)
                if entrypath:
                    # Need to filter out directories here...
                    kind = self._checkpath(entrypath, revnum)
                    if kind != svn.core.svn_node_dir:
                        entries.append(self.recode(entrypath))

            # Handle directory copies
            if not ent.copyfrom_path or not parents:
                continue
            # Copy sources not in parent revisions cannot be
            # represented, ignore their origin for now
            pmodule, prevnum = self.revsplit(parents[0])[1:]
            if ent.copyfrom_rev < prevnum:
                continue
            copyfrompath = self.getrelpath(ent.copyfrom_path, pmodule)
            if not copyfrompath:
                continue
            copyfrom[path] = ent
            self.ui.debug(_("mark %s came from %s:%d\n")
                          % (path, copyfrompath, ent.copyfrom_rev))
            # Record a copy for everything found in the copy source
            children = self._find_children(ent.copyfrom_path, ent.copyfrom_rev)
            children.sort()
            for child in children:
                entrypath = self.getrelpath("/" + child, pmodule)
                if not entrypath:
                    continue
                copytopath = path + entrypath[len(copyfrompath):]
                copytopath = self.getrelpath(copytopath)
                copies[self.recode(copytopath)] = self.recode(entrypath)

    return (list(set(entries)), copies)
720 695
def _fetch_revisions(self, from_revnum, to_revnum):
    """Read the log of the current module between two revisions and
    record what is found in self.commits and self.paths.

    The arguments are swapped if necessary so that from_revnum is not
    lower than to_revnum. Reading stops at the first revision below
    self.startrev or at the first branch root.

    Raises util.Abort if svn reports that the revision does not exist.
    """
    if from_revnum < to_revnum:
        from_revnum, to_revnum = to_revnum, from_revnum

    # Commit object parsed from the previous log entry, if any
    self.child_cset = None

    def parselogentry(orig_paths, revnum, author, date, message):
        """Return the parsed commit object or None, and True if
        the revision is a branch root.
        """
        self.ui.debug(_("parsing revision %d (%d changes)\n") %
                      (revnum, len(orig_paths)))

        branched = False
        rev = self.revid(revnum)
        # branch log might return entries for a parent we already have

        if rev in self.commits or revnum < to_revnum:
            return None, branched

        parents = []
        # check whether this revision is the start of a branch or part
        # of a branch renaming
        orig_paths = sorted(orig_paths.iteritems())
        root_paths = [(p,e) for p,e in orig_paths if self.module.startswith(p)]
        if root_paths:
            # Paths are sorted, the last one is the longest prefix
            path, ent = root_paths[-1]
            if ent.copyfrom_path:
                branched = True
                newpath = ent.copyfrom_path + self.module[len(path):]
                # ent.copyfrom_rev may not be the actual last revision
                previd = self.latest(newpath, ent.copyfrom_rev)
                if previd is not None:
                    prevmodule, prevnum = self.revsplit(previd)[1:]
                    if prevnum >= self.startrev:
                        parents = [previd]
                        self.ui.note(_('found parent of branch %s at %d: %s\n') %
                                     (self.module, prevnum, prevmodule))
            else:
                self.ui.debug(_("no copyfrom path, don't know what to do.\n"))

        paths = []
        # filter out unrelated paths
        for path, ent in orig_paths:
            if self.getrelpath(path) is None:
                continue
            paths.append((path, ent))

        # Example SVN datetime. Includes microseconds.
        # ISO-8601 conformant
        # '2007-01-04T17:35:00.902377Z'
        date = util.parsedate(date[:19] + " UTC", ["%Y-%m-%dT%H:%M:%S"])

        log = message and self.recode(message) or ''
        author = author and self.recode(author) or ''
        # The branch is named after the last component of the module,
        # 'trunk' being turned into an empty name
        try:
            branch = self.module.split("/")[-1]
            if branch == 'trunk':
                branch = ''
        except IndexError:
            branch = None

        cset = commit(author=author,
                      date=util.datestr(date),
                      desc=log,
                      parents=parents,
                      branch=branch,
                      rev=rev)

        self.commits[rev] = cset
        # The parents list is *shared* among self.paths and the
        # commit object. Both will be updated below.
        self.paths[rev] = (paths, cset.parents)
        # Make this revision the parent of the previously parsed one,
        # unless it already has a parent
        if self.child_cset and not self.child_cset.parents:
            self.child_cset.parents[:] = [rev]
        self.child_cset = cset
        return cset, branched

    self.ui.note(_('fetching revision log for "%s" from %d to %d\n') %
                 (self.module, from_revnum, to_revnum))

    try:
        firstcset = None
        lastonbranch = False
        stream = self._getlog([self.module], from_revnum, to_revnum)
        try:
            for entry in stream:
                paths, revnum, author, date, message = entry
                if revnum < self.startrev:
                    lastonbranch = True
                    break
                if not paths:
                    self.ui.debug(_('revision %d has no entries\n') % revnum)
                    continue
                cset, lastonbranch = parselogentry(paths, revnum, author,
                                                   date, message)
                if cset:
                    firstcset = cset
                if lastonbranch:
                    break
        finally:
            stream.close()

        if not lastonbranch and firstcset and not firstcset.parents:
            # The first revision of the sequence (the last fetched one)
            # has invalid parents if not a branch root. Find the parent
            # revision now, if any.
            try:
                firstrevnum = self.revnum(firstcset.rev)
                if firstrevnum > 1:
                    latest = self.latest(self.module, firstrevnum - 1)
                    if latest:
                        firstcset.parents.append(latest)
            except SvnPathNotFound:
                pass
    except SubversionException, (inst, num):
        if num == svn.core.SVN_ERR_FS_NO_SUCH_REVISION:
            raise util.Abort(_('svn: branch has no revision %s') % to_revnum)
        raise
844 815
845 816 def _getfile(self, file, rev):
846 817 # TODO: ra.get_file transmits the whole file instead of diffs.
847 818 mode = ''
848 819 try:
849 820 new_module, revnum = self.revsplit(rev)[1:]
850 821 if self.module != new_module:
851 822 self.module = new_module
852 823 self.reparent(self.module)
853 824 io = StringIO()
854 825 info = svn.ra.get_file(self.ra, file, revnum, io)
855 826 data = io.getvalue()
856 827 # ra.get_files() seems to keep a reference on the input buffer
857 828 # preventing collection. Release it explicitely.
858 829 io.close()
859 830 if isinstance(info, list):
860 831 info = info[-1]
861 832 mode = ("svn:executable" in info) and 'x' or ''
862 833 mode = ("svn:special" in info) and 'l' or mode
863 834 except SubversionException, e:
864 835 notfound = (svn.core.SVN_ERR_FS_NOT_FOUND,
865 836 svn.core.SVN_ERR_RA_DAV_PATH_NOT_FOUND)
866 837 if e.apr_err in notfound: # File not found
867 838 raise IOError()
868 839 raise
869 840 if mode == 'l':
870 841 link_prefix = "link "
871 842 if data.startswith(link_prefix):
872 843 data = data[len(link_prefix):]
873 844 return data, mode
874 845
def _find_children(self, path, revnum):
    """Return the recursive listing of 'path' at revision 'revnum',
    every entry being prefixed with 'path' stripped of its slashes.
    """
    relpath = path.strip('/')
    pool = Pool()
    url = (self.baseurl + '/' + urllib.quote(relpath)).strip('/')
    listing = svn.client.ls(url, optrev(revnum), True, self.ctx, pool)
    children = []
    for name in listing.keys():
        children.append('%s/%s' % (relpath, name))
    return children
881 852
def getrelpath(self, path, module=None):
    """Return path relative to module, or None if it lies outside it.

    module defaults to self.module. For instance, with a module of
    "/CMFPlone/branches/Plone-2_0-branch", the repository path
    "/CMFPlone/branches/Plone-2_0-branch/tests/PloneTestCase.py"
    (as found in svn log --xml output) maps to
    "tests/PloneTestCase.py". The module path itself maps to ''.
    """
    if module is None:
        module = self.module
    if path.startswith(module):
        tail = path.rstrip('/')[len(module):]
        if not tail:
            # path is the module root itself
            return tail
        if tail[0] == '/':
            return tail[1:]
        # Otherwise module is only a textual prefix of an unrelated
        # sibling name; fall through and report it as outside.

    # The path is outside our tracked tree...
    self.ui.debug(_('%r is not under %r, ignoring\n') % (path, module))
    return None
901 872
def _checkpath(self, path, revnum):
    """Return the result of svn.ra.check_path for path at revnum."""
    # ra.check_path does not like leading slashes very much, it leads
    # to PROPFIND subversion errors
    relpath = path.strip('/')
    return svn.ra.check_path(self.ra, relpath, revnum)
906 877
def _getlog(self, paths, start, end, limit=0, discover_changed_paths=True,
            strict_node_history=False):
    """Return a logstream reading the output of 'hg debugsvnlog'.

    The encoded request (base URL, normalized paths and the log
    options) is written to the standard input of the child process.
    """
    def normalize(p):
        # Normalize path names, svn >= 1.5 only wants paths relative to
        # supplied URL
        if not p.startswith('/'):
            p = self.module + '/' + p
        return p.strip('/')

    relpaths = [normalize(p) for p in paths]
    request = encodeargs([self.baseurl, relpaths, start, end, limit,
                          discover_changed_paths, strict_node_history])
    cmd = '%s debugsvnlog' % util.shellquote(util.hgexecutable())
    stdin, stdout = util.popen2(cmd)
    stdin.write(request)
    stdin.close()
    return logstream(stdout)
925 896
# Shell script written as the 'pre-revprop-change' hook of repositories
# created by svn_sink. It lets svn:log be modified and the
# hg:convert-branch / hg:convert-rev revision properties be added, and
# rejects every other revision property change.
pre_revprop_change = '''#!/bin/sh

REPOS="$1"
REV="$2"
USER="$3"
PROPNAME="$4"
ACTION="$5"

if [ "$ACTION" = "M" -a "$PROPNAME" = "svn:log" ]; then exit 0; fi
if [ "$ACTION" = "A" -a "$PROPNAME" = "hg:convert-branch" ]; then exit 0; fi
if [ "$ACTION" = "A" -a "$PROPNAME" = "hg:convert-rev" ]; then exit 0; fi

echo "Changing prohibited revision property" >&2
exit 1
'''
941 912
class svn_sink(converter_sink, commandline):
    """Conversion sink committing changesets to a Subversion working copy.

    File changes are applied directly to the working copy files, then
    recorded and committed by running the 'svn' command line client.
    Bookkeeping files (revision map, author map, child map) are kept
    inside the working copy '.svn' directory.
    """

    # Extracts the new revision number from 'svn commit' output.
    commit_re = re.compile(r'Committed revision (\d+).', re.M)

    def prerun(self):
        """Change to the working copy directory, once it is known."""
        # NOTE(review): presumably invoked by commandline before each
        # command is run -- confirm in common.py.
        if self.wc:
            os.chdir(self.wc)

    def postrun(self):
        """Change back to the directory the sink was created from."""
        if self.wc:
            os.chdir(self.cwd)

    def join(self, name):
        """Return the path of name inside the working copy '.svn' dir."""
        return os.path.join(self.wc, '.svn', name)

    def revmapfile(self):
        return self.join('hg-shamap')

    def authorfile(self):
        return self.join('hg-authormap')

    def __init__(self, ui, path):
        """Set up the working copy used to receive converted changesets.

        If path already is a working copy (it contains '.svn/entries')
        it is updated and used as is. Otherwise a working copy named
        '<basename of path>-wc' is checked out from path in the current
        directory; when the parent directory of path exists but path is
        not a repository yet, the repository is created first and given
        the pre_revprop_change hook.
        """
        converter_sink.__init__(self, ui, path)
        commandline.__init__(self, ui, 'svn')
        self.delete = []
        self.setexec = []
        self.delexec = []
        self.copies = []
        # self.wc must exist before any command runs: prerun() reads it.
        self.wc = None
        self.cwd = os.getcwd()

        path = os.path.realpath(path)

        created = False
        if os.path.isfile(os.path.join(path, '.svn', 'entries')):
            self.wc = path
            self.run0('update')
        else:
            wcpath = os.path.join(os.getcwd(), os.path.basename(path) + '-wc')

            if os.path.isdir(os.path.dirname(path)):
                if not os.path.exists(os.path.join(path, 'db', 'fs-type')):
                    ui.status(_('initializing svn repo %r\n') %
                              os.path.basename(path))
                    commandline(ui, 'svnadmin').run0('create', path)
                    created = path
                # Turn the local repository path into a file:// URL.
                path = util.normpath(path)
                if not path.startswith('/'):
                    path = '/' + path
                path = 'file://' + path

            ui.status(_('initializing svn wc %r\n') % os.path.basename(wcpath))
            self.run0('checkout', path, wcpath)

            self.wc = wcpath
        self.opener = util.opener(self.wc)
        self.wopener = util.opener(self.wc)
        self.childmap = mapfile(ui, self.join('hg-childmap'))
        self.is_exec = util.checkexec(self.wc) and util.is_exec or None

        if created:
            # Allow the revision property changes made by putcommit().
            hook = os.path.join(created, 'hooks', 'pre-revprop-change')
            fp = open(hook, 'w')
            try:
                fp.write(pre_revprop_change)
            finally:
                fp.close()
            util.set_flags(hook, False, True)

        xport = transport.SvnRaTransport(url=geturl(path))
        self.uuid = svn.ra.get_uuid(xport.ra)

    def wjoin(self, *names):
        """Return the path made of names, rooted at the working copy."""
        return os.path.join(self.wc, *names)

    def putfile(self, filename, flags, data):
        """Write data to filename in the working copy.

        A flags value containing 'l' creates a symlink whose target is
        data. Otherwise a regular file is written and the file name is
        queued in self.setexec or self.delexec when its svn:executable
        property has to change to match the presence of 'x' in flags.
        """
        if 'l' in flags:
            self.wopener.symlink(data, filename)
        else:
            wpath = self.wjoin(filename)
            try:
                # Remove a symlink left by a previous revision so the
                # data is not written through it. The working copy path
                # must be used here: the process is not running from
                # within the working copy.
                if os.path.islink(wpath):
                    os.unlink(wpath)
            except OSError:
                pass
            fp = self.wopener(filename, 'w')
            try:
                fp.write(data)
            finally:
                fp.close()

            if self.is_exec:
                was_exec = self.is_exec(wpath)
            else:
                # On filesystems not supporting execute-bit, there is no way
                # to know if it is set but asking subversion. Setting it
                # systematically is just as expensive and much simpler.
                was_exec = 'x' not in flags

            util.set_flags(wpath, False, 'x' in flags)
            if was_exec:
                if 'x' not in flags:
                    self.delexec.append(filename)
            else:
                if 'x' in flags:
                    self.setexec.append(filename)

    def _copyfile(self, source, dest):
        """Record with 'svn copy' that dest was copied from source."""
        # SVN's copy command pukes if the destination file exists, but
        # our copyfile method expects to record a copy that has
        # already occurred. Cross the semantic gap.
        wdest = self.wjoin(dest)
        exists = os.path.exists(wdest)
        if exists:
            # Reserve a unique name next to the destination and move
            # the existing file out of the way.
            fd, tempname = tempfile.mkstemp(
                prefix='hg-copy-', dir=os.path.dirname(wdest))
            os.close(fd)
            os.unlink(tempname)
            os.rename(wdest, tempname)
        try:
            self.run0('copy', source, dest)
        finally:
            if exists:
                # Put the already written content back in place of
                # whatever 'svn copy' produced.
                try:
                    os.unlink(wdest)
                except OSError:
                    pass
                os.rename(tempname, wdest)

    def dirs_of(self, files):
        """Return the set of directories related to files.

        It holds the entries of files which are directories in the
        working copy, plus the prefix of every name up to each '/'
        position reported by strutil.rfindall.
        """
        dirs = set()
        for f in files:
            if os.path.isdir(self.wjoin(f)):
                dirs.add(f)
            for i in strutil.rfindall(f, '/'):
                dirs.add(f[:i])
        return dirs

    def add_dirs(self, files):
        """'svn add' the directories of files lacking '.svn/entries'.

        Returns the list of added directories.
        """
        add_dirs = [d for d in sorted(self.dirs_of(files))
                    if not os.path.exists(self.wjoin(d, '.svn', 'entries'))]
        if add_dirs:
            self.xargs(add_dirs, 'add', non_recursive=True, quiet=True)
        return add_dirs

    def add_files(self, files):
        """'svn add' files, if any, and return them."""
        if files:
            self.xargs(files, 'add', quiet=True)
        return files

    def tidy_dirs(self, names):
        """'svn delete' the directories of names left with only '.svn'.

        Directories are visited in reverse sorted order, so children
        come before their parents. Returns the list of deleted
        directories.
        """
        deleted = []
        for d in sorted(self.dirs_of(names), reverse=True):
            wd = self.wjoin(d)
            # os.listdir() returns a list: compare with a list, a
            # comparison with the bare '.svn' string never matches.
            if os.listdir(wd) == ['.svn']:
                self.run0('delete', d)
                deleted.append(d)
        return deleted

    def addchild(self, parent, child):
        self.childmap[parent] = child

    def revid(self, rev):
        """Return the identifier of revision rev of this repository."""
        return u"svn:%s@%s" % (self.uuid, rev)

    def putcommit(self, files, copies, parents, commit, source, revmap):
        """Apply a changeset to the working copy and commit it.

        files is a sequence of (name, version) pairs fetched from
        source; names for which source.getfile() raises IOError are
        deleted. Returns the identifier (see revid()) of the committed
        revision, or of the child already recorded for one of parents.
        Raises util.Abort when the 'svn commit' output cannot be parsed.
        """
        # Apply changes to working copy
        for f, v in files:
            try:
                data = source.getfile(f, v)
            except IOError:
                self.delete.append(f)
            else:
                e = source.getmode(f, v)
                self.putfile(f, e, data)
                if f in copies:
                    self.copies.append([copies[f], f])
        files = [f[0] for f in files]

        # A parent which already has a recorded child is not committed
        # again: return the existing child.
        for parent in parents:
            try:
                return self.revid(self.childmap[parent])
            except KeyError:
                pass
        entries = set(self.delete)
        files = frozenset(files)
        entries.update(self.add_dirs(files.difference(entries)))
        if self.copies:
            for s, d in self.copies:
                self._copyfile(s, d)
            self.copies = []
        if self.delete:
            self.xargs(self.delete, 'delete')
            self.delete = []
        entries.update(self.add_files(files.difference(entries)))
        entries.update(self.tidy_dirs(entries))
        if self.delexec:
            self.xargs(self.delexec, 'propdel', 'svn:executable')
            self.delexec = []
        if self.setexec:
            self.xargs(self.setexec, 'propset', 'svn:executable', '*')
            self.setexec = []

        fd, messagefile = tempfile.mkstemp(prefix='hg-convert-')
        try:
            # Written inside the try block so the temporary file is
            # removed even when writing the message fails.
            fp = os.fdopen(fd, 'w')
            try:
                fp.write(commit.desc)
            finally:
                fp.close()
            output = self.run0('commit',
                               username=util.shortuser(commit.author),
                               file=messagefile,
                               encoding='utf-8')
            match = self.commit_re.search(output)
            if not match:
                self.ui.warn(_('unexpected svn output:\n'))
                self.ui.warn(output)
                raise util.Abort(_('unable to cope with svn output'))
            rev = match.group(1)
            # Keep track of the origin of the revision in revision
            # properties (allowed by the pre_revprop_change hook).
            if commit.rev:
                self.run('propset', 'hg:convert-rev', commit.rev,
                         revprop=True, revision=rev)
            if commit.branch and commit.branch != 'default':
                self.run('propset', 'hg:convert-branch', commit.branch,
                         revprop=True, revision=rev)
            for parent in parents:
                self.addchild(parent, rev)
            return self.revid(rev)
        finally:
            os.unlink(messagefile)

    def puttags(self, tags):
        """Tags are not supported yet: only emit a warning."""
        self.ui.warn(_('XXX TAGS NOT IMPLEMENTED YET\n'))
General Comments 0
You need to be logged in to leave comments. Login now