##// END OF EJS Templates
Merge with crew-stable
Patrick Mezard -
r7382:f5f6b7dc merge default
parent child Browse files
Show More
@@ -1,1163 +1,1172 b''
1 1 # Subversion 1.4/1.5 Python API backend
2 2 #
3 3 # Copyright(C) 2007 Daniel Holth et al
4 4 #
5 5 # Configuration options:
6 6 #
7 7 # convert.svn.trunk
8 8 # Relative path to the trunk (default: "trunk")
9 9 # convert.svn.branches
10 10 # Relative path to tree of branches (default: "branches")
11 11 # convert.svn.tags
12 12 # Relative path to tree of tags (default: "tags")
13 13 #
14 14 # Set these in a hgrc, or on the command line as follows:
15 15 #
16 16 # hg convert --config convert.svn.trunk=wackoname [...]
17 17
18 18 import locale
19 19 import os
20 20 import re
21 21 import sys
22 22 import cPickle as pickle
23 23 import tempfile
24 24 import urllib
25 25
26 26 from mercurial import strutil, util
27 27 from mercurial.i18n import _
28 28
29 29 # Subversion stuff. Works best with very recent Python SVN bindings
30 30 # e.g. SVN 1.5 or backports. Thanks to the bzr folks for enhancing
31 31 # these bindings.
32 32
33 33 from cStringIO import StringIO
34 34
35 35 from common import NoRepo, commit, converter_source, encodeargs, decodeargs
36 36 from common import commandline, converter_sink, mapfile
37 37
38 38 try:
39 39 from svn.core import SubversionException, Pool
40 40 import svn
41 41 import svn.client
42 42 import svn.core
43 43 import svn.ra
44 44 import svn.delta
45 45 import transport
46 46 except ImportError:
47 47 pass
48 48
class SvnPathNotFound(Exception):
    """Raised when a repository path cannot be found.

    svn_source.latest() raises it when the path does not exist up to the
    requested revision; callers that can tolerate a missing path catch it.
    """
    pass
51
def geturl(path):
    """Return a repository URL for path.

    First asks the Subversion client to derive a URL from the canonicalized
    path. If that raises SubversionException and path is a local directory,
    a file:// URL is built from its absolute, normalized form. Otherwise
    path is returned unchanged.
    """
    try:
        return svn.client.url_from_path(svn.core.svn_path_canonicalize(path))
    except SubversionException:
        pass
    if os.path.isdir(path):
        path = os.path.normpath(os.path.abspath(path))
        if os.name == 'nt':
            # Drive-letter paths need a leading slash to form a valid
            # file:// URL.
            path = '/' + util.normpath(path)
        return 'file://%s' % urllib.quote(path)
    return path
60 63
def optrev(number):
    """Return an svn_opt_revision_t designating revision `number`."""
    # The local is deliberately not called "optrev" so that it does not
    # shadow this function's own name.
    rev = svn.core.svn_opt_revision_t()
    rev.kind = svn.core.svn_opt_revision_number
    rev.value.number = number
    return rev
66 69
class changedpath(object):
    """Plain copy of the fields of a changed-path log entry.

    get_log_child() wraps every changed-path value in this class before
    pickling the log entry (presumably because the objects handed out by
    the bindings cannot be pickled directly -- confirm against the
    bindings in use).
    """
    def __init__(self, p):
        # Only these three attributes are carried over from p.
        self.copyfrom_path = p.copyfrom_path
        self.copyfrom_rev = p.copyfrom_rev
        self.action = p.action
72 75
73 76 def get_log_child(fp, url, paths, start, end, limit=0, discover_changed_paths=True,
74 77 strict_node_history=False):
75 78 protocol = -1
76 79 def receiver(orig_paths, revnum, author, date, message, pool):
77 80 if orig_paths is not None:
78 81 for k, v in orig_paths.iteritems():
79 82 orig_paths[k] = changedpath(v)
80 83 pickle.dump((orig_paths, revnum, author, date, message),
81 84 fp, protocol)
82 85
83 86 try:
84 87 # Use an ra of our own so that our parent can consume
85 88 # our results without confusing the server.
86 89 t = transport.SvnRaTransport(url=url)
87 90 svn.ra.get_log(t.ra, paths, start, end, limit,
88 91 discover_changed_paths,
89 92 strict_node_history,
90 93 receiver)
91 94 except SubversionException, (inst, num):
92 95 pickle.dump(num, fp, protocol)
93 96 except IOError:
94 97 # Caller may interrupt the iteration
95 98 pickle.dump(None, fp, protocol)
96 99 else:
97 100 pickle.dump(None, fp, protocol)
98 101 fp.close()
99 102 # With large history, cleanup process goes crazy and suddenly
100 103 # consumes *huge* amount of memory. The output file being closed,
101 104 # there is no need for clean termination.
102 105 os._exit(0)
103 106
def debugsvnlog(ui, **opts):
    """Fetch SVN log in a subprocess and channel them back to parent to
    avoid memory collection issues.

    The arguments for get_log_child() are read, encoded, from stdin and
    the pickled log records are written to stdout; both streams are
    switched to binary mode first. get_log_child() terminates the process.
    """
    util.set_binary(sys.stdin)
    util.set_binary(sys.stdout)
    args = decodeargs(sys.stdin.read())
    get_log_child(sys.stdout, *args)
112 115
class logstream:
    """Interruptible revision log iterator.

    Reads pickled records from the given file object. A record that is a
    5-tuple is yielded as a log entry; a None record ends the iteration;
    any other record is reported as a SubversionException carrying the
    record as its second argument.
    """
    def __init__(self, stdout):
        self._stdout = stdout

    def __iter__(self):
        while True:
            entry = pickle.load(self._stdout)
            try:
                orig_paths, revnum, author, date, message = entry
            except (TypeError, ValueError):
                # Not a 5-tuple: either the end marker or an error record.
                # Only unpacking failures are caught so that unrelated
                # exceptions (e.g. KeyboardInterrupt) are not masked.
                if entry is None:
                    break
                raise SubversionException("child raised exception", entry)
            yield entry

    def close(self):
        """Close the underlying stream; safe to call more than once."""
        if self._stdout:
            self._stdout.close()
            self._stdout = None
133 136
# SVN conversion code stolen from bzr-svn and tailor
#
# Subversion looks like a versioned filesystem; branch structures
# are defined by convention and not enforced by the tool. First,
# we define the potential branches (modules) as the "trunk" and
# "branches" children directories. Revisions are then identified by
# their module and revision number (and a repository identifier).
#
# The revision graph is really a tree (or a forest). By default, a
# revision's parent is the previous revision in the same module. If the
# module directory is copied/moved from another module then the
# revision is the module root and its parent is the source revision in
# the parent module. A revision has at most one parent.
#
148 151 class svn_source(converter_source):
def __init__(self, ui, url, rev=None):
    """Open the Subversion repository at url as a conversion source.

    url may carry a trailing "@rev" suffix selecting the revision to
    start from; an explicit rev argument overrides it. Raises NoRepo if
    the bindings are missing or the URL cannot be opened as a Subversion
    repository, and util.Abort if rev or the convert.svn.startrev setting
    is not an integer or no revision is found in the module.
    """
    super(svn_source, self).__init__(ui, url, rev=rev)

    try:
        # The name only exists if the guarded import of the bindings
        # at the top of the file succeeded.
        SubversionException
    except NameError:
        raise NoRepo('Subversion python bindings could not be loaded')

    # NOTE: this value is overwritten with 'UTF-8' below before it is read.
    self.encoding = locale.getpreferredencoding()
    self.lastrevs = {}

    latest = None
    try:
        # Support file://path@rev syntax. Useful e.g. to convert
        # deleted branches.
        at = url.rfind('@')
        if at >= 0:
            latest = int(url[at+1:])
            url = url[:at]
    except ValueError:
        # The text after '@' is not a revision number: keep url as is.
        pass
    self.url = geturl(url)
    self.encoding = 'UTF-8' # Subversion is always nominal UTF-8
    try:
        self.transport = transport.SvnRaTransport(url=self.url)
        self.ra = self.transport.ra
        self.ctx = self.transport.client
        self.baseurl = svn.ra.get_repos_root(self.ra)
        # Module is either empty or a repository path starting with
        # a slash and not ending with a slash.
        self.module = urllib.unquote(self.url[len(self.baseurl):])
        self.prevmodule = None
        self.rootmodule = self.module
        self.commits = {}
        self.paths = {}
        self.uuid = svn.ra.get_uuid(self.ra).decode(self.encoding)
    except SubversionException:
        ui.print_exc()
        raise NoRepo("%s does not look like a Subversion repo" % self.url)

    if rev:
        try:
            latest = int(rev)
        except ValueError:
            raise util.Abort(_('svn: revision %s is not an integer') % rev)

    self.startrev = self.ui.config('convert', 'svn.startrev', default=0)
    try:
        self.startrev = int(self.startrev)
        if self.startrev < 0:
            self.startrev = 0
    except ValueError:
        raise util.Abort(_('svn: start revision %s is not an integer')
                         % self.startrev)

    try:
        self.get_blacklist()
    except IOError:
        # The blacklist file cannot be read: go on without one.
        pass

    self.head = self.latest(self.module, latest)
    if not self.head:
        raise util.Abort(_('no revision found in module %s') %
                         self.module.encode(self.encoding))
    self.last_changed = self.revnum(self.head)

    self._changescache = None

    # If url is a local working copy, remember it so that converted()
    # can record the conversion map inside it.
    if os.path.exists(os.path.join(url, '.svn/entries')):
        self.wc = url
    else:
        self.wc = None
    self.convertfp = None
222 225
def setrevmap(self, revmap):
    """Record in self.lastrevs the highest revision number seen per module.

    The keys of revmap are revision ids that revsplit() can parse.
    """
    lastrevs = {}
    for revid in revmap.iterkeys():
        uuid, module, revnum = self.revsplit(revid)
        # setdefault() stores revnum for a new module and returns the
        # current maximum otherwise.
        lastrevnum = lastrevs.setdefault(module, revnum)
        if revnum > lastrevnum:
            lastrevs[module] = revnum
    self.lastrevs = lastrevs
231 234
def exists(self, path, optrev):
    """Return True if path, relative to self.url, can be listed at
    revision optrev, and False if listing it raises SubversionException.
    """
    try:
        svn.client.ls(self.url.rstrip('/') + '/' + urllib.quote(path),
                      optrev, False, self.ctx)
        return True
    except SubversionException:
        return False
239 242
def getheads(self):
    """Return the list of head revision ids to convert.

    The first entry is the head of the main module (the trunk when one
    is found, the original module otherwise); one more entry is added
    for every non-empty directory under the branches path. As side
    effects self.tags and self.heads are set, and self.module and
    self.head are updated when a trunk is found.

    Raises util.Abort when an explicitly configured layout path is
    missing, when the trunk has no revision, or when a start revision
    is combined with several heads or lies after the only head.
    """

    def isdir(path, revnum):
        kind = self._checkpath(path, revnum)
        return kind == svn.core.svn_node_dir

    def getcfgpath(name, rev):
        # Return the configured (or default) layout path for name, or
        # None if it is disabled (set to an empty value) or absent.
        cfgpath = self.ui.config('convert', 'svn.' + name)
        if cfgpath is not None and cfgpath.strip() == '':
            return None
        path = (cfgpath or name).strip('/')
        if not self.exists(path, rev):
            if cfgpath:
                # Only an explicitly configured path is required to exist.
                raise util.Abort(_('expected %s to be at %r, but not found')
                                 % (name, path))
            return None
        self.ui.note(_('found %s at %r\n') % (name, path))
        return path

    rev = optrev(self.last_changed)
    oldmodule = ''
    trunk = getcfgpath('trunk', rev)
    self.tags = getcfgpath('tags', rev)
    branches = getcfgpath('branches', rev)

    # If the project has a trunk or branches, we will extract heads
    # from them. We keep the project root otherwise.
    if trunk:
        oldmodule = self.module or ''
        self.module += '/' + trunk
        self.head = self.latest(self.module, self.last_changed)
        if not self.head:
            raise util.Abort(_('no revision found in module %s') %
                             self.module.encode(self.encoding))

    # First head in the list is the module's head
    self.heads = [self.head]
    if self.tags is not None:
        self.tags = '%s/%s' % (oldmodule , (self.tags or 'tags'))

    # Check if branches bring a few more heads to the list
    if branches:
        rpath = self.url.strip('/')
        branchnames = svn.client.ls(rpath + '/' + urllib.quote(branches),
                                    rev, False, self.ctx)
        for branch in branchnames.keys():
            module = '%s/%s/%s' % (oldmodule, branches, branch)
            if not isdir(module, self.last_changed):
                continue
            brevid = self.latest(module, self.last_changed)
            if not brevid:
                self.ui.note(_('ignoring empty branch %s\n') %
                             branch.encode(self.encoding))
                continue
            self.ui.note(_('found branch %s at %d\n') %
                         (branch, self.revnum(brevid)))
            self.heads.append(brevid)

    if self.startrev and self.heads:
        if len(self.heads) > 1:
            raise util.Abort(_('svn: start revision is not supported '
                               'with more than one branch'))
        revnum = self.revnum(self.heads[0])
        if revnum < self.startrev:
            raise util.Abort(_('svn: no revision found after start revision %d')
                             % self.startrev)

    return self.heads
308 311
def getfile(self, file, rev):
    """Return the content of file at rev.

    The file mode returned by _getfile() is stored in self.modecache
    under (file, rev) so that getmode() can return it afterwards.
    """
    data, mode = self._getfile(file, rev)
    self.modecache[(file, rev)] = mode
    return data
313 316
def getmode(self, file, rev):
    """Return the mode cached for (file, rev) by a previous getfile().

    Raises KeyError if getfile() was not called for that pair since the
    mode cache was last reset.
    """
    return self.modecache[(file, rev)]
316 319
def getchanges(self, rev):
    """Return (files, copies) for revision rev.

    files is a sorted list of (path, rev) pairs and copies the mapping
    produced by expandpaths() (empty for root revisions). A result cached
    by getchangedfiles() for the same rev is returned as is. Otherwise
    the mode cache is reset and the entry for rev is removed from
    self.paths, so an uncached revision can only be computed once.
    """
    if self._changescache and self._changescache[0] == rev:
        return self._changescache[1]
    self._changescache = None
    self.modecache = {}
    (paths, parents) = self.paths[rev]
    if parents:
        files, copies = self.expandpaths(rev, paths, parents)
    else:
        # Perform a full checkout on roots
        uuid, module, revnum = self.revsplit(rev)
        entries = svn.client.ls(self.baseurl + urllib.quote(module),
                                optrev(revnum), True, self.ctx)
        files = [n for n, e in entries.iteritems()
                 if e.kind == svn.core.svn_node_file]
        copies = {}

    files.sort()
    files = [(f, rev) for f in files]

    # caller caches the result, so free it here to release memory
    del self.paths[rev]
    return (files, copies)
340 343
def getchangedfiles(self, rev, i):
    """Return the names of the files changed in rev.

    The full getchanges() result is cached in self._changescache so that
    a following getchanges(rev) call can reuse it. The i argument is not
    used here.
    """
    changes = self.getchanges(rev)
    self._changescache = (rev, changes)
    return [f[0] for f in changes[0]]
345 348
def getcommit(self, rev):
    """Return the commit object for rev, fetching the log if needed.

    The commit is removed from self.commits before being returned.
    Raises KeyError if the revision is still unknown after fetching.
    """
    if rev not in self.commits:
        uuid, module, revnum = self.revsplit(rev)
        self.module = module
        self.reparent(module)
        # We assume that:
        # - requests for revisions after "stop" come from the
        # revision graph backward traversal. Cache all of them
        # down to stop, they will be used eventually.
        # - requests for revisions before "stop" come to get
        # isolated branches parents. Just fetch what is needed.
        stop = self.lastrevs.get(module, 0)
        if revnum < stop:
            stop = revnum + 1
        self._fetch_revisions(revnum, stop)
    # caller caches the result, so free it here to release memory
    return self.commits.pop(rev)
365 368
def gettags(self):
    """Return a dict mapping tag names to the revision ids they tag.

    An empty dict is returned when no tags path is set, and whatever was
    collected so far when the log cannot be read (SubversionException).
    """
    tags = {}
    if self.tags is None:
        return tags

    # svn tags are just a convention, project branches left in a
    # 'tags' directory. There is no other relationship than
    # ancestry, which is expensive to discover and makes them hard
    # to update incrementally. Worse, past revisions may be
    # referenced by tags far away in the future, requiring a deep
    # history traversal on every calculation. Current code
    # performs a single backward traversal, tracking moves within
    # the tags directory (tag renaming) and recording a new tag
    # every time a project is copied from outside the tags
    # directory. It also lists deleted tags, this behaviour may
    # change in the future.
    pendings = []
    tagspath = self.tags
    start = svn.ra.get_latest_revnum(self.ra)
    try:
        stream = self._getlog([self.tags], start, self.startrev)
        try:
            for entry in stream:
                origpaths, revnum, author, date, message = entry
                copies = [(e.copyfrom_path, e.copyfrom_rev, p) for p, e
                          in origpaths.iteritems() if e.copyfrom_path]
                copies.sort()
                # Apply moves/copies from more specific to general
                copies.reverse()

                srctagspath = tagspath
                if copies and copies[-1][2] == tagspath:
                    # Track tags directory moves
                    srctagspath = copies.pop()[0]

                for source, sourcerev, dest in copies:
                    if not dest.startswith(tagspath + '/'):
                        continue
                    for tag in pendings:
                        if tag[0].startswith(dest):
                            tagpath = source + tag[0][len(dest):]
                            tag[:2] = [tagpath, sourcerev]
                            break
                    else:
                        # No pending tag lives under dest: new tag.
                        pendings.append([source, sourcerev,
                                         dest.split('/')[-1]])

                # Tell tag renamings from tag creations
                remainings = []
                for source, sourcerev, tagname in pendings:
                    if source.startswith(srctagspath):
                        remainings.append([source, sourcerev, tagname])
                        continue
                    # From revision may be fake, get one with changes
                    try:
                        tagid = self.latest(source, sourcerev)
                        if tagid:
                            tags[tagname] = tagid
                    except SvnPathNotFound:
                        # It happens when we are following directories we
                        # assumed were copied with their parents but were
                        # really created in the tag directory.
                        pass
                pendings = remainings
                tagspath = srctagspath
        finally:
            # Release the log stream even if the traversal is aborted,
            # as latest() and _fetch_revisions() do.
            stream.close()

    except SubversionException:
        self.ui.note(_('no tags found at revision %d\n') % start)
    return tags
426 435
def converted(self, rev, destrev):
    """Append "destrev revnum" to the hg-shamap file of the working copy.

    Does nothing when the source is not a local working copy (self.wc is
    unset). The file is opened in append mode on first use, kept open in
    self.convertfp, and flushed after every record.
    """
    if not self.wc:
        return
    if self.convertfp is None:
        self.convertfp = open(os.path.join(self.wc, '.svn', 'hg-shamap'),
                              'a')
    self.convertfp.write('%s %d\n' % (destrev, self.revnum(rev)))
    self.convertfp.flush()
435 444
436 445 # -- helper functions --
437 446
def revid(self, revnum, module=None):
    """Return the unicode revision id "svn:<uuid><module>@<revnum>".

    module defaults to self.module and is decoded with self.encoding.
    """
    if not module:
        module = self.module
    return u"svn:%s%s@%s" % (self.uuid, module.decode(self.encoding),
                             revnum)
443 452
def revnum(self, rev):
    """Return the revision number of a revision id, i.e. the integer
    following its last '@'.
    """
    return int(rev.split('@')[-1])
446 455
def revsplit(self, rev):
    """Split a revision id into (uuid, module, revnum).

    rev has the form "svn:<uuid><module>@<revnum>" as built by revid().
    module is '' or a path starting with a slash. Raises ValueError if
    rev has no '@' or the revision number is not an integer.
    """
    rev = rev.encode(self.encoding)
    # Split on the LAST '@', like revnum() does: a module path may itself
    # contain '@' characters.
    at = rev.rfind('@')
    if at < 0:
        raise ValueError('malformed revision identifier: %r' % rev)
    url = rev[:at]
    revnum = int(rev[at + 1:])
    parts = url.split('/', 1)
    # Drop the leading "svn:" from the first component.
    uuid = parts.pop(0)[4:]
    mod = ''
    if parts:
        mod = '/' + parts[0]
    return uuid, mod, revnum
456 465
def latest(self, path, stop=0):
    """Find the latest revid affecting path, up to stop. It may return
    a revision in a different module, since a branch may be moved without
    a change being reported. Return None if computed module does not
    belong to rootmodule subtree.

    A stop of 0 means the latest revision of the repository. Raises
    SvnPathNotFound if path cannot be found up to stop.
    """
    if not path.startswith(self.rootmodule):
        # Requests on foreign branches may be forbidden at server level
        self.ui.debug(_('ignoring foreign branch %r\n') % path)
        return None

    if not stop:
        stop = svn.ra.get_latest_revnum(self.ra)
    try:
        # stat() takes a path relative to the transport's parent, so
        # work from the repository root and restore the parent after.
        prevmodule = self.reparent('')
        dirent = svn.ra.stat(self.ra, path.strip('/'), stop)
        self.reparent(prevmodule)
    except SubversionException:
        dirent = None
    if not dirent:
        raise SvnPathNotFound(_('%s not found up to revision %d') % (path, stop))

    # stat() gives us the previous revision on this line of development, but
    # it might be in *another module*. Fetch the log and detect renames down
    # to the latest revision.
    stream = self._getlog([path], stop, dirent.created_rev)
    try:
        for entry in stream:
            paths, revnum, author, date, message = entry
            if revnum <= dirent.created_rev:
                break

            for p in paths:
                if not path.startswith(p) or not paths[p].copyfrom_path:
                    continue
                newpath = paths[p].copyfrom_path + path[len(p):]
                self.ui.debug(_("branch renamed from %s to %s at %d\n") %
                              (path, newpath, revnum))
                path = newpath
                break
    finally:
        stream.close()

    if not path.startswith(self.rootmodule):
        self.ui.debug(_('ignoring foreign branch %r\n') % path)
        return None
    return self.revid(dirent.created_rev, path)
504 513
def get_blacklist(self):
    """Avoid certain revision numbers.
    It is not uncommon for two nearby revisions to cancel each other
    out, e.g. 'I copied trunk into a subdirectory of itself instead
    of making a branch'. The converted repository is significantly
    smaller if we ignore such revisions.

    Revision numbers are read, one per line, from "blacklist.txt" in
    the current directory into self.blacklist; lines starting with '#'
    and lines that are not integers are ignored. self.blacklist is set
    to an empty set before the file is opened, so it exists even when
    opening the file raises IOError.
    """
    self.blacklist = util.set()
    blacklist = self.blacklist
    fp = open("blacklist.txt", "r")
    try:
        for line in fp:
            if not line.startswith("#"):
                try:
                    svn_rev = int(line.strip())
                    blacklist.add(svn_rev)
                except ValueError:
                    pass # not an integer or a comment
    finally:
        # Do not rely on garbage collection to release the handle.
        fp.close()
520 529
def is_blacklisted(self, svn_rev):
    """Return True if svn_rev is in the set built by get_blacklist()."""
    return svn_rev in self.blacklist
523 532
def reparent(self, module):
    """Reparent the svn transport and return the previous parent.

    Nothing is done when module is already the current parent; module
    itself is returned in that case. A previous parent that was never
    set is reported as ''.
    """
    if self.prevmodule == module:
        return module
    svnurl = self.baseurl + urllib.quote(module)
    prevmodule = self.prevmodule
    if prevmodule is None:
        prevmodule = ''
    self.ui.debug(_("reparent to %s\n") % svnurl)
    svn.ra.reparent(self.ra, svnurl)
    self.prevmodule = module
    return prevmodule
536 545
def expandpaths(self, rev, paths, parents):
    """Expand the changed paths of rev into file entries and copies.

    paths is a sequence of (path, entry) pairs taken from the log of rev
    and parents the list of parent revision ids. Returns a tuple
    (entries, copies): entries are the recoded names of the touched
    files, relative to the module, de-duplicated with util.unique();
    copies maps a destination name to its source name. The transport is
    reparented to the module of rev if needed.
    """
    entries = []
    copyfrom = {} # Map of entrypath, revision for finding source of deleted revisions.
    copies = {}

    new_module, revnum = self.revsplit(rev)[1:]
    if new_module != self.module:
        self.module = new_module
        self.reparent(self.module)

    for path, ent in paths:
        entrypath = self.getrelpath(path)
        entry = entrypath.decode(self.encoding)

        kind = self._checkpath(entrypath, revnum)
        if kind == svn.core.svn_node_file:
            entries.append(self.recode(entry))
            if not ent.copyfrom_path or not parents:
                continue
            # Copy sources not in parent revisions cannot be represented,
            # ignore their origin for now
            pmodule, prevnum = self.revsplit(parents[0])[1:]
            if ent.copyfrom_rev < prevnum:
                continue
            copyfrom_path = self.getrelpath(ent.copyfrom_path, pmodule)
            if not copyfrom_path:
                continue
            self.ui.debug(_("copied to %s from %s@%s\n") %
                          (entrypath, copyfrom_path, ent.copyfrom_rev))
            copies[self.recode(entry)] = self.recode(copyfrom_path)
        elif kind == 0: # gone, but had better be a deleted *file*
            self.ui.debug(_("gone from %s\n") % ent.copyfrom_rev)

            # if a branch is created but entries are removed in the same
            # changeset, get the right fromrev
            # parents cannot be empty here, you cannot remove things from
            # a root revision.
            uuid, old_module, fromrev = self.revsplit(parents[0])

            basepath = old_module + "/" + self.getrelpath(path)
            entrypath = basepath

            def lookup_parts(p):
                # Return (prefix, entry) for the longest proper prefix
                # of p recorded in copyfrom, or None.
                rc = None
                parts = p.split("/")
                for i in range(len(parts)):
                    part = "/".join(parts[:i])
                    info = part, copyfrom.get(part, None)
                    if info[1] is not None:
                        self.ui.debug(_("Found parent directory %s\n") % info[1])
                        rc = info
                return rc

            self.ui.debug(_("base, entry %s %s\n") % (basepath, entrypath))

            frompath, froment = lookup_parts(entrypath) or (None, revnum - 1)

            # need to remove fragment from lookup_parts and replace with copyfrom_path
            if frompath is not None:
                self.ui.debug(_("munge-o-matic\n"))
                self.ui.debug(entrypath + '\n')
                self.ui.debug(entrypath[len(frompath):] + '\n')
                entrypath = froment.copyfrom_path + entrypath[len(frompath):]
                fromrev = froment.copyfrom_rev
                self.ui.debug(_("Info: %s %s %s %s\n") % (frompath, froment, ent, entrypath))

            # We can avoid the reparent calls if the module has not changed
            # but it is probably not worth the pain.
            prevmodule = self.reparent('')
            fromkind = svn.ra.check_path(self.ra, entrypath.strip('/'), fromrev)
            self.reparent(prevmodule)

            if fromkind == svn.core.svn_node_file: # a deleted file
                entries.append(self.recode(entry))
            elif fromkind == svn.core.svn_node_dir:
                # print "Deleted/moved non-file:", revnum, path, ent
                # children = self._find_children(path, revnum - 1)
                # print "find children %s@%d from %d action %s" % (path, revnum, ent.copyfrom_rev, ent.action)
                # Sometimes this is tricky. For example: in
                # The Subversion Repository revision 6940 a dir
                # was copied and one of its files was deleted
                # from the new location in the same commit. This
                # code can't deal with that yet.
                if ent.action == 'C':
                    children = self._find_children(path, fromrev)
                else:
                    oroot = entrypath.strip('/')
                    nroot = path.strip('/')
                    children = self._find_children(oroot, fromrev)
                    children = [s.replace(oroot,nroot) for s in children]
                # Mark all [files, not directories] as deleted.
                for child in children:
                    # Can we move a child directory and its
                    # parent in the same commit? (probably can). Could
                    # cause problems if instead of revnum -1,
                    # we have to look in (copyfrom_path, revnum - 1)
                    entrypath = self.getrelpath("/" + child, module=old_module)
                    if entrypath:
                        entry = self.recode(entrypath.decode(self.encoding))
                        if entry in copies:
                            # deleted file within a copy
                            del copies[entry]
                        else:
                            entries.append(entry)
            else:
                self.ui.debug(_('unknown path in revision %d: %s\n') %
                              (revnum, path))
        elif kind == svn.core.svn_node_dir:
            # Should probably synthesize normal file entries
            # and handle as above to clean up copy/rename handling.

            # If the directory just had a prop change,
            # then we shouldn't need to look for its children.
            if ent.action == 'M':
                continue

            # Also this could create duplicate entries. Not sure
            # whether this will matter. Maybe should make entries a set.
            # print "Changed directory", revnum, path, ent.action, ent.copyfrom_path, ent.copyfrom_rev
            # This will fail if a directory was copied
            # from another branch and then some of its files
            # were deleted in the same transaction.
            children = util.sort(self._find_children(path, revnum))
            for child in children:
                # Can we move a child directory and its
                # parent in the same commit? (probably can). Could
                # cause problems if instead of revnum -1,
                # we have to look in (copyfrom_path, revnum - 1)
                entrypath = self.getrelpath("/" + child)
                # print child, self.module, entrypath
                if entrypath:
                    # Need to filter out directories here...
                    kind = self._checkpath(entrypath, revnum)
                    if kind != svn.core.svn_node_dir:
                        entries.append(self.recode(entrypath))

            # Copies here (must copy all from source)
            # Probably not a real problem for us if
            # source does not exist
            if not ent.copyfrom_path or not parents:
                continue
            # Copy sources not in parent revisions cannot be represented,
            # ignore their origin for now
            pmodule, prevnum = self.revsplit(parents[0])[1:]
            if ent.copyfrom_rev < prevnum:
                continue
            copyfrompath = ent.copyfrom_path.decode(self.encoding)
            copyfrompath = self.getrelpath(copyfrompath, pmodule)
            if not copyfrompath:
                continue
            copyfrom[path] = ent
            self.ui.debug(_("mark %s came from %s:%d\n")
                          % (path, copyfrompath, ent.copyfrom_rev))
            children = self._find_children(ent.copyfrom_path, ent.copyfrom_rev)
            children.sort()
            for child in children:
                entrypath = self.getrelpath("/" + child, pmodule)
                if not entrypath:
                    continue
                entry = entrypath.decode(self.encoding)
                copytopath = path + entry[len(copyfrompath):]
                copytopath = self.getrelpath(copytopath)
                copies[self.recode(copytopath)] = self.recode(entry, pmodule)

    return (util.unique(entries), copies)
702 711
703 712 def _fetch_revisions(self, from_revnum, to_revnum):
704 713 if from_revnum < to_revnum:
705 714 from_revnum, to_revnum = to_revnum, from_revnum
706 715
707 716 self.child_cset = None
708 717
709 718 def isdescendantof(parent, child):
710 719 if not child or not parent or not child.startswith(parent):
711 720 return False
712 721 subpath = child[len(parent):]
713 722 return len(subpath) > 1 and subpath[0] == '/'
714 723
715 724 def parselogentry(orig_paths, revnum, author, date, message):
716 725 """Return the parsed commit object or None, and True if
717 726 the revision is a branch root.
718 727 """
719 728 self.ui.debug(_("parsing revision %d (%d changes)\n") %
720 729 (revnum, len(orig_paths)))
721 730
722 731 branched = False
723 732 rev = self.revid(revnum)
724 733 # branch log might return entries for a parent we already have
725 734
726 735 if (rev in self.commits or revnum < to_revnum):
727 736 return None, branched
728 737
729 738 parents = []
730 739 # check whether this revision is the start of a branch or part
731 740 # of a branch renaming
732 741 orig_paths = util.sort(orig_paths.items())
733 742 root_paths = [(p,e) for p,e in orig_paths if self.module.startswith(p)]
734 743 if root_paths:
735 744 path, ent = root_paths[-1]
736 745 if ent.copyfrom_path:
737 746 # If dir was moved while one of its file was removed
738 747 # the log may look like:
739 748 # A /dir (from /dir:x)
740 749 # A /dir/a (from /dir/a:y)
741 750 # A /dir/b (from /dir/b:z)
742 751 # ...
743 752 # for all remaining children.
744 753 # Let's take the highest child element from rev as source.
745 754 copies = [(p,e) for p,e in orig_paths[:-1]
746 755 if isdescendantof(ent.copyfrom_path, e.copyfrom_path)]
747 756 fromrev = max([e.copyfrom_rev for p,e in copies] + [ent.copyfrom_rev])
748 757 branched = True
749 758 newpath = ent.copyfrom_path + self.module[len(path):]
750 759 # ent.copyfrom_rev may not be the actual last revision
751 760 previd = self.latest(newpath, fromrev)
752 761 if previd is not None:
753 762 prevmodule, prevnum = self.revsplit(previd)[1:]
754 763 if prevnum >= self.startrev:
755 764 parents = [previd]
756 765 self.ui.note(_('found parent of branch %s at %d: %s\n') %
757 766 (self.module, prevnum, prevmodule))
758 767 else:
759 768 self.ui.debug(_("No copyfrom path, don't know what to do.\n"))
760 769
761 770 paths = []
762 771 # filter out unrelated paths
763 772 for path, ent in orig_paths:
764 773 if self.getrelpath(path) is None:
765 774 continue
766 775 paths.append((path, ent))
767 776
768 777 # Example SVN datetime. Includes microseconds.
769 778 # ISO-8601 conformant
770 779 # '2007-01-04T17:35:00.902377Z'
771 780 date = util.parsedate(date[:19] + " UTC", ["%Y-%m-%dT%H:%M:%S"])
772 781
773 782 log = message and self.recode(message) or ''
774 783 author = author and self.recode(author) or ''
775 784 try:
776 785 branch = self.module.split("/")[-1]
777 786 if branch == 'trunk':
778 787 branch = ''
779 788 except IndexError:
780 789 branch = None
781 790
782 791 cset = commit(author=author,
783 792 date=util.datestr(date),
784 793 desc=log,
785 794 parents=parents,
786 795 branch=branch,
787 796 rev=rev.encode('utf-8'))
788 797
789 798 self.commits[rev] = cset
790 799 # The parents list is *shared* among self.paths and the
791 800 # commit object. Both will be updated below.
792 801 self.paths[rev] = (paths, cset.parents)
793 802 if self.child_cset and not self.child_cset.parents:
794 803 self.child_cset.parents[:] = [rev]
795 804 self.child_cset = cset
796 805 return cset, branched
797 806
798 807 self.ui.note(_('fetching revision log for "%s" from %d to %d\n') %
799 808 (self.module, from_revnum, to_revnum))
800 809
801 810 try:
802 811 firstcset = None
803 812 lastonbranch = False
804 813 stream = self._getlog([self.module], from_revnum, to_revnum)
805 814 try:
806 815 for entry in stream:
807 816 paths, revnum, author, date, message = entry
808 817 if revnum < self.startrev:
809 818 lastonbranch = True
810 819 break
811 820 if self.is_blacklisted(revnum):
812 821 self.ui.note(_('skipping blacklisted revision %d\n')
813 822 % revnum)
814 823 continue
815 824 if paths is None:
816 825 self.ui.debug(_('revision %d has no entries\n') % revnum)
817 826 continue
818 827 cset, lastonbranch = parselogentry(paths, revnum, author,
819 828 date, message)
820 829 if cset:
821 830 firstcset = cset
822 831 if lastonbranch:
823 832 break
824 833 finally:
825 834 stream.close()
826 835
827 836 if not lastonbranch and firstcset and not firstcset.parents:
828 837 # The first revision of the sequence (the last fetched one)
829 838 # has invalid parents if not a branch root. Find the parent
830 839 # revision now, if any.
831 840 try:
832 841 firstrevnum = self.revnum(firstcset.rev)
833 842 if firstrevnum > 1:
834 843 latest = self.latest(self.module, firstrevnum - 1)
835 844 if latest:
836 845 firstcset.parents.append(latest)
837 except util.Abort:
846 except SvnPathNotFound:
838 847 pass
839 848 except SubversionException, (inst, num):
840 849 if num == svn.core.SVN_ERR_FS_NO_SUCH_REVISION:
841 850 raise util.Abort(_('svn: branch has no revision %s') % to_revnum)
842 851 raise
843 852
def _getfile(self, file, rev):
    """Fetch the contents and mode flag of one file at one revision.

    rev is split with self.revsplit() into a module and a revision
    number; when the module differs from self.module, self.module is
    updated and the RA session is reparented onto it before the fetch.

    Returns a (data, mode) pair. mode is 'l' when the file properties
    contain "svn:special", otherwise 'x' when they contain
    "svn:executable", otherwise ''. For 'l' entries a leading "link "
    prefix is removed from data.

    Raises IOError when Subversion reports that the path does not exist;
    any other SubversionException is propagated unchanged.
    """
    # TODO: ra.get_file transmits the whole file instead of diffs.
    buf = StringIO()
    mode = ''
    try:
        module, revnum = self.revsplit(rev)[1:]
        if module != self.module:
            self.module = module
            self.reparent(self.module)
        props = svn.ra.get_file(self.ra, file, revnum, buf)
        # Some versions of the bindings return a list; the properties
        # are its last element.
        if isinstance(props, list):
            props = props[-1]
        if "svn:special" in props:
            mode = 'l'
        elif "svn:executable" in props:
            mode = 'x'
        else:
            mode = ''
    except SubversionException:
        # Fetch the active exception explicitly so the handler does not
        # depend on a particular "except ... <name>" spelling.
        err = sys.exc_info()[1]
        missing = (svn.core.SVN_ERR_FS_NOT_FOUND,
                   svn.core.SVN_ERR_RA_DAV_PATH_NOT_FOUND)
        if err.apr_err in missing: # File not found
            raise IOError()
        raise
    data = buf.getvalue()
    prefix = "link "
    if mode == 'l' and data.startswith(prefix):
        data = data[len(prefix):]
    return data, mode
870 879
def _find_children(self, path, revnum):
    """List the entries found below path at revision revnum.

    path is taken relative to self.baseurl; surrounding slashes are
    ignored. Each returned item is the stripped path joined with one
    entry name reported by svn.client.ls().
    """
    relpath = path.strip('/')
    url = (self.baseurl + '/' + urllib.quote(relpath)).strip('/')
    pool = Pool()
    # The third argument (True) is presumably the "recurse" flag of
    # svn_client_ls -- confirm against the bindings in use.
    listing = svn.client.ls(url, optrev(revnum), True, self.ctx, pool)
    children = []
    for name in listing.keys():
        children.append('%s/%s' % (relpath, name))
    return children
877 886
def getrelpath(self, path, module=None):
    """Return path relative to module, or None if it lies outside it.

    module defaults to self.module. A path equal to the module (with or
    without a trailing slash) yields ''. When the path is not under the
    module a debug message is emitted and None is returned.
    """
    if module is None:
        module = self.module
    # Given the repository url of this wc, say
    #   "http://server/plone/CMFPlone/branches/Plone-2_0-branch"
    # extract the "entry" portion (a relative path) from what
    # svn log --xml says, ie
    #   "/CMFPlone/branches/Plone-2_0-branch/tests/PloneTestCase.py"
    # that is to say "tests/PloneTestCase.py"
    if path.startswith(module):
        tail = path.rstrip('/')[len(module):]
        if tail == '':
            return tail
        if tail.startswith('/'):
            return tail[1:]
        # Anything else merely shares a textual prefix with the module
        # (another directory whose name starts the same way).

    # The path is outside our tracked tree...
    self.ui.debug(_('%r is not under %r, ignoring\n') % (path, module))
    return None
897 906
def _checkpath(self, path, revnum):
    """Return what svn.ra.check_path() reports for path at revnum."""
    # ra.check_path does not like leading slashes very much, it leads
    # to PROPFIND subversion errors, so strip them before the call.
    relpath = path.strip('/')
    return svn.ra.check_path(self.ra, relpath, revnum)
902 911
def _getlog(self, paths, start, end, limit=0, discover_changed_paths=True,
            strict_node_history=False):
    """Start a 'hg debugsvnlog' child process and return its log stream.

    The request (base URL, normalized paths, revision range and the
    remaining options) is serialized with encodeargs() and written to
    the child's stdin; the child's stdout is wrapped in a logstream,
    which is returned to the caller.
    """
    def normalize(p):
        # Normalize path names, svn >= 1.5 only wants paths relative to
        # supplied URL. Paths without a leading slash are taken relative
        # to the current module.
        if not p.startswith('/'):
            p = self.module + '/' + p
        return p.strip('/')

    request = encodeargs([self.baseurl,
                          [normalize(p) for p in paths],
                          start, end, limit,
                          discover_changed_paths,
                          strict_node_history])
    cmd = '%s debugsvnlog' % util.shellquote(util.hgexecutable())
    stdin, stdout = util.popen2(cmd, 'b')
    stdin.write(request)
    stdin.close()
    return logstream(stdout)
921 930
# Shell script written to hooks/pre-revprop-change of repositories created
# by svn_sink. It permits modifying svn:log and adding the
# hg:convert-branch and hg:convert-rev revision properties; every other
# revision property change is rejected with exit status 1.
pre_revprop_change = '''#!/bin/sh

REPOS="$1"
REV="$2"
USER="$3"
PROPNAME="$4"
ACTION="$5"

if [ "$ACTION" = "M" -a "$PROPNAME" = "svn:log" ]; then exit 0; fi
if [ "$ACTION" = "A" -a "$PROPNAME" = "hg:convert-branch" ]; then exit 0; fi
if [ "$ACTION" = "A" -a "$PROPNAME" = "hg:convert-rev" ]; then exit 0; fi

echo "Changing prohibited revision property" >&2
exit 1
'''
937 946
class svn_sink(converter_sink, commandline):
    """Conversion sink that commits changesets through the 'svn' client.

    Changes are applied to a Subversion working copy on disk (self.wc)
    and committed by running svn commands via the commandline base class.
    Pending deletions, copies and executable-bit changes are queued on
    the instance and flushed by putcommit().
    """
    # Extracts the new revision number from the output of 'svn commit'.
    commit_re = re.compile(r'Committed revision (\d+).', re.M)

    def prerun(self):
        """Enter the working copy, when one is set.

        Presumably called by commandline before each command; verify
        against the commandline base class.
        """
        if self.wc:
            os.chdir(self.wc)

    def postrun(self):
        """Return to the directory that was current at construction."""
        if self.wc:
            os.chdir(self.cwd)

    def join(self, name):
        """Return the path of name inside the working copy's .svn dir."""
        return os.path.join(self.wc, '.svn', name)

    def revmapfile(self):
        """Return the path of the 'hg-shamap' file in the .svn dir."""
        return self.join('hg-shamap')

    def authorfile(self):
        """Return the path of the 'hg-authormap' file in the .svn dir."""
        return self.join('hg-authormap')

    def __init__(self, ui, path):
        """Prepare a working copy for path and read the repository UUID.

        If path already is a working copy (it contains .svn/entries) it
        is updated and used directly. Otherwise path is checked out into
        '<basename of path>-wc' below the current directory; when the
        parent directory of path exists locally, path is treated as a
        local repository, created with 'svnadmin create' if it has no
        db/fs-type file, and addressed through a file:// URL.
        """
        converter_sink.__init__(self, ui, path)
        commandline.__init__(self, ui, 'svn')
        # Operations queued by putfile()/putcommit() until the next commit.
        self.delete = []
        self.setexec = []
        self.delexec = []
        self.copies = []
        self.wc = None
        self.cwd = os.getcwd()

        path = os.path.realpath(path)

        # Holds the repository path when this call created the repository.
        created = False
        if os.path.isfile(os.path.join(path, '.svn', 'entries')):
            self.wc = path
            self.run0('update')
        else:
            wcpath = os.path.join(os.getcwd(), os.path.basename(path) + '-wc')

            if os.path.isdir(os.path.dirname(path)):
                if not os.path.exists(os.path.join(path, 'db', 'fs-type')):
                    ui.status(_('initializing svn repo %r\n') %
                              os.path.basename(path))
                    commandline(ui, 'svnadmin').run0('create', path)
                    created = path
                path = util.normpath(path)
                if not path.startswith('/'):
                    path = '/' + path
                path = 'file://' + path

            ui.status(_('initializing svn wc %r\n') % os.path.basename(wcpath))
            self.run0('checkout', path, wcpath)

            self.wc = wcpath
        self.opener = util.opener(self.wc)
        self.wopener = util.opener(self.wc)
        self.childmap = mapfile(ui, self.join('hg-childmap'))
        # None when the working copy's filesystem has no usable exec bit.
        self.is_exec = util.checkexec(self.wc) and util.is_exec or None

        if created:
            # Install a hook allowing the revision property changes made
            # by putcommit() on the freshly created repository.
            hook = os.path.join(created, 'hooks', 'pre-revprop-change')
            fp = open(hook, 'w')
            fp.write(pre_revprop_change)
            fp.close()
            util.set_flags(hook, False, True)

        xport = transport.SvnRaTransport(url=geturl(path))
        self.uuid = svn.ra.get_uuid(xport.ra)

    def wjoin(self, *names):
        """Join names onto the working copy directory."""
        return os.path.join(self.wc, *names)

    def putfile(self, filename, flags, data):
        """Write one file into the working copy.

        With 'l' in flags a symlink pointing at data is created.
        Otherwise data is written as a regular file, its executable bit
        is set from 'x' in flags, and the matching svn:executable
        property change is queued in self.setexec / self.delexec.
        """
        if 'l' in flags:
            self.wopener.symlink(data, filename)
        else:
            try:
                wpath = self.wjoin(filename)
                if os.path.islink(wpath):
                    # filename is relative to the working copy, not to
                    # the process cwd: remove the link that was tested.
                    os.unlink(wpath)
            except OSError:
                pass
            self.wopener(filename, 'w').write(data)

            if self.is_exec:
                was_exec = self.is_exec(self.wjoin(filename))
            else:
                # On filesystems not supporting execute-bit, there is no way
                # to know if it is set but asking subversion. Setting it
                # systematically is just as expensive and much simpler.
                was_exec = 'x' not in flags

            util.set_flags(self.wjoin(filename), False, 'x' in flags)
            if was_exec:
                if 'x' not in flags:
                    self.delexec.append(filename)
            else:
                if 'x' in flags:
                    self.setexec.append(filename)

    def _copyfile(self, source, dest):
        """Record a copy from source to dest with 'svn copy'.

        An already existing destination is moved aside during the svn
        command and moved back afterwards, so its content is kept.
        """
        # SVN's copy command pukes if the destination file exists, but
        # our copyfile method expects to record a copy that has
        # already occurred. Cross the semantic gap.
        wdest = self.wjoin(dest)
        exists = os.path.exists(wdest)
        if exists:
            fd, tempname = tempfile.mkstemp(
                prefix='hg-copy-', dir=os.path.dirname(wdest))
            os.close(fd)
            # Only the unique name is needed: free it for the rename.
            os.unlink(tempname)
            os.rename(wdest, tempname)
        try:
            self.run0('copy', source, dest)
        finally:
            if exists:
                try:
                    os.unlink(wdest)
                except OSError:
                    pass
                os.rename(tempname, wdest)

    def dirs_of(self, files):
        """Return the set of directories related to files.

        Contains every entry of files that is a directory in the working
        copy, plus f[:i] for each index i produced by
        strutil.rfindall(f, '/') (presumably every '/' position in f,
        i.e. all parent directories).
        """
        dirs = util.set()
        for f in files:
            if os.path.isdir(self.wjoin(f)):
                dirs.add(f)
            for i in strutil.rfindall(f, '/'):
                dirs.add(f[:i])
        return dirs

    def add_dirs(self, files):
        """'svn add' the directories of files lacking .svn/entries.

        Returns the list of directories that were added.
        """
        add_dirs = [d for d in util.sort(self.dirs_of(files))
                    if not os.path.exists(self.wjoin(d, '.svn', 'entries'))]
        if add_dirs:
            self.xargs(add_dirs, 'add', non_recursive=True, quiet=True)
        return add_dirs

    def add_files(self, files):
        """'svn add' files (when there are any) and return them."""
        if files:
            self.xargs(files, 'add', quiet=True)
        return files

    def tidy_dirs(self, names):
        """'svn delete' directories of names that only contain '.svn'.

        Directories are visited in reverse sorted order. Returns the
        list of directories that were deleted.
        """
        dirs = util.sort(self.dirs_of(names))
        dirs.reverse()
        deleted = []
        for d in dirs:
            wd = self.wjoin(d)
            # os.listdir() returns a list of names, so the comparison has
            # to be made against a list; comparing with the bare string
            # '.svn' can never be true.
            if os.listdir(wd) == ['.svn']:
                self.run0('delete', d)
                deleted.append(d)
        return deleted

    def addchild(self, parent, child):
        """Record child as the svn revision committed on top of parent."""
        self.childmap[parent] = child

    def revid(self, rev):
        """Return the identifier 'svn:<repository uuid>@<rev>'."""
        return u"svn:%s@%s" % (self.uuid, rev)

    def putcommit(self, files, copies, parents, commit, source):
        """Apply one changeset to the working copy and commit it.

        files is a sequence of (name, version) pairs read from source;
        names for which source.getfile() raises IOError are scheduled
        for deletion. If one of parents already has a child recorded in
        self.childmap, that child's identifier is returned without
        committing. Otherwise the queued operations are flushed, the
        commit is made and the new revision's identifier is returned.

        Raises util.Abort when the output of 'svn commit' does not
        contain the committed revision number.
        """
        # Apply changes to working copy
        for f, v in files:
            try:
                data = source.getfile(f, v)
            except IOError:
                self.delete.append(f)
            else:
                e = source.getmode(f, v)
                self.putfile(f, e, data)
                if f in copies:
                    self.copies.append([copies[f], f])
        files = [f[0] for f in files]

        for parent in parents:
            try:
                return self.revid(self.childmap[parent])
            except KeyError:
                pass
        entries = util.set(self.delete)
        files = util.frozenset(files)
        entries.update(self.add_dirs(files.difference(entries)))
        if self.copies:
            for s, d in self.copies:
                self._copyfile(s, d)
            self.copies = []
        if self.delete:
            self.xargs(self.delete, 'delete')
            self.delete = []
        entries.update(self.add_files(files.difference(entries)))
        entries.update(self.tidy_dirs(entries))
        if self.delexec:
            self.xargs(self.delexec, 'propdel', 'svn:executable')
            self.delexec = []
        if self.setexec:
            self.xargs(self.setexec, 'propset', 'svn:executable', '*')
            self.setexec = []

        # The commit message is handed to svn through a temporary file.
        fd, messagefile = tempfile.mkstemp(prefix='hg-convert-')
        fp = os.fdopen(fd, 'w')
        fp.write(commit.desc)
        fp.close()
        try:
            output = self.run0('commit',
                               username=util.shortuser(commit.author),
                               file=messagefile,
                               encoding='utf-8')
            try:
                rev = self.commit_re.search(output).group(1)
            except AttributeError:
                # search() returned None: no "Committed revision" line.
                self.ui.warn(_('unexpected svn output:\n'))
                self.ui.warn(output)
                raise util.Abort(_('unable to cope with svn output'))
            # Record where the revision came from as revision properties.
            if commit.rev:
                self.run('propset', 'hg:convert-rev', commit.rev,
                         revprop=True, revision=rev)
            if commit.branch and commit.branch != 'default':
                self.run('propset', 'hg:convert-branch', commit.branch,
                         revprop=True, revision=rev)
            for parent in parents:
                self.addchild(parent, rev)
            return self.revid(rev)
        finally:
            os.unlink(messagefile)

    def puttags(self, tags):
        """Tags are not supported yet: only emit a warning."""
        self.ui.warn(_('XXX TAGS NOT IMPLEMENTED YET\n'))
General Comments 0
You need to be logged in to leave comments. Login now