##// END OF EJS Templates
convert: normalize paths sent to svn get_log (issue 1219)
Patrick Mezard -
r6850:c6bb8fae default
parent child Browse files
Show More
@@ -1,1151 +1,1158 b''
1 1 # Subversion 1.4/1.5 Python API backend
2 2 #
3 3 # Copyright(C) 2007 Daniel Holth et al
4 4 #
5 5 # Configuration options:
6 6 #
7 7 # convert.svn.trunk
8 8 # Relative path to the trunk (default: "trunk")
9 9 # convert.svn.branches
10 10 # Relative path to tree of branches (default: "branches")
11 11 # convert.svn.tags
12 12 # Relative path to tree of tags (default: "tags")
13 13 #
14 14 # Set these in a hgrc, or on the command line as follows:
15 15 #
16 16 # hg convert --config convert.svn.trunk=wackoname [...]
17 17
18 18 import locale
19 19 import os
20 20 import re
21 21 import sys
22 22 import cPickle as pickle
23 23 import tempfile
24 24
25 25 from mercurial import strutil, util
26 26 from mercurial.i18n import _
27 27
28 28 # Subversion stuff. Works best with very recent Python SVN bindings
29 29 # e.g. SVN 1.5 or backports. Thanks to the bzr folks for enhancing
30 30 # these bindings.
31 31
32 32 from cStringIO import StringIO
33 33
34 34 from common import NoRepo, commit, converter_source, encodeargs, decodeargs
35 35 from common import commandline, converter_sink, mapfile
36 36
37 37 try:
38 38 from svn.core import SubversionException, Pool
39 39 import svn
40 40 import svn.client
41 41 import svn.core
42 42 import svn.ra
43 43 import svn.delta
44 44 import transport
45 45 except ImportError:
46 46 pass
47 47
def geturl(path):
    """Return a Subversion URL for path, falling back to path itself.

    The Subversion client is first asked to map the canonicalized path
    to a URL. If that raises SubversionException and path is an
    existing local directory, a file:// URL is built from its
    normalized absolute path. Anything else is returned unchanged.
    """
    try:
        canonical = svn.core.svn_path_canonicalize(path)
        return svn.client.url_from_path(canonical)
    except SubversionException:
        # The bindings could not resolve it, try the local fallbacks
        pass
    if not os.path.isdir(path):
        return path
    localpath = os.path.normpath(os.path.abspath(path))
    if os.name == 'nt':
        localpath = '/' + util.normpath(localpath)
    return 'file://%s' % localpath
59 59
def optrev(number):
    """Return an svn_opt_revision_t designating revision `number`."""
    revision = svn.core.svn_opt_revision_t()
    revision.kind = svn.core.svn_opt_revision_number
    revision.value.number = number
    return revision
65 65
class changedpath(object):
    """Plain copy of the copyfrom_path, copyfrom_rev and action
    attributes of a changed-path entry.

    get_log_child() substitutes these objects for the entries it
    receives before pickling them.
    """
    def __init__(self, p):
        for name in ('copyfrom_path', 'copyfrom_rev', 'action'):
            setattr(self, name, getattr(p, name))
71 71
72 72 def get_log_child(fp, url, paths, start, end, limit=0, discover_changed_paths=True,
73 73 strict_node_history=False):
74 74 protocol = -1
75 75 def receiver(orig_paths, revnum, author, date, message, pool):
76 76 if orig_paths is not None:
77 77 for k, v in orig_paths.iteritems():
78 78 orig_paths[k] = changedpath(v)
79 79 pickle.dump((orig_paths, revnum, author, date, message),
80 80 fp, protocol)
81 81
82 82 try:
83 83 # Use an ra of our own so that our parent can consume
84 84 # our results without confusing the server.
85 85 t = transport.SvnRaTransport(url=url)
86 86 svn.ra.get_log(t.ra, paths, start, end, limit,
87 87 discover_changed_paths,
88 88 strict_node_history,
89 89 receiver)
90 90 except SubversionException, (inst, num):
91 91 pickle.dump(num, fp, protocol)
92 92 except IOError:
93 93 # Caller may interrupt the iteration
94 94 pickle.dump(None, fp, protocol)
95 95 else:
96 96 pickle.dump(None, fp, protocol)
97 97 fp.close()
98 98 # With large history, cleanup process goes crazy and suddenly
99 99 # consumes *huge* amount of memory. The output file being closed,
100 100 # there is no need for clean termination.
101 101 os._exit(0)
102 102
def debugsvnlog(ui, **opts):
    """Fetch SVN log in a subprocess and channel them back to parent to
    avoid memory collection issues.

    The get_log_child() arguments are decoded from everything read on
    stdin, and the log is written to stdout. Both streams are put in
    binary mode first.
    """
    for stream in (sys.stdin, sys.stdout):
        util.set_binary(stream)
    childargs = decodeargs(sys.stdin.read())
    get_log_child(sys.stdout, *childargs)
111 111
class logstream(object):
    """Interruptible revision log iterator.

    Wraps a file-like object holding a sequence of pickles, as written
    by get_log_child(): 5-item log entries followed by a terminator.
    A None terminator ends the iteration, any other non-entry value is
    reported by raising SubversionException.

    NOTE: the stream is unpickled, so it must come from a trusted
    producer.
    """
    def __init__(self, stdout):
        self._stdout = stdout

    def __iter__(self):
        while True:
            entry = pickle.load(self._stdout)
            if entry is None:
                # Normal end of stream
                break
            try:
                orig_paths, revnum, author, date, message = entry
            except (TypeError, ValueError):
                # Not a log entry: the child sent an error value. Only
                # unpacking failures are caught here so that unrelated
                # exceptions are not turned into svn errors.
                raise SubversionException("child raised exception", entry)
            yield entry

    def close(self):
        """Close the underlying stream. Calling it again is a no-op."""
        if self._stdout:
            self._stdout.close()
            self._stdout = None
132 132
def get_log(url, paths, start, end, limit=0, discover_changed_paths=True,
            strict_node_history=False):
    """Start an "hg debugsvnlog" child process and return a logstream
    reading the log entries from its standard output.

    The arguments are encoded with encodeargs() and written to the
    child's standard input, which is then closed.
    """
    payload = encodeargs([url, paths, start, end, limit,
                          discover_changed_paths, strict_node_history])
    cmd = '%s debugsvnlog' % util.shellquote(util.hgexecutable())
    childin, childout = os.popen2(cmd, 'b')
    childin.write(payload)
    childin.close()
    return logstream(childout)
144
145 133 # SVN conversion code stolen from bzr-svn and tailor
146 134 #
147 135 # Subversion looks like a versioned filesystem, branches structures
148 136 # are defined by conventions and not enforced by the tool. First,
149 137 # we define the potential branches (modules) as "trunk" and "branches"
150 138 # children directories. Revisions are then identified by their
151 139 # module and revision number (and a repository identifier).
152 140 #
153 141 # The revision graph is really a tree (or a forest). By default, a
154 142 # revision parent is the previous revision in the same module. If the
155 143 # module directory is copied/moved from another module then the
156 144 # revision is the module root and its parent the source revision in
157 145 # the parent module. A revision has at most one parent.
158 146 #
159 147 class svn_source(converter_source):
def __init__(self, ui, url, rev=None):
    """Open the Subversion repository at url.

    url may end with "@REV" to select the latest revision to convert.
    A non-empty rev argument overrides it and must be an integer.

    Raises NoRepo when the Subversion bindings are missing or the
    transport cannot be set up, and util.Abort when rev or the
    convert.svn.startrev setting is not an integer, or when no
    revision is found in the module.
    """
    super(svn_source, self).__init__(ui, url, rev=rev)

    # The name is only bound when the svn imports succeeded
    try:
        SubversionException
    except NameError:
        raise NoRepo('Subversion python bindings could not be loaded')

    self.encoding = locale.getpreferredencoding()
    self.lastrevs = {}

    latest = None
    # Support file://path@rev syntax. Useful e.g. to convert
    # deleted branches.
    at = url.rfind('@')
    if at >= 0:
        try:
            latest = int(url[at + 1:])
        except ValueError:
            # Not a revision suffix, keep the url as it is
            pass
        else:
            url = url[:at]
    self.url = geturl(url)
    self.encoding = 'UTF-8' # Subversion is always nominal UTF-8
    try:
        self.transport = transport.SvnRaTransport(url=self.url)
        self.ra = self.transport.ra
        self.ctx = self.transport.client
        self.base = svn.ra.get_repos_root(self.ra)
        # Module is either empty or a repository path starting with
        # a slash and not ending with a slash.
        self.module = self.url[len(self.base):]
        self.prevmodule = None
        self.rootmodule = self.module
        self.commits = {}
        self.paths = {}
        self.uuid = svn.ra.get_uuid(self.ra).decode(self.encoding)
    except SubversionException:
        ui.print_exc()
        raise NoRepo("%s does not look like a Subversion repo" % self.url)

    if rev:
        try:
            latest = int(rev)
        except ValueError:
            raise util.Abort('svn: revision %s is not an integer' % rev)

    self.startrev = self.ui.config('convert', 'svn.startrev', default=0)
    try:
        # Negative start revisions are clamped to zero
        self.startrev = max(int(self.startrev), 0)
    except ValueError:
        raise util.Abort(_('svn: start revision %s is not an integer')
                         % self.startrev)

    try:
        self.get_blacklist()
    except IOError:
        # The blacklist file is optional
        pass

    self.head = self.latest(self.module, latest)
    if not self.head:
        raise util.Abort(_('no revision found in module %s') %
                         self.module.encode(self.encoding))
    self.last_changed = self.revnum(self.head)

    self._changescache = None

    # Remember the working copy, if url is one, for converted()
    self.wc = None
    if os.path.exists(os.path.join(url, '.svn/entries')):
        self.wc = url
    self.convertfp = None
233 221
def setrevmap(self, revmap):
    """Record in self.lastrevs the highest revision number found in
    the keys of revmap for each module."""
    highest = {}
    for revid in revmap.iterkeys():
        module, revnum = self.revsplit(revid)[1:]
        if module not in highest or revnum > highest[module]:
            highest[module] = revnum
    self.lastrevs = highest
242 230
def exists(self, path, optrev):
    """Return True if listing path, relative to the source URL, at
    revision optrev succeeds, and False if it raises
    SubversionException."""
    target = self.url.rstrip('/') + '/' + path
    try:
        svn.client.ls(target, optrev, False, self.ctx)
    except SubversionException:
        return False
    return True
250 238
def getheads(self):
    """Return the list of head revision ids to convert.

    The trunk, tags and branches locations are read from the
    convert.svn.trunk, convert.svn.tags and convert.svn.branches
    settings, defaulting to directories of the same names. A location
    set to an empty string is disabled. When a trunk is found it
    becomes the current module and supplies the first head; every
    non-empty directory below the branches location adds another one.

    Side effects: updates self.module, self.head, self.tags and
    self.heads.

    Raises util.Abort when an explicitly configured location does not
    exist, when the trunk has no revision, or when a start revision is
    used with several heads or lies after the only head.
    """

    def isdir(path, revnum):
        kind = self._checkpath(path, revnum)
        return kind == svn.core.svn_node_dir

    def getcfgpath(name, rev):
        # Return the location of `name` relative to the source URL, or
        # None if it is disabled or missing.
        cfgpath = self.ui.config('convert', 'svn.' + name)
        if cfgpath is not None and cfgpath.strip() == '':
            return None
        path = (cfgpath or name).strip('/')
        if not self.exists(path, rev):
            if cfgpath:
                # Only explicitly requested locations are mandatory
                raise util.Abort(_('expected %s to be at %r, but not found')
                                 % (name, path))
            return None
        self.ui.note(_('found %s at %r\n') % (name, path))
        return path

    rev = optrev(self.last_changed)
    oldmodule = ''
    trunk = getcfgpath('trunk', rev)
    self.tags = getcfgpath('tags', rev)
    branches = getcfgpath('branches', rev)

    # If the project has a trunk or branches, we will extract heads
    # from them. We keep the project root otherwise.
    if trunk:
        oldmodule = self.module or ''
        self.module += '/' + trunk
        self.head = self.latest(self.module, self.last_changed)
        if not self.head:
            raise util.Abort(_('no revision found in module %s') %
                             self.module.encode(self.encoding))

    # First head in the list is the module's head
    self.heads = [self.head]
    if self.tags is not None:
        self.tags = '%s/%s' % (oldmodule , (self.tags or 'tags'))

    # Check if branches bring a few more heads to the list
    if branches:
        rpath = self.url.strip('/')
        branchnames = svn.client.ls(rpath + '/' + branches, rev, False,
                                    self.ctx)
        for branch in branchnames.keys():
            module = '%s/%s/%s' % (oldmodule, branches, branch)
            if not isdir(module, self.last_changed):
                continue
            brevid = self.latest(module, self.last_changed)
            if not brevid:
                self.ui.note(_('ignoring empty branch %s\n') %
                             branch.encode(self.encoding))
                continue
            self.ui.note('found branch %s at %d\n' %
                         (branch, self.revnum(brevid)))
            self.heads.append(brevid)

    if self.startrev and self.heads:
        if len(self.heads) > 1:
            raise util.Abort(_('svn: start revision is not supported '
                               'with more than one branch'))
        revnum = self.revnum(self.heads[0])
        if revnum < self.startrev:
            raise util.Abort(_('svn: no revision found after start revision %d')
                             % self.startrev)

    return self.heads
319 307
def getfile(self, file, rev):
    """Return the content of file at rev, remembering its mode in
    self.modecache for a later getmode() call."""
    contents, flags = self._getfile(file, rev)
    self.modecache[file, rev] = flags
    return contents
324 312
def getmode(self, file, rev):
    """Return the mode stored in self.modecache for file at rev.

    Raises KeyError when there is no entry for (file, rev).
    """
    key = (file, rev)
    return self.modecache[key]
327 315
def getchanges(self, rev):
    """Return (files, copies) for revision rev.

    files is a sorted list of (path, rev) pairs and copies a
    dictionary. A result stored by getchangedfiles() for the same rev
    is returned as is. Otherwise the mode cache is reset, the entry of
    self.paths for rev is consumed, and the changes are computed with
    expandpaths() when the revision has parents, or by listing every
    file of the module when it has none.
    """
    cached = self._changescache
    if cached and cached[0] == rev:
        return cached[1]
    self._changescache = None
    self.modecache = {}
    paths, parents = self.paths[rev]
    if parents:
        files, copies = self.expandpaths(rev, paths, parents)
    else:
        # Perform a full checkout on roots
        module, revnum = self.revsplit(rev)[1:]
        listing = svn.client.ls(self.base + module, optrev(revnum),
                                True, self.ctx)
        files = [name for name, dirent in listing.iteritems()
                 if dirent.kind == svn.core.svn_node_file]
        copies = {}

    files.sort()
    files = [(name, rev) for name in files]

    # caller caches the result, so free it here to release memory
    del self.paths[rev]
    return (files, copies)
351 339
def getchangedfiles(self, rev, i):
    """Return the names of the files changed in rev.

    The full getchanges() result is kept in self._changescache so that
    the next getchanges() call for rev can reuse it. The i argument is
    not used.
    """
    result = self.getchanges(rev)
    self._changescache = (rev, result)
    files = result[0]
    return [item[0] for item in files]
356 344
def getcommit(self, rev):
    """Return the commit object of rev, fetching revisions if it is
    not in self.commits yet, and drop it from self.commits."""
    if rev not in self.commits:
        module, revnum = self.revsplit(rev)[1:]
        self.module = module
        self.reparent(module)
        # We assume that:
        # - requests for revisions after "stop" come from the
        # revision graph backward traversal. Cache all of them
        # down to stop, they will be used eventually.
        # - requests for revisions before "stop" come to get
        # isolated branches parents. Just fetch what is needed.
        stop = self.lastrevs.get(module, 0)
        if revnum < stop:
            stop = revnum + 1
        self._fetch_revisions(revnum, stop)
    # caller caches the result, so free it here to release memory
    return self.commits.pop(rev)
376 364
def gettags(self):
    """Return a dictionary mapping tag names to revision ids.

    An empty dictionary is returned when self.tags is None. When the
    log request raises SubversionException, a note is emitted and the
    tags collected so far are returned.
    """
    tags = {}
    if self.tags is None:
        return tags

    # svn tags are just a convention, project branches left in a
    # 'tags' directory. There is no other relationship than
    # ancestry, which is expensive to discover and makes them hard
    # to update incrementally. Worse, past revisions may be
    # referenced by tags far away in the future, requiring a deep
    # history traversal on every calculation. Current code
    # performs a single backward traversal, tracking moves within
    # the tags directory (tag renaming) and recording a new tag
    # everytime a project is copied from outside the tags
    # directory. It also lists deleted tags, this behaviour may
    # change in the future.

    # Each pending item is a mutable [source path, source rev, tag name]
    pendings = []
    tagspath = self.tags
    start = svn.ra.get_latest_revnum(self.ra)
    try:
        for entry in self._getlog([self.tags], start, self.startrev):
            origpaths, revnum, author, date, message = entry
            copies = [(ent.copyfrom_path, ent.copyfrom_rev, dest)
                      for dest, ent in origpaths.iteritems()
                      if ent.copyfrom_path]
            # Apply moves/copies from more specific to general
            copies.sort()
            copies.reverse()

            srctagspath = tagspath
            if copies and copies[-1][2] == tagspath:
                # Track tags directory moves
                srctagspath = copies.pop()[0]

            for source, sourcerev, dest in copies:
                if not dest.startswith(tagspath + '/'):
                    continue
                for pending in pendings:
                    if pending[0].startswith(dest):
                        # A pending tag came from this copy: follow it
                        moved = source + pending[0][len(dest):]
                        pending[:2] = [moved, sourcerev]
                        break
                else:
                    tagname = dest.split('/')[-1]
                    pendings.append([source, sourcerev, tagname])

            # Tell tag renamings from tag creations
            remainings = []
            for source, sourcerev, tagname in pendings:
                if source.startswith(srctagspath):
                    # Still inside the tags directory, keep tracking
                    remainings.append([source, sourcerev, tagname])
                    continue
                # From revision may be fake, get one with changes
                tagid = self.latest(source, sourcerev)
                if tagid:
                    tags[tagname] = tagid
            pendings = remainings
            tagspath = srctagspath

    except SubversionException:
        self.ui.note('no tags found at revision %d\n' % start)
    return tags
437 425
def converted(self, rev, destrev):
    """Append a "destrev revnum" line to the .svn/hg-shamap file of
    the working copy. Does nothing when self.wc is not set.

    The map file is opened in append mode on first use and kept open
    in self.convertfp.
    """
    if not self.wc:
        return
    if self.convertfp is None:
        mappath = os.path.join(self.wc, '.svn', 'hg-shamap')
        self.convertfp = open(mappath, 'a')
    fp = self.convertfp
    fp.write('%s %d\n' % (destrev, self.revnum(rev)))
    fp.flush()
446 434
447 435 # -- helper functions --
448 436
def revid(self, revnum, module=None):
    """Return the unicode revision id "svn:UUID MODULE@REVNUM" (without
    the space) of revnum in module, module defaulting to self.module
    when empty or omitted."""
    module = module or self.module
    decoded = module.decode(self.encoding)
    return u"svn:%s%s@%s" % (self.uuid, decoded, revnum)
454 442
def revnum(self, rev):
    """Return the integer following the last '@' of revision id rev,
    or int(rev) when rev has no '@'."""
    tail = rev[rev.rfind('@') + 1:]
    return int(tail)
457 445
def revsplit(self, rev):
    """Split revision id rev into (uuid, module, revnum).

    rev looks like "svn:UUID/MODULE/PATH@REVNUM". uuid and module are
    returned encoded with self.encoding; module is either empty or
    starts with a slash. revnum is an integer.

    The revision number is whatever follows the *last* '@', as in
    revnum(), so that module paths containing '@' are supported.

    Raises ValueError when rev has no '@' or the revision is not an
    integer.
    """
    at = rev.rfind('@')
    if at < 0:
        raise ValueError('no revision number in %r' % rev)
    url = rev[:at]
    revnum = int(rev[at + 1:])
    slash = url.find('/')
    if slash < 0:
        # No module part
        head, mod = url, url[:0]
    else:
        head, mod = url[:slash], url[slash:]
    # Drop the leading "svn:"
    uuid = head[4:]
    return uuid.encode(self.encoding), mod.encode(self.encoding), revnum
467 455
def latest(self, path, stop=0):
    """Find the latest revid affecting path, up to stop. It may return
    a revision in a different module, since a branch may be moved without
    a change being reported. Return None if computed module does not
    belong to rootmodule subtree.

    A false stop means the latest repository revision. util.Abort is
    raised when path cannot be found at stop.
    """
    if not path.startswith(self.rootmodule):
        # Requests on foreign branches may be forbidden at server level
        self.ui.debug(_('ignoring foreign branch %r\n') % path)
        return None

    if not stop:
        stop = svn.ra.get_latest_revnum(self.ra)
    try:
        # path is relative to the repository root
        prevmodule = self.reparent('')
        dirent = svn.ra.stat(self.ra, path.strip('/'), stop)
        self.reparent(prevmodule)
    except SubversionException:
        dirent = None
    if not dirent:
        raise util.Abort('%s not found up to revision %d' % (path, stop))

    # stat() gives us the previous revision on this line of development, but
    # it might be in *another module*. Fetch the log and detect renames down
    # to the latest revision.
    created = dirent.created_rev
    stream = self._getlog([path], stop, created)
    try:
        for entry in stream:
            paths, revnum, author, date, message = entry
            if revnum <= created:
                break

            for changed in paths:
                source = paths[changed].copyfrom_path
                if path.startswith(changed) and source:
                    newpath = source + path[len(changed):]
                    self.ui.debug("branch renamed from %s to %s at %d\n" %
                                  (path, newpath, revnum))
                    path = newpath
                    break
    finally:
        stream.close()

    if not path.startswith(self.rootmodule):
        self.ui.debug(_('ignoring foreign branch %r\n') % path)
        return None
    return self.revid(created, path)
515 503
def get_blacklist(self):
    """Avoid certain revision numbers.
    It is not uncommon for two nearby revisions to cancel each other
    out, e.g. 'I copied trunk into a subdirectory of itself instead
    of making a branch'. The converted repository is significantly
    smaller if we ignore such revisions.

    The revision numbers are read, one per line, from "blacklist.txt"
    in the current directory and stored in self.blacklist. Lines
    starting with "#" and lines which are not integers are ignored.
    self.blacklist is set to an empty set before the file is opened,
    so it exists even if opening the file raises IOError.
    """
    self.blacklist = util.set()
    blacklist = self.blacklist
    fp = open("blacklist.txt", "r")
    try:
        for line in fp:
            if line.startswith("#"):
                continue
            try:
                blacklist.add(int(line.strip()))
            except ValueError:
                pass # not an integer or a comment
    finally:
        # Do not rely on garbage collection to release the handle
        fp.close()
531 519
def is_blacklisted(self, svn_rev):
    """Return True if svn_rev is a member of self.blacklist."""
    blacklist = self.blacklist
    return svn_rev in blacklist
534 522
def reparent(self, module):
    """Reparent the svn transport and return the previous parent.

    Nothing is done, and module itself is returned, when the transport
    is already parented to module. An unset previous parent is
    reported as ''.
    """
    previous = self.prevmodule
    if previous == module:
        return module
    svn_url = (self.base + module).encode(self.encoding)
    self.ui.debug("reparent to %s\n" % svn_url)
    svn.ra.reparent(self.ra, svn_url)
    self.prevmodule = module
    if previous is None:
        return ''
    return previous
547 535
def expandpaths(self, rev, paths, parents):
    """Expand the changed paths of rev into file entries and copies.

    paths is a sequence of (repository path, changed-path entry)
    pairs and parents the parent revision ids of rev. Returns
    (entries, copies): the de-duplicated names of touched files and a
    dictionary mapping copy destinations to their sources.

    The transport is reparented to the module of rev if needed.
    """
    entries = []
    # Map of entrypath, revision for finding source of deleted revisions.
    copyfrom = {}
    copies = {}

    new_module, revnum = self.revsplit(rev)[1:]
    if new_module != self.module:
        self.module = new_module
        self.reparent(self.module)

    def lookup_parts(p):
        # Return (prefix, entry) for the longest proper prefix of p
        # recorded in copyfrom, or None.
        found = None
        parts = p.split("/")
        for count in range(len(parts)):
            prefix = "/".join(parts[:count])
            source = copyfrom.get(prefix, None)
            if source is not None:
                self.ui.debug("Found parent directory %s\n" % source)
                found = (prefix, source)
        return found

    for path, ent in paths:
        entrypath = self.getrelpath(path)
        entry = entrypath.decode(self.encoding)

        kind = self._checkpath(entrypath, revnum)
        if kind == svn.core.svn_node_file:
            entries.append(self.recode(entry))
            if not ent.copyfrom_path or not parents:
                continue
            # Copy sources not in parent revisions cannot be represented,
            # ignore their origin for now
            pmodule, prevnum = self.revsplit(parents[0])[1:]
            if ent.copyfrom_rev < prevnum:
                continue
            copyfrom_path = self.getrelpath(ent.copyfrom_path, pmodule)
            if not copyfrom_path:
                continue
            self.ui.debug("copied to %s from %s@%s\n" %
                          (entrypath, copyfrom_path, ent.copyfrom_rev))
            copies[self.recode(entry)] = self.recode(copyfrom_path)
        elif kind == 0: # gone, but had better be a deleted *file*
            self.ui.debug("gone from %s\n" % ent.copyfrom_rev)

            # if a branch is created but entries are removed in the same
            # changeset, get the right fromrev
            # parents cannot be empty here, you cannot remove things from
            # a root revision.
            uuid, old_module, fromrev = self.revsplit(parents[0])

            basepath = old_module + "/" + self.getrelpath(path)
            entrypath = basepath

            self.ui.debug("base, entry %s %s\n" % (basepath, entrypath))

            frompath, froment = lookup_parts(entrypath) or (None, revnum - 1)

            # need to remove fragment from lookup_parts and replace with copyfrom_path
            if frompath is not None:
                self.ui.debug("munge-o-matic\n")
                self.ui.debug(entrypath + '\n')
                self.ui.debug(entrypath[len(frompath):] + '\n')
                entrypath = froment.copyfrom_path + entrypath[len(frompath):]
                fromrev = froment.copyfrom_rev
                self.ui.debug("Info: %s %s %s %s\n" % (frompath, froment, ent, entrypath))

            # We can avoid the reparent calls if the module has not changed
            # but it probably does not worth the pain.
            prevmodule = self.reparent('')
            fromkind = svn.ra.check_path(self.ra, entrypath.strip('/'), fromrev)
            self.reparent(prevmodule)

            if fromkind == svn.core.svn_node_file: # a deleted file
                entries.append(self.recode(entry))
            elif fromkind == svn.core.svn_node_dir:
                # Sometimes this is tricky. For example: in
                # The Subversion Repository revision 6940 a dir
                # was copied and one of its files was deleted
                # from the new location in the same commit. This
                # code can't deal with that yet.
                if ent.action == 'C':
                    children = self._find_children(path, fromrev)
                else:
                    oroot = entrypath.strip('/')
                    nroot = path.strip('/')
                    children = self._find_children(oroot, fromrev)
                    children = [c.replace(oroot, nroot) for c in children]
                # Mark all [files, not directories] as deleted.
                for child in children:
                    # Can we move a child directory and its
                    # parent in the same commit? (probably can). Could
                    # cause problems if instead of revnum -1,
                    # we have to look in (copyfrom_path, revnum - 1)
                    entrypath = self.getrelpath("/" + child, module=old_module)
                    if not entrypath:
                        continue
                    entry = self.recode(entrypath.decode(self.encoding))
                    if entry in copies:
                        # deleted file within a copy
                        del copies[entry]
                    else:
                        entries.append(entry)
            else:
                self.ui.debug('unknown path in revision %d: %s\n' % \
                              (revnum, path))
        elif kind == svn.core.svn_node_dir:
            # Should probably synthesize normal file entries
            # and handle as above to clean up copy/rename handling.

            # If the directory just had a prop change,
            # then we shouldn't need to look for its children.
            if ent.action == 'M':
                continue

            # Also this could create duplicate entries. Not sure
            # whether this will matter. Maybe should make entries a set.
            # This will fail if a directory was copied
            # from another branch and then some of its files
            # were deleted in the same transaction.
            children = self._find_children(path, revnum)
            children.sort()
            for child in children:
                # Can we move a child directory and its
                # parent in the same commit? (probably can). Could
                # cause problems if instead of revnum -1,
                # we have to look in (copyfrom_path, revnum - 1)
                entrypath = self.getrelpath("/" + child)
                if not entrypath:
                    continue
                # Need to filter out directories here...
                childkind = self._checkpath(entrypath, revnum)
                if childkind != svn.core.svn_node_dir:
                    entries.append(self.recode(entrypath))

            # Copies here (must copy all from source)
            # Probably not a real problem for us if
            # source does not exist
            if not ent.copyfrom_path or not parents:
                continue
            # Copy sources not in parent revisions cannot be represented,
            # ignore their origin for now
            pmodule, prevnum = self.revsplit(parents[0])[1:]
            if ent.copyfrom_rev < prevnum:
                continue
            copyfrompath = ent.copyfrom_path.decode(self.encoding)
            copyfrompath = self.getrelpath(copyfrompath, pmodule)
            if not copyfrompath:
                continue
            copyfrom[path] = ent
            self.ui.debug("mark %s came from %s:%d\n"
                          % (path, copyfrompath, ent.copyfrom_rev))
            children = self._find_children(ent.copyfrom_path, ent.copyfrom_rev)
            children.sort()
            for child in children:
                entrypath = self.getrelpath("/" + child, pmodule)
                if not entrypath:
                    continue
                entry = entrypath.decode(self.encoding)
                copytopath = path + entry[len(copyfrompath):]
                copytopath = self.getrelpath(copytopath)
                copies[self.recode(copytopath)] = self.recode(entry, pmodule)

    return (util.unique(entries), copies)
714 702
715 703 def _fetch_revisions(self, from_revnum, to_revnum):
716 704 if from_revnum < to_revnum:
717 705 from_revnum, to_revnum = to_revnum, from_revnum
718 706
719 707 self.child_cset = None
720 708
721 709 def isdescendantof(parent, child):
722 710 if not child or not parent or not child.startswith(parent):
723 711 return False
724 712 subpath = child[len(parent):]
725 713 return len(subpath) > 1 and subpath[0] == '/'
726 714
727 715 def parselogentry(orig_paths, revnum, author, date, message):
728 716 """Return the parsed commit object or None, and True if
729 717 the revision is a branch root.
730 718 """
731 719 self.ui.debug("parsing revision %d (%d changes)\n" %
732 720 (revnum, len(orig_paths)))
733 721
734 722 branched = False
735 723 rev = self.revid(revnum)
736 724 # branch log might return entries for a parent we already have
737 725
738 726 if (rev in self.commits or revnum < to_revnum):
739 727 return None, branched
740 728
741 729 parents = []
742 730 # check whether this revision is the start of a branch or part
743 731 # of a branch renaming
744 732 orig_paths = orig_paths.items()
745 733 orig_paths.sort()
746 734 root_paths = [(p,e) for p,e in orig_paths if self.module.startswith(p)]
747 735 if root_paths:
748 736 path, ent = root_paths[-1]
749 737 if ent.copyfrom_path:
750 738 # If dir was moved while one of its file was removed
751 739 # the log may look like:
752 740 # A /dir (from /dir:x)
753 741 # A /dir/a (from /dir/a:y)
754 742 # A /dir/b (from /dir/b:z)
755 743 # ...
756 744 # for all remaining children.
757 745 # Let's take the highest child element from rev as source.
758 746 copies = [(p,e) for p,e in orig_paths[:-1]
759 747 if isdescendantof(ent.copyfrom_path, e.copyfrom_path)]
760 748 fromrev = max([e.copyfrom_rev for p,e in copies] + [ent.copyfrom_rev])
761 749 branched = True
762 750 newpath = ent.copyfrom_path + self.module[len(path):]
763 751 # ent.copyfrom_rev may not be the actual last revision
764 752 previd = self.latest(newpath, fromrev)
765 753 if previd is not None:
766 754 prevmodule, prevnum = self.revsplit(previd)[1:]
767 755 if prevnum >= self.startrev:
768 756 parents = [previd]
769 757 self.ui.note('found parent of branch %s at %d: %s\n' %
770 758 (self.module, prevnum, prevmodule))
771 759 else:
772 760 self.ui.debug("No copyfrom path, don't know what to do.\n")
773 761
774 762 paths = []
775 763 # filter out unrelated paths
776 764 for path, ent in orig_paths:
777 765 if self.getrelpath(path) is None:
778 766 continue
779 767 paths.append((path, ent))
780 768
781 769 # Example SVN datetime. Includes microseconds.
782 770 # ISO-8601 conformant
783 771 # '2007-01-04T17:35:00.902377Z'
784 772 date = util.parsedate(date[:19] + " UTC", ["%Y-%m-%dT%H:%M:%S"])
785 773
786 774 log = message and self.recode(message) or ''
787 775 author = author and self.recode(author) or ''
788 776 try:
789 777 branch = self.module.split("/")[-1]
790 778 if branch == 'trunk':
791 779 branch = ''
792 780 except IndexError:
793 781 branch = None
794 782
795 783 cset = commit(author=author,
796 784 date=util.datestr(date),
797 785 desc=log,
798 786 parents=parents,
799 787 branch=branch,
800 788 rev=rev.encode('utf-8'))
801 789
802 790 self.commits[rev] = cset
803 791 # The parents list is *shared* among self.paths and the
804 792 # commit object. Both will be updated below.
805 793 self.paths[rev] = (paths, cset.parents)
806 794 if self.child_cset and not self.child_cset.parents:
807 795 self.child_cset.parents[:] = [rev]
808 796 self.child_cset = cset
809 797 return cset, branched
810 798
811 799 self.ui.note('fetching revision log for "%s" from %d to %d\n' %
812 800 (self.module, from_revnum, to_revnum))
813 801
814 802 try:
815 803 firstcset = None
816 804 lastonbranch = False
817 stream = get_log(self.url, [self.module], from_revnum, to_revnum)
805 stream = self._getlog([self.module], from_revnum, to_revnum)
818 806 try:
819 807 for entry in stream:
820 808 paths, revnum, author, date, message = entry
821 809 if revnum < self.startrev:
822 810 lastonbranch = True
823 811 break
824 812 if self.is_blacklisted(revnum):
825 813 self.ui.note('skipping blacklisted revision %d\n'
826 814 % revnum)
827 815 continue
828 816 if paths is None:
829 817 self.ui.debug('revision %d has no entries\n' % revnum)
830 818 continue
831 819 cset, lastonbranch = parselogentry(paths, revnum, author,
832 820 date, message)
833 821 if cset:
834 822 firstcset = cset
835 823 if lastonbranch:
836 824 break
837 825 finally:
838 826 stream.close()
839 827
840 828 if not lastonbranch and firstcset and not firstcset.parents:
841 829 # The first revision of the sequence (the last fetched one)
842 830 # has invalid parents if not a branch root. Find the parent
843 831 # revision now, if any.
844 832 try:
845 833 firstrevnum = self.revnum(firstcset.rev)
846 834 if firstrevnum > 1:
847 835 latest = self.latest(self.module, firstrevnum - 1)
848 836 if latest:
849 837 firstcset.parents.append(latest)
850 838 except util.Abort:
851 839 pass
852 840 except SubversionException, (inst, num):
853 841 if num == svn.core.SVN_ERR_FS_NO_SUCH_REVISION:
854 842 raise util.Abort('svn: branch has no revision %s' % to_revnum)
855 843 raise
856 844
def _getfile(self, file, rev):
    """Fetch one file from the repository at a converter revision id.

    Returns a ``(data, mode)`` pair.  ``mode`` is ``'l'`` when the file
    carries the ``svn:special`` property, ``'x'`` when it only carries
    ``svn:executable`` and ``''`` otherwise.  For ``'l'`` entries a
    leading ``"link "`` marker is removed from the returned data.

    Raises IOError when Subversion reports that the path does not exist
    at that revision; every other SubversionException propagates.
    """
    buf = StringIO()
    flags = ''
    try:
        target_module, revnum = self.revsplit(rev)[1:]
        if target_module != self.module:
            # The revision lives in another module: point the RA session
            # at it before asking for the file.
            self.module = target_module
            self.reparent(self.module)
        # TODO: ra.get_file transmits the whole file instead of diffs.
        props = svn.ra.get_file(self.ra, file, revnum, buf)
        if isinstance(props, list):
            # Some bindings hand back a list; the properties are its
            # last element.
            props = props[-1]
        if "svn:executable" in props:
            flags = 'x'
        if "svn:special" in props:
            # svn:special takes precedence over svn:executable.
            flags = 'l'
    except SubversionException:
        err = sys.exc_info()[1]
        missing = (svn.core.SVN_ERR_FS_NOT_FOUND,
                   svn.core.SVN_ERR_RA_DAV_PATH_NOT_FOUND)
        if err.apr_err in missing:
            # File not found
            raise IOError()
        raise
    contents = buf.getvalue()
    marker = "link "
    if flags == 'l' and contents.startswith(marker):
        contents = contents[len(marker):]
    return contents, flags
883 871
def _find_children(self, path, revnum):
    """List the entries found below ``path`` at revision ``revnum``.

    ``path`` is taken relative to ``self.base``.  Each returned name is
    ``path`` (without surrounding slashes) joined with the entry name
    reported by ``svn.client.ls``.
    """
    relpath = path.strip('/')
    pool = Pool()
    target = '/'.join([self.base, relpath]).strip('/')
    # NOTE(review): the third argument is presumably the "recurse" flag
    # of svn.client.ls -- confirm against the bindings.
    listing = svn.client.ls(target, optrev(revnum), True, self.ctx, pool)
    children = []
    for name in listing.keys():
        children.append('%s/%s' % (relpath, name))
    return children
889 877
def getrelpath(self, path, module=None):
    """Return ``path`` relative to ``module``, or None if it is outside.

    ``module`` defaults to ``self.module``.  Example: with the module
    "/CMFPlone/branches/Plone-2_0-branch", the log path
    "/CMFPlone/branches/Plone-2_0-branch/tests/PloneTestCase.py"
    becomes "tests/PloneTestCase.py".  The module itself maps to ''.

    A path that merely shares the module as a string prefix (without a
    '/' boundary) is treated as outside the tree: a debug message is
    emitted and None is returned.
    """
    prefix = module
    if prefix is None:
        prefix = self.module
    if path.startswith(prefix):
        tail = path.rstrip('/')[len(prefix):]
        if not tail:
            # The path is the module root itself.
            return tail
        if tail[0] == '/':
            return tail[1:]

    # The path is outside our tracked tree...
    self.ui.debug('%r is not under %r, ignoring\n' % (path, prefix))
    return None
909 897
def _checkpath(self, path, revnum):
    """Return what ``svn.ra.check_path`` reports for ``path`` at ``revnum``.

    Surrounding slashes are removed first: ra.check_path does not like
    leading slashes very much, it leads to PROPFIND subversion errors.
    """
    trimmed = path.strip('/')
    return svn.ra.check_path(self.ra, trimmed, revnum)
914 902
def _getlog(self, paths, start, end, limit=0, discover_changed_paths=True,
            strict_node_history=False):
    """Start an ``hg debugsvnlog`` child process and return its log stream.

    The request (base URL, normalized paths, revision range and the
    remaining options) is serialized with ``encodeargs`` and written to
    the child's standard input; the child's standard output is wrapped
    in a ``logstream`` and returned.
    """
    def normalize(p):
        # svn >= 1.5 only wants paths relative to the supplied URL:
        # anchor relative names at the current module, then drop the
        # surrounding slashes.
        if not p.startswith('/'):
            p = self.module + '/' + p
        return p.strip('/')

    relpaths = [normalize(p) for p in paths]
    payload = encodeargs([self.base, relpaths, start, end, limit,
                          discover_changed_paths, strict_node_history])
    command = '%s debugsvnlog' % util.shellquote(util.hgexecutable())
    childin, childout = os.popen2(command, 'b')
    childin.write(payload)
    childin.close()
    return logstream(childout)
921
# Shell script written out as the pre-revprop-change hook of repositories
# created by svn_sink.  It lets svn:log be modified and the
# hg:convert-branch / hg:convert-rev revision properties be added, and
# rejects any other revision property change.
pre_revprop_change = (
    '#!/bin/sh\n'
    '\n'
    'REPOS="$1"\n'
    'REV="$2"\n'
    'USER="$3"\n'
    'PROPNAME="$4"\n'
    'ACTION="$5"\n'
    '\n'
    'if [ "$ACTION" = "M" -a "$PROPNAME" = "svn:log" ]; then exit 0; fi\n'
    'if [ "$ACTION" = "A" -a "$PROPNAME" = "hg:convert-branch" ]; then exit 0; fi\n'
    'if [ "$ACTION" = "A" -a "$PROPNAME" = "hg:convert-rev" ]; then exit 0; fi\n'
    '\n'
    'echo "Changing prohibited revision property" >&2\n'
    'exit 1\n'
)
930 937
class svn_sink(converter_sink, commandline):
    """Conversion sink that commits into a Subversion working copy.

    File changes are written straight into the working copy while
    deletions, copies and executable-bit changes are queued on the
    instance and replayed through the ``svn`` command line when
    ``putcommit`` is called.
    """

    # Extracts the new revision number from "svn commit" output.
    commit_re = re.compile(r'Committed revision (\d+).', re.M)

    def prerun(self):
        # Enter the working copy, when there is one.
        # NOTE(review): presumably called by commandline before each svn
        # invocation -- confirm against common.commandline.
        if self.wc:
            os.chdir(self.wc)

    def postrun(self):
        # Go back to the directory recorded at construction time.
        if self.wc:
            os.chdir(self.cwd)

    def join(self, name):
        """Return the path of ``name`` inside the working copy's .svn dir."""
        return os.path.join(self.wc, '.svn', name)

    def revmapfile(self):
        return self.join('hg-shamap')

    def authorfile(self):
        return self.join('hg-authormap')

    def __init__(self, ui, path):
        """Open, or create, the working copy used as conversion target.

        ``path`` is either an existing working copy (it contains
        ``.svn/entries``) or a repository location.  In the latter case a
        ``<basename>-wc`` working copy is checked out in the current
        directory, after creating the repository with ``svnadmin create``
        when ``path/db/fs-type`` does not exist.
        """
        converter_sink.__init__(self, ui, path)
        commandline.__init__(self, ui, 'svn')
        # Operations queued until the next putcommit().
        self.delete = []
        self.setexec = []
        self.delexec = []
        self.copies = []
        self.wc = None
        self.cwd = os.getcwd()

        path = os.path.realpath(path)

        # Holds the repository path when we had to create it.
        created = False
        if os.path.isfile(os.path.join(path, '.svn', 'entries')):
            self.wc = path
            self.run0('update')
        else:
            wcpath = os.path.join(os.getcwd(), os.path.basename(path) + '-wc')

            if os.path.isdir(os.path.dirname(path)):
                if not os.path.exists(os.path.join(path, 'db', 'fs-type')):
                    ui.status(_('initializing svn repo %r\n') %
                              os.path.basename(path))
                    commandline(ui, 'svnadmin').run0('create', path)
                    created = path
                # Turn the local path into a file:// URL.
                path = util.normpath(path)
                if not path.startswith('/'):
                    path = '/' + path
                path = 'file://' + path

            ui.status(_('initializing svn wc %r\n') % os.path.basename(wcpath))
            self.run0('checkout', path, wcpath)

            self.wc = wcpath
        self.opener = util.opener(self.wc)
        self.wopener = util.opener(self.wc)
        self.childmap = mapfile(ui, self.join('hg-childmap'))
        self.is_exec = util.checkexec(self.wc) and util.is_exec or None

        if created:
            # Install a hook allowing the revision property changes made
            # by putcommit().
            hook = os.path.join(created, 'hooks', 'pre-revprop-change')
            fp = open(hook, 'w')
            try:
                fp.write(pre_revprop_change)
            finally:
                # Close the hook file even if the write fails.
                fp.close()
            util.set_flags(hook, "x")

        xport = transport.SvnRaTransport(url=geturl(path))
        self.uuid = svn.ra.get_uuid(xport.ra)

    def wjoin(self, *names):
        """Join ``names`` onto the working copy root."""
        return os.path.join(self.wc, *names)

    def putfile(self, filename, flags, data):
        """Write ``data`` to ``filename`` in the working copy.

        With 'l' in ``flags`` a symlink pointing at ``data`` is created.
        Otherwise a regular file is written and, when its executable
        state has to change, the file is queued in ``self.setexec`` or
        ``self.delexec`` for the next commit.
        """
        if 'l' in flags:
            self.wopener.symlink(data, filename)
        else:
            target = self.wjoin(filename)
            try:
                # Remove a stale symlink so that data is not written
                # through it.  The link must be addressed inside the
                # working copy: the bare name is relative to the process
                # working directory, which is not the working copy here.
                if os.path.islink(target):
                    os.unlink(target)
            except OSError:
                pass
            fp = self.wopener(filename, 'w')
            try:
                fp.write(data)
            finally:
                # Do not rely on garbage collection to flush and close.
                fp.close()

            if self.is_exec:
                was_exec = self.is_exec(target)
            else:
                # On filesystems not supporting execute-bit, there is no way
                # to know if it is set but asking subversion. Setting it
                # systematically is just as expensive and much simpler.
                was_exec = 'x' not in flags

            util.set_flags(target, flags)
            if was_exec:
                if 'x' not in flags:
                    self.delexec.append(filename)
            else:
                if 'x' in flags:
                    self.setexec.append(filename)

    def delfile(self, name):
        # Queued; "svn delete" is run from putcommit().
        self.delete.append(name)

    def copyfile(self, source, dest):
        # Queued; "svn copy" is run from putcommit().
        self.copies.append([source, dest])

    def _copyfile(self, source, dest):
        # SVN's copy command pukes if the destination file exists, but
        # our copyfile method expects to record a copy that has
        # already occurred.  Cross the semantic gap.
        wdest = self.wjoin(dest)
        exists = os.path.exists(wdest)
        if exists:
            # Reserve a unique name next to the destination and move the
            # existing file out of the way.
            fd, tempname = tempfile.mkstemp(
                prefix='hg-copy-', dir=os.path.dirname(wdest))
            os.close(fd)
            os.unlink(tempname)
            os.rename(wdest, tempname)
        try:
            self.run0('copy', source, dest)
        finally:
            if exists:
                # Put the already-converted content back in place.
                try:
                    os.unlink(wdest)
                except OSError:
                    pass
                os.rename(tempname, wdest)

    def dirs_of(self, files):
        """Return the set of directories involved in ``files``.

        This contains every entry of ``files`` that is itself a directory
        in the working copy, plus every '/'-delimited ancestor of each
        entry.
        """
        dirs = util.set()
        for f in files:
            if os.path.isdir(self.wjoin(f)):
                dirs.add(f)
            for i in strutil.rfindall(f, '/'):
                dirs.add(f[:i])
        return dirs

    def add_dirs(self, files):
        """Run "svn add" on directories of ``files`` lacking .svn/entries.

        Returns the sorted list of directories that were added.
        """
        add_dirs = [d for d in self.dirs_of(files)
                    if not os.path.exists(self.wjoin(d, '.svn', 'entries'))]
        if add_dirs:
            add_dirs.sort()
            self.xargs(add_dirs, 'add', non_recursive=True, quiet=True)
        return add_dirs

    def add_files(self, files):
        """Run "svn add" on ``files`` and return them."""
        if files:
            self.xargs(files, 'add', quiet=True)
        return files

    def tidy_dirs(self, names):
        """Run "svn delete" on directories left with only a .svn entry.

        Directories are visited in reverse sorted order so that children
        come before their parents.  Returns the deleted directories.
        """
        dirs = list(self.dirs_of(names))
        dirs.sort()
        dirs.reverse()
        deleted = []
        for d in dirs:
            wd = self.wjoin(d)
            # os.listdir() returns a list: comparing it with the bare
            # string '.svn' could never succeed, so nothing was tidied.
            if os.listdir(wd) == ['.svn']:
                self.run0('delete', d)
                deleted.append(d)
        return deleted

    def addchild(self, parent, child):
        self.childmap[parent] = child

    def revid(self, rev):
        """Build the converter revision id for svn revision ``rev``."""
        return u"svn:%s@%s" % (self.uuid, rev)

    def putcommit(self, files, parents, commit):
        """Commit the queued changes and return the new revision id.

        When one of ``parents`` already has a child recorded in the child
        map, the id of that child is returned and nothing is committed.
        Raises util.Abort when the output of "svn commit" does not
        contain the committed revision number.
        """
        for parent in parents:
            try:
                return self.revid(self.childmap[parent])
            except KeyError:
                pass
        entries = util.set(self.delete)
        files = util.frozenset(files)
        entries.update(self.add_dirs(files.difference(entries)))
        if self.copies:
            for s, d in self.copies:
                self._copyfile(s, d)
            self.copies = []
        if self.delete:
            self.xargs(self.delete, 'delete')
            self.delete = []
        entries.update(self.add_files(files.difference(entries)))
        entries.update(self.tidy_dirs(entries))
        if self.delexec:
            self.xargs(self.delexec, 'propdel', 'svn:executable')
            self.delexec = []
        if self.setexec:
            self.xargs(self.setexec, 'propset', 'svn:executable', '*')
            self.setexec = []

        # The log message is handed to svn through a temporary file.
        fd, messagefile = tempfile.mkstemp(prefix='hg-convert-')
        fp = os.fdopen(fd, 'w')
        fp.write(commit.desc)
        fp.close()
        try:
            output = self.run0('commit',
                               username=util.shortuser(commit.author),
                               file=messagefile,
                               encoding='utf-8')
            try:
                rev = self.commit_re.search(output).group(1)
            except AttributeError:
                # search() returned None: no "Committed revision" line.
                self.ui.warn(_('unexpected svn output:\n'))
                self.ui.warn(output)
                raise util.Abort(_('unable to cope with svn output'))
            if commit.rev:
                self.run('propset', 'hg:convert-rev', commit.rev,
                         revprop=True, revision=rev)
            if commit.branch and commit.branch != 'default':
                self.run('propset', 'hg:convert-branch', commit.branch,
                         revprop=True, revision=rev)
            for parent in parents:
                self.addchild(parent, rev)
            return self.revid(rev)
        finally:
            os.unlink(messagefile)

    def puttags(self, tags):
        self.ui.warn(_('XXX TAGS NOT IMPLEMENTED YET\n'))
General Comments 0
You need to be logged in to leave comments. Login now