##// END OF EJS Templates
convert: work around svn.ra.get_files() not releasing input buffer
Patrick Mezard -
r7446:2d2e0be7 default
parent child Browse files
Show More
@@ -1,1172 +1,1175
1 1 # Subversion 1.4/1.5 Python API backend
2 2 #
3 3 # Copyright(C) 2007 Daniel Holth et al
4 4 #
5 5 # Configuration options:
6 6 #
7 7 # convert.svn.trunk
8 8 # Relative path to the trunk (default: "trunk")
9 9 # convert.svn.branches
10 10 # Relative path to tree of branches (default: "branches")
11 11 # convert.svn.tags
12 12 # Relative path to tree of tags (default: "tags")
13 13 #
14 14 # Set these in a hgrc, or on the command line as follows:
15 15 #
16 16 # hg convert --config convert.svn.trunk=wackoname [...]
17 17
18 18 import locale
19 19 import os
20 20 import re
21 21 import sys
22 22 import cPickle as pickle
23 23 import tempfile
24 24 import urllib
25 25
26 26 from mercurial import strutil, util
27 27 from mercurial.i18n import _
28 28
29 29 # Subversion stuff. Works best with very recent Python SVN bindings
30 30 # e.g. SVN 1.5 or backports. Thanks to the bzr folks for enhancing
31 31 # these bindings.
32 32
33 33 from cStringIO import StringIO
34 34
35 35 from common import NoRepo, commit, converter_source, encodeargs, decodeargs
36 36 from common import commandline, converter_sink, mapfile
37 37
38 38 try:
39 39 from svn.core import SubversionException, Pool
40 40 import svn
41 41 import svn.client
42 42 import svn.core
43 43 import svn.ra
44 44 import svn.delta
45 45 import transport
46 46 except ImportError:
47 47 pass
48 48
class SvnPathNotFound(Exception):
    """Raised when a path cannot be found in the Subversion repository."""
51 51
def geturl(path):
    """Return a URL for path.

    The Subversion client is asked first. If it raises
    SubversionException, an existing local directory is turned into a
    quoted file:// URL; anything else is returned unchanged.
    """
    try:
        canonical = svn.core.svn_path_canonicalize(path)
        return svn.client.url_from_path(canonical)
    except SubversionException:
        # Fall back to the manual handling below.
        pass
    if not os.path.isdir(path):
        return path
    localpath = os.path.normpath(os.path.abspath(path))
    if os.name == 'nt':
        localpath = '/' + util.normpath(localpath)
    return 'file://%s' % urllib.quote(localpath)
63 63
def optrev(number):
    """Return an svn_opt_revision_t of kind "number" set to number."""
    revision = svn.core.svn_opt_revision_t()
    revision.kind = svn.core.svn_opt_revision_number
    revision.value.number = number
    return revision
69 69
class changedpath(object):
    """Plain copy of the copyfrom_path, copyfrom_rev and action fields
    of a changed-path entry."""
    def __init__(self, p):
        for field in ('copyfrom_path', 'copyfrom_rev', 'action'):
            setattr(self, field, getattr(p, field))
75 75
76 76 def get_log_child(fp, url, paths, start, end, limit=0, discover_changed_paths=True,
77 77 strict_node_history=False):
78 78 protocol = -1
79 79 def receiver(orig_paths, revnum, author, date, message, pool):
80 80 if orig_paths is not None:
81 81 for k, v in orig_paths.iteritems():
82 82 orig_paths[k] = changedpath(v)
83 83 pickle.dump((orig_paths, revnum, author, date, message),
84 84 fp, protocol)
85 85
86 86 try:
87 87 # Use an ra of our own so that our parent can consume
88 88 # our results without confusing the server.
89 89 t = transport.SvnRaTransport(url=url)
90 90 svn.ra.get_log(t.ra, paths, start, end, limit,
91 91 discover_changed_paths,
92 92 strict_node_history,
93 93 receiver)
94 94 except SubversionException, (inst, num):
95 95 pickle.dump(num, fp, protocol)
96 96 except IOError:
97 97 # Caller may interrupt the iteration
98 98 pickle.dump(None, fp, protocol)
99 99 else:
100 100 pickle.dump(None, fp, protocol)
101 101 fp.close()
102 102 # With large history, cleanup process goes crazy and suddenly
103 103 # consumes *huge* amount of memory. The output file being closed,
104 104 # there is no need for clean termination.
105 105 os._exit(0)
106 106
def debugsvnlog(ui, **opts):
    """Fetch SVN log in a subprocess and channel them back to parent to
    avoid memory collection issues.

    The arguments for get_log_child are decoded from stdin and the
    results are written to stdout; both streams are set to binary mode.
    """
    for stream in (sys.stdin, sys.stdout):
        util.set_binary(stream)
    encoded = sys.stdin.read()
    args = decodeargs(encoded)
    get_log_child(sys.stdout, *args)
115 115
class logstream:
    """Interruptible revision log iterator.

    Wraps a file-like object from which pickled log entries are read.
    Each entry is a (orig_paths, revnum, author, date, message) tuple.
    A pickled None ends the stream; any other value that is not such a
    tuple is reported as a SubversionException.
    """
    def __init__(self, stdout):
        self._stdout = stdout

    def __iter__(self):
        while True:
            # NOTE: pickle.load() must only be fed trusted data; review
            # the producer of this stream before reusing this class.
            entry = pickle.load(self._stdout)
            try:
                orig_paths, revnum, author, date, message = entry
            except (TypeError, ValueError):
                # Unpacking failed: entry is either the end marker or an
                # error value. Catching only the unpacking errors keeps
                # KeyboardInterrupt and SystemExit propagating untouched.
                if entry is None:
                    break
                raise SubversionException("child raised exception", entry)
            yield entry

    def close(self):
        """Close the underlying stream; calling it again is a no-op."""
        if self._stdout:
            self._stdout.close()
            self._stdout = None
136 136
137 137 # SVN conversion code stolen from bzr-svn and tailor
138 138 #
139 139 # Subversion looks like a versioned filesystem, branches structures
140 140 # are defined by conventions and not enforced by the tool. First,
141 141 # we define the potential branches (modules) as "trunk" and "branches"
142 142 # children directories. Revisions are then identified by their
143 143 # module and revision number (and a repository identifier).
144 144 #
145 145 # The revision graph is really a tree (or a forest). By default, a
146 146 # revision parent is the previous revision in the same module. If the
147 147 # module directory is copied/moved from another module then the
148 148 # revision is the module root and its parent the source revision in
149 149 # the parent module. A revision has at most one parent.
150 150 #
151 151 class svn_source(converter_source):
def __init__(self, ui, url, rev=None):
    """Open the Subversion repository at url and locate its head.

    url may carry a trailing "@rev" suffix; an explicit rev argument
    takes precedence over it. Raises NoRepo when the bindings are
    missing or the URL cannot be opened, and util.Abort when rev or the
    convert.svn.startrev setting is not an integer or when no revision
    is found in the module.
    """
    super(svn_source, self).__init__(ui, url, rev=rev)

    # The svn imports at module level are allowed to fail silently;
    # detect that here by probing for one of the imported names.
    try:
        SubversionException
    except NameError:
        raise NoRepo('Subversion python bindings could not be loaded')

    self.encoding = locale.getpreferredencoding()
    self.lastrevs = {}

    latest = None
    try:
        # Support file://path@rev syntax. Useful e.g. to convert
        # deleted branches.
        at = url.rfind('@')
        if at >= 0:
            latest = int(url[at+1:])
            url = url[:at]
    except ValueError, e:
        # The text after '@' is not a revision number: keep url as is.
        pass
    self.url = geturl(url)
    self.encoding = 'UTF-8' # Subversion is always nominal UTF-8
    try:
        self.transport = transport.SvnRaTransport(url=self.url)
        self.ra = self.transport.ra
        self.ctx = self.transport.client
        self.baseurl = svn.ra.get_repos_root(self.ra)
        # Module is either empty or a repository path starting with
        # a slash and not ending with a slash.
        self.module = urllib.unquote(self.url[len(self.baseurl):])
        self.prevmodule = None
        self.rootmodule = self.module
        self.commits = {}
        self.paths = {}
        self.uuid = svn.ra.get_uuid(self.ra).decode(self.encoding)
    except SubversionException, e:
        ui.print_exc()
        raise NoRepo("%s does not look like a Subversion repo" % self.url)

    # An explicit rev overrides any revision parsed from the URL.
    if rev:
        try:
            latest = int(rev)
        except ValueError:
            raise util.Abort(_('svn: revision %s is not an integer') % rev)

    self.startrev = self.ui.config('convert', 'svn.startrev', default=0)
    try:
        self.startrev = int(self.startrev)
        # Negative start revisions are clamped to 0.
        if self.startrev < 0:
            self.startrev = 0
    except ValueError:
        raise util.Abort(_('svn: start revision %s is not an integer')
                         % self.startrev)

    # The blacklist file is optional: a read failure is ignored.
    try:
        self.get_blacklist()
    except IOError, e:
        pass

    self.head = self.latest(self.module, latest)
    if not self.head:
        raise util.Abort(_('no revision found in module %s') %
                         self.module.encode(self.encoding))
    self.last_changed = self.revnum(self.head)

    self._changescache = None

    # Remember the location when url (suffix stripped) contains
    # .svn/entries; converted() writes its map under that directory.
    if os.path.exists(os.path.join(url, '.svn/entries')):
        self.wc = url
    else:
        self.wc = None
    self.convertfp = None
225 225
def setrevmap(self, revmap):
    """Store in self.lastrevs the highest revision number found in
    revmap for each module."""
    highest = {}
    for revid in revmap.iterkeys():
        module, revnum = self.revsplit(revid)[1:]
        if module not in highest or revnum > highest[module]:
            highest[module] = revnum
    self.lastrevs = highest
234 234
def exists(self, path, optrev):
    """Return True if path can be listed below self.url at revision
    optrev, False if the listing raises SubversionException."""
    target = self.url.rstrip('/') + '/' + urllib.quote(path)
    try:
        svn.client.ls(target, optrev, False, self.ctx)
    except SubversionException:
        return False
    return True
242 242
def getheads(self):
    """Return the list of head revision ids to convert.

    The trunk, tags and branches locations are read from the
    convert.svn.* settings, falling back to the conventional names. The
    first head is the one of the module (moved to trunk when one is
    found); every non-empty branch directory adds another head.

    Raises util.Abort when an explicitly configured location does not
    exist, when the module has no revision, or when a start revision is
    combined with several heads or lies after the only head.
    """

    def isdir(path, revnum):
        kind = self._checkpath(path, revnum)
        return kind == svn.core.svn_node_dir

    def getcfgpath(name, rev):
        cfgpath = self.ui.config('convert', 'svn.' + name)
        # An explicitly blank setting disables this location.
        if cfgpath is not None and cfgpath.strip() == '':
            return None
        path = (cfgpath or name).strip('/')
        if not self.exists(path, rev):
            # Only a location the user asked for is an error when missing.
            if cfgpath:
                raise util.Abort(_('expected %s to be at %r, but not found')
                                 % (name, path))
            return None
        self.ui.note(_('found %s at %r\n') % (name, path))
        return path

    rev = optrev(self.last_changed)
    oldmodule = ''
    trunk = getcfgpath('trunk', rev)
    self.tags = getcfgpath('tags', rev)
    branches = getcfgpath('branches', rev)

    # If the project has a trunk or branches, we will extract heads
    # from them. We keep the project root otherwise.
    if trunk:
        oldmodule = self.module or ''
        self.module += '/' + trunk
        self.head = self.latest(self.module, self.last_changed)
        if not self.head:
            raise util.Abort(_('no revision found in module %s') %
                             self.module.encode(self.encoding))

    # First head in the list is the module's head
    self.heads = [self.head]
    if self.tags is not None:
        self.tags = '%s/%s' % (oldmodule , (self.tags or 'tags'))

    # Check if branches bring a few more heads to the list
    if branches:
        rpath = self.url.strip('/')
        branchnames = svn.client.ls(rpath + '/' + urllib.quote(branches),
                                    rev, False, self.ctx)
        for branch in branchnames.keys():
            module = '%s/%s/%s' % (oldmodule, branches, branch)
            if not isdir(module, self.last_changed):
                continue
            brevid = self.latest(module, self.last_changed)
            if not brevid:
                self.ui.note(_('ignoring empty branch %s\n') %
                             branch.encode(self.encoding))
                continue
            self.ui.note(_('found branch %s at %d\n') %
                         (branch, self.revnum(brevid)))
            self.heads.append(brevid)

    if self.startrev and self.heads:
        if len(self.heads) > 1:
            # The message used to read "supported with with more ...".
            raise util.Abort(_('svn: start revision is not supported '
                               'with more than one branch'))
        revnum = self.revnum(self.heads[0])
        if revnum < self.startrev:
            raise util.Abort(_('svn: no revision found after start revision %d')
                             % self.startrev)

    return self.heads
311 311
def getfile(self, file, rev):
    """Return the data of file at rev, remembering its mode in
    self.modecache for a later getmode() call."""
    contents, flags = self._getfile(file, rev)
    key = (file, rev)
    self.modecache[key] = flags
    return contents
316 316
def getmode(self, file, rev):
    """Return the mode cached in self.modecache for (file, rev)."""
    key = (file, rev)
    return self.modecache[key]
319 319
def getchanges(self, rev):
    """Return (files, copies) for rev.

    files is a sorted list of (name, rev) pairs and copies a dict. A
    result stored in self._changescache for the same rev is returned as
    is. The self.paths entry of rev is deleted once it has been used.
    """
    cached = self._changescache
    if cached and cached[0] == rev:
        return cached[1]
    self._changescache = None
    self.modecache = {}
    paths, parents = self.paths[rev]
    if not parents:
        # Perform a full checkout on roots
        module, revnum = self.revsplit(rev)[1:]
        listing = svn.client.ls(self.baseurl + urllib.quote(module),
                                optrev(revnum), True, self.ctx)
        files = [name for name, dirent in listing.iteritems()
                 if dirent.kind == svn.core.svn_node_file]
        copies = {}
    else:
        files, copies = self.expandpaths(rev, paths, parents)

    files.sort()
    files = [(name, rev) for name in files]

    # caller caches the result, so free it here to release memory
    del self.paths[rev]
    return (files, copies)
343 343
def getchangedfiles(self, rev, i):
    """Return the names of the files changed in rev, keeping the full
    getchanges() result in self._changescache."""
    changes = self.getchanges(rev)
    self._changescache = (rev, changes)
    files = changes[0]
    return [entry[0] for entry in files]
348 348
def getcommit(self, rev):
    """Return the commit object of rev, fetching revisions from the
    log first when it is not in self.commits. The returned entry is
    removed from self.commits."""
    if rev not in self.commits:
        module, revnum = self.revsplit(rev)[1:]
        self.module = module
        self.reparent(module)
        # We assume that:
        # - requests for revisions after "stop" come from the
        # revision graph backward traversal. Cache all of them
        # down to stop, they will be used eventually.
        # - requests for revisions before "stop" come to get
        # isolated branches parents. Just fetch what is needed.
        stop = self.lastrevs.get(module, 0)
        if revnum < stop:
            stop = revnum + 1
        self._fetch_revisions(revnum, stop)
    # caller caches the result, so free it here to release memory
    return self.commits.pop(rev)
368 368
def gettags(self):
    """Return a dict mapping tag names to revision ids.

    The dict is empty when no tags directory is known (self.tags is
    None). A SubversionException raised while reading the log is only
    reported with ui.note; the tags collected so far are returned.
    """
    tags = {}
    if self.tags is None:
        return tags

    # svn tags are just a convention, project branches left in a
    # 'tags' directory. There is no other relationship than
    # ancestry, which is expensive to discover and makes them hard
    # to update incrementally. Worse, past revisions may be
    # referenced by tags far away in the future, requiring a deep
    # history traversal on every calculation. Current code
    # performs a single backward traversal, tracking moves within
    # the tags directory (tag renaming) and recording a new tag
    # every time a project is copied from outside the tags
    # directory. It also lists deleted tags, this behaviour may
    # change in the future.
    # Each pending item is a mutable [source path, source rev, tag name].
    pendings = []
    tagspath = self.tags
    start = svn.ra.get_latest_revnum(self.ra)
    try:
        for entry in self._getlog([self.tags], start, self.startrev):
            origpaths, revnum, author, date, message = entry
            copies = [(e.copyfrom_path, e.copyfrom_rev, p) for p, e
                      in origpaths.iteritems() if e.copyfrom_path]
            copies.sort()
            # Apply moves/copies from more specific to general
            copies.reverse()

            srctagspath = tagspath
            if copies and copies[-1][2] == tagspath:
                # Track tags directory moves
                srctagspath = copies.pop()[0]

            for source, sourcerev, dest in copies:
                if not dest.startswith(tagspath + '/'):
                    continue
                # A pending tag located under dest is re-rooted at its
                # copy source; otherwise dest is recorded as a new
                # pending tag.
                for tag in pendings:
                    if tag[0].startswith(dest):
                        tagpath = source + tag[0][len(dest):]
                        tag[:2] = [tagpath, sourcerev]
                        break
                else:
                    pendings.append([source, sourcerev, dest.split('/')[-1]])

            # Tell tag renamings from tag creations
            remainings = []
            for source, sourcerev, tagname in pendings:
                if source.startswith(srctagspath):
                    # Source is still inside the tags directory: keep
                    # following it in older revisions.
                    remainings.append([source, sourcerev, tagname])
                    continue
                # From revision may be fake, get one with changes
                try:
                    tagid = self.latest(source, sourcerev)
                    if tagid:
                        tags[tagname] = tagid
                except SvnPathNotFound:
                    # It happens when we are following directories we assumed
                    # were copied with their parents but were really created
                    # in the tag directory.
                    pass
            pendings = remainings
            tagspath = srctagspath

    except SubversionException, (inst, num):
        self.ui.note(_('no tags found at revision %d\n') % start)
    return tags
435 435
def converted(self, rev, destrev):
    """Append a "destrev revnum" line to .svn/hg-shamap in the working
    copy. Does nothing when self.wc is not set. The file is opened on
    first use and flushed after each line."""
    if not self.wc:
        return
    fp = self.convertfp
    if fp is None:
        shamap = os.path.join(self.wc, '.svn', 'hg-shamap')
        fp = self.convertfp = open(shamap, 'a')
    fp.write('%s %d\n' % (destrev, self.revnum(rev)))
    fp.flush()
444 444
445 445 # -- helper functions --
446 446
def revid(self, revnum, module=None):
    """Return the unicode revision id "svn:<uuid><module>@<revnum>".
    self.module is used when module is empty or not given."""
    module = module or self.module
    decoded = module.decode(self.encoding)
    return u"svn:%s%s@%s" % (self.uuid, decoded, revnum)
452 452
def revnum(self, rev):
    """Return the integer following the last '@' of the revision id
    rev (the whole string when there is no '@')."""
    tail = rev[rev.rfind('@') + 1:]
    return int(tail)
455 455
def revsplit(self, rev):
    """Split a revision id into (uuid, module, revnum).

    The first four characters of the id (the "svn:" prefix written by
    revid) are dropped from the uuid. module is '' when the id has no
    path part, otherwise it starts with a slash.
    """
    url, number = rev.encode(self.encoding).split('@', 1)
    slash = url.find('/')
    if slash < 0:
        uuid, mod = url[4:], ''
    else:
        uuid, mod = url[4:slash], url[slash:]
    return uuid, mod, int(number)
465 465
def latest(self, path, stop=0):
    """Find the latest revid affecting path, up to stop. It may return
    a revision in a different module, since a branch may be moved without
    a change being reported. Return None if computed module does not
    belong to rootmodule subtree.

    A false stop means the latest repository revision. Raises
    SvnPathNotFound when path cannot be stat'ed at revision stop.
    """
    if not path.startswith(self.rootmodule):
        # Requests on foreign branches may be forbidden at server level
        self.ui.debug(_('ignoring foreign branch %r\n') % path)
        return None

    if not stop:
        stop = svn.ra.get_latest_revnum(self.ra)
    try:
        # path is passed to stat() relative to the repository root,
        # hence the temporary reparent to ''.
        prevmodule = self.reparent('')
        dirent = svn.ra.stat(self.ra, path.strip('/'), stop)
        self.reparent(prevmodule)
    except SubversionException:
        dirent = None
    if not dirent:
        raise SvnPathNotFound(_('%s not found up to revision %d') % (path, stop))

    # stat() gives us the previous revision on this line of development, but
    # it might be in *another module*. Fetch the log and detect renames down
    # to the latest revision.
    stream = self._getlog([path], stop, dirent.created_rev)
    try:
        for entry in stream:
            paths, revnum, author, date, message = entry
            if revnum <= dirent.created_rev:
                break

            # Follow the first changed path which is a prefix of path and
            # has a copy source: path is rewritten below that source.
            for p in paths:
                if not path.startswith(p) or not paths[p].copyfrom_path:
                    continue
                newpath = paths[p].copyfrom_path + path[len(p):]
                self.ui.debug(_("branch renamed from %s to %s at %d\n") %
                              (path, newpath, revnum))
                path = newpath
                break
    finally:
        stream.close()

    # Renames may have moved path outside of the converted subtree.
    if not path.startswith(self.rootmodule):
        self.ui.debug(_('ignoring foreign branch %r\n') % path)
        return None
    return self.revid(dirent.created_rev, path)
513 513
def get_blacklist(self):
    """Avoid certain revision numbers.
    It is not uncommon for two nearby revisions to cancel each other
    out, e.g. 'I copied trunk into a subdirectory of itself instead
    of making a branch'. The converted repository is significantly
    smaller if we ignore such revisions.

    Revision numbers are read, one per line, from "blacklist.txt" in
    the current directory and stored in self.blacklist. Lines starting
    with '#' and lines which are not integers are skipped. Raises
    IOError when the file cannot be opened; self.blacklist is already
    set to an empty set in that case.
    """
    self.blacklist = util.set()
    blacklist = self.blacklist
    fp = open("blacklist.txt", "r")
    try:
        for line in fp:
            if line.startswith("#"):
                continue
            try:
                blacklist.add(int(line.strip()))
            except ValueError:
                pass # not an integer or a comment
    finally:
        # Release the file handle even if reading fails midway.
        fp.close()
529 529
def is_blacklisted(self, svn_rev):
    """Return True if svn_rev is in self.blacklist."""
    skipped = self.blacklist
    return svn_rev in skipped
532 532
def reparent(self, module):
    """Reparent the svn transport and return the previous parent.

    Nothing is done, and module itself is returned, when the transport
    is already parented at module. A previous parent of None is
    returned as ''.
    """
    previous = self.prevmodule
    if previous == module:
        return module
    target = self.baseurl + urllib.quote(module)
    self.ui.debug(_("reparent to %s\n") % target)
    svn.ra.reparent(self.ra, target)
    self.prevmodule = module
    if previous is None:
        return ''
    return previous
545 545
def expandpaths(self, rev, paths, parents):
    """Expand the changed paths of rev into file entries and copies.

    paths is a sequence of (path, entry) pairs and parents the list of
    parent revision ids. Returns (entries, copies) where entries is the
    de-duplicated list of affected file names and copies maps a copy
    destination to its source. The transport is reparented to the
    module of rev when it differs from self.module.
    """
    entries = []
    copyfrom = {} # Map of entrypath, revision for finding source of deleted revisions.
    copies = {}

    new_module, revnum = self.revsplit(rev)[1:]
    if new_module != self.module:
        self.module = new_module
        self.reparent(self.module)

    for path, ent in paths:
        entrypath = self.getrelpath(path)
        entry = entrypath.decode(self.encoding)

        kind = self._checkpath(entrypath, revnum)
        if kind == svn.core.svn_node_file:
            entries.append(self.recode(entry))
            if not ent.copyfrom_path or not parents:
                continue
            # Copy sources not in parent revisions cannot be represented,
            # ignore their origin for now
            pmodule, prevnum = self.revsplit(parents[0])[1:]
            if ent.copyfrom_rev < prevnum:
                continue
            copyfrom_path = self.getrelpath(ent.copyfrom_path, pmodule)
            if not copyfrom_path:
                continue
            self.ui.debug(_("copied to %s from %s@%s\n") %
                          (entrypath, copyfrom_path, ent.copyfrom_rev))
            copies[self.recode(entry)] = self.recode(copyfrom_path)
        elif kind == 0: # gone, but had better be a deleted *file*
            self.ui.debug(_("gone from %s\n") % ent.copyfrom_rev)

            # if a branch is created but entries are removed in the same
            # changeset, get the right fromrev
            # parents cannot be empty here, you cannot remove things from
            # a root revision.
            uuid, old_module, fromrev = self.revsplit(parents[0])

            basepath = old_module + "/" + self.getrelpath(path)
            entrypath = basepath

            def lookup_parts(p):
                # Return (prefix, entry) for the longest proper prefix of
                # p recorded in copyfrom, or None when there is none.
                rc = None
                parts = p.split("/")
                for i in range(len(parts)):
                    part = "/".join(parts[:i])
                    info = part, copyfrom.get(part, None)
                    if info[1] is not None:
                        self.ui.debug(_("Found parent directory %s\n") % info[1])
                        rc = info
                return rc

            self.ui.debug(_("base, entry %s %s\n") % (basepath, entrypath))

            frompath, froment = lookup_parts(entrypath) or (None, revnum - 1)

            # need to remove fragment from lookup_parts and replace with copyfrom_path
            if frompath is not None:
                self.ui.debug(_("munge-o-matic\n"))
                self.ui.debug(entrypath + '\n')
                self.ui.debug(entrypath[len(frompath):] + '\n')
                entrypath = froment.copyfrom_path + entrypath[len(frompath):]
                fromrev = froment.copyfrom_rev
                self.ui.debug(_("Info: %s %s %s %s\n") % (frompath, froment, ent, entrypath))

            # We can avoid the reparent calls if the module has not changed
            # but it is probably not worth the pain.
            prevmodule = self.reparent('')
            fromkind = svn.ra.check_path(self.ra, entrypath.strip('/'), fromrev)
            self.reparent(prevmodule)

            if fromkind == svn.core.svn_node_file: # a deleted file
                entries.append(self.recode(entry))
            elif fromkind == svn.core.svn_node_dir:
                # print "Deleted/moved non-file:", revnum, path, ent
                # children = self._find_children(path, revnum - 1)
                # print "find children %s@%d from %d action %s" % (path, revnum, ent.copyfrom_rev, ent.action)
                # Sometimes this is tricky. For example: in
                # The Subversion Repository revision 6940 a dir
                # was copied and one of its files was deleted
                # from the new location in the same commit. This
                # code can't deal with that yet.
                if ent.action == 'C':
                    children = self._find_children(path, fromrev)
                else:
                    oroot = entrypath.strip('/')
                    nroot = path.strip('/')
                    children = self._find_children(oroot, fromrev)
                    children = [s.replace(oroot,nroot) for s in children]
                # Mark all [files, not directories] as deleted.
                for child in children:
                    # Can we move a child directory and its
                    # parent in the same commit? (probably can). Could
                    # cause problems if instead of revnum -1,
                    # we have to look in (copyfrom_path, revnum - 1)
                    entrypath = self.getrelpath("/" + child, module=old_module)
                    if entrypath:
                        entry = self.recode(entrypath.decode(self.encoding))
                        if entry in copies:
                            # deleted file within a copy
                            del copies[entry]
                        else:
                            entries.append(entry)
            else:
                self.ui.debug(_('unknown path in revision %d: %s\n') % \
                              (revnum, path))
        elif kind == svn.core.svn_node_dir:
            # Should probably synthesize normal file entries
            # and handle as above to clean up copy/rename handling.

            # If the directory just had a prop change,
            # then we shouldn't need to look for its children.
            if ent.action == 'M':
                continue

            # Also this could create duplicate entries. Not sure
            # whether this will matter. Maybe should make entries a set.
            # print "Changed directory", revnum, path, ent.action, ent.copyfrom_path, ent.copyfrom_rev
            # This will fail if a directory was copied
            # from another branch and then some of its files
            # were deleted in the same transaction.
            children = util.sort(self._find_children(path, revnum))
            for child in children:
                # Can we move a child directory and its
                # parent in the same commit? (probably can). Could
                # cause problems if instead of revnum -1,
                # we have to look in (copyfrom_path, revnum - 1)
                entrypath = self.getrelpath("/" + child)
                # print child, self.module, entrypath
                if entrypath:
                    # Need to filter out directories here...
                    kind = self._checkpath(entrypath, revnum)
                    if kind != svn.core.svn_node_dir:
                        entries.append(self.recode(entrypath))

            # Copies here (must copy all from source)
            # Probably not a real problem for us if
            # source does not exist
            if not ent.copyfrom_path or not parents:
                continue
            # Copy sources not in parent revisions cannot be represented,
            # ignore their origin for now
            pmodule, prevnum = self.revsplit(parents[0])[1:]
            if ent.copyfrom_rev < prevnum:
                continue
            copyfrompath = ent.copyfrom_path.decode(self.encoding)
            copyfrompath = self.getrelpath(copyfrompath, pmodule)
            if not copyfrompath:
                continue
            # Remember the copied directory so that deletions handled in
            # later iterations can be traced back to its source.
            copyfrom[path] = ent
            self.ui.debug(_("mark %s came from %s:%d\n")
                          % (path, copyfrompath, ent.copyfrom_rev))
            children = self._find_children(ent.copyfrom_path, ent.copyfrom_rev)
            children.sort()
            for child in children:
                entrypath = self.getrelpath("/" + child, pmodule)
                if not entrypath:
                    continue
                entry = entrypath.decode(self.encoding)
                copytopath = path + entry[len(copyfrompath):]
                copytopath = self.getrelpath(copytopath)
                copies[self.recode(copytopath)] = self.recode(entry, pmodule)

    return (util.unique(entries), copies)
711 711
def _fetch_revisions(self, from_revnum, to_revnum):
    """Read the log of self.module between the two revisions and fill
    self.commits and self.paths with the parsed entries.

    The bounds may be given in any order; the log is requested from the
    higher one down to the lower one. Raises util.Abort when Subversion
    reports SVN_ERR_FS_NO_SUCH_REVISION; other SubversionExceptions are
    re-raised.
    """
    if from_revnum < to_revnum:
        from_revnum, to_revnum = to_revnum, from_revnum

    # Commit parsed last, i.e. the child of the next one to be parsed.
    self.child_cset = None

    def isdescendantof(parent, child):
        # True if child is a path strictly below the parent directory.
        if not child or not parent or not child.startswith(parent):
            return False
        subpath = child[len(parent):]
        return len(subpath) > 1 and subpath[0] == '/'

    def parselogentry(orig_paths, revnum, author, date, message):
        """Return the parsed commit object or None, and True if
        the revision is a branch root.
        """
        self.ui.debug(_("parsing revision %d (%d changes)\n") %
                      (revnum, len(orig_paths)))

        branched = False
        rev = self.revid(revnum)
        # branch log might return entries for a parent we already have

        if (rev in self.commits or revnum < to_revnum):
            return None, branched

        parents = []
        # check whether this revision is the start of a branch or part
        # of a branch renaming
        orig_paths = util.sort(orig_paths.items())
        root_paths = [(p,e) for p,e in orig_paths if self.module.startswith(p)]
        if root_paths:
            path, ent = root_paths[-1]
            if ent.copyfrom_path:
                # If dir was moved while one of its file was removed
                # the log may look like:
                # A /dir (from /dir:x)
                # A /dir/a (from /dir/a:y)
                # A /dir/b (from /dir/b:z)
                # ...
                # for all remaining children.
                # Let's take the highest child element from rev as source.
                copies = [(p,e) for p,e in orig_paths[:-1]
                          if isdescendantof(ent.copyfrom_path, e.copyfrom_path)]
                fromrev = max([e.copyfrom_rev for p,e in copies] + [ent.copyfrom_rev])
                branched = True
                newpath = ent.copyfrom_path + self.module[len(path):]
                # ent.copyfrom_rev may not be the actual last revision
                previd = self.latest(newpath, fromrev)
                if previd is not None:
                    prevmodule, prevnum = self.revsplit(previd)[1:]
                    if prevnum >= self.startrev:
                        parents = [previd]
                        self.ui.note(_('found parent of branch %s at %d: %s\n') %
                                     (self.module, prevnum, prevmodule))
            else:
                self.ui.debug(_("No copyfrom path, don't know what to do.\n"))

        paths = []
        # filter out unrelated paths
        for path, ent in orig_paths:
            if self.getrelpath(path) is None:
                continue
            paths.append((path, ent))

        # Example SVN datetime. Includes microseconds.
        # ISO-8601 conformant
        # '2007-01-04T17:35:00.902377Z'
        date = util.parsedate(date[:19] + " UTC", ["%Y-%m-%dT%H:%M:%S"])

        log = message and self.recode(message) or ''
        author = author and self.recode(author) or ''
        try:
            # The branch is named after the last component of the module;
            # 'trunk' is mapped to the empty branch name.
            branch = self.module.split("/")[-1]
            if branch == 'trunk':
                branch = ''
        except IndexError:
            branch = None

        cset = commit(author=author,
                      date=util.datestr(date),
                      desc=log,
                      parents=parents,
                      branch=branch,
                      rev=rev.encode('utf-8'))

        self.commits[rev] = cset
        # The parents list is *shared* among self.paths and the
        # commit object. Both will be updated below.
        self.paths[rev] = (paths, cset.parents)
        # The log is walked backwards: this revision is the parent of
        # the one parsed just before, unless that one already has one.
        if self.child_cset and not self.child_cset.parents:
            self.child_cset.parents[:] = [rev]
        self.child_cset = cset
        return cset, branched

    self.ui.note(_('fetching revision log for "%s" from %d to %d\n') %
                 (self.module, from_revnum, to_revnum))

    try:
        firstcset = None
        lastonbranch = False
        stream = self._getlog([self.module], from_revnum, to_revnum)
        try:
            for entry in stream:
                paths, revnum, author, date, message = entry
                if revnum < self.startrev:
                    lastonbranch = True
                    break
                if self.is_blacklisted(revnum):
                    self.ui.note(_('skipping blacklisted revision %d\n')
                                 % revnum)
                    continue
                if paths is None:
                    self.ui.debug(_('revision %d has no entries\n') % revnum)
                    continue
                cset, lastonbranch = parselogentry(paths, revnum, author,
                                                   date, message)
                if cset:
                    firstcset = cset
                if lastonbranch:
                    break
        finally:
            stream.close()

        if not lastonbranch and firstcset and not firstcset.parents:
            # The first revision of the sequence (the last fetched one)
            # has invalid parents if not a branch root. Find the parent
            # revision now, if any.
            try:
                firstrevnum = self.revnum(firstcset.rev)
                if firstrevnum > 1:
                    latest = self.latest(self.module, firstrevnum - 1)
                    if latest:
                        firstcset.parents.append(latest)
            except SvnPathNotFound:
                pass
    except SubversionException, (inst, num):
        if num == svn.core.SVN_ERR_FS_NO_SUCH_REVISION:
            raise util.Abort(_('svn: branch has no revision %s') % to_revnum)
        raise
852 852
853 853 def _getfile(self, file, rev):
854 io = StringIO()
855 854 # TODO: ra.get_file transmits the whole file instead of diffs.
856 855 mode = ''
857 856 try:
858 857 new_module, revnum = self.revsplit(rev)[1:]
859 858 if self.module != new_module:
860 859 self.module = new_module
861 860 self.reparent(self.module)
861 io = StringIO()
862 862 info = svn.ra.get_file(self.ra, file, revnum, io)
863 data = io.getvalue()
864 # ra.get_files() seems to keep a reference on the input buffer
865 # preventing collection. Release it explicitely.
866 io.close()
863 867 if isinstance(info, list):
864 868 info = info[-1]
865 869 mode = ("svn:executable" in info) and 'x' or ''
866 870 mode = ("svn:special" in info) and 'l' or mode
867 871 except SubversionException, e:
868 872 notfound = (svn.core.SVN_ERR_FS_NOT_FOUND,
869 873 svn.core.SVN_ERR_RA_DAV_PATH_NOT_FOUND)
870 874 if e.apr_err in notfound: # File not found
871 875 raise IOError()
872 876 raise
873 data = io.getvalue()
874 877 if mode == 'l':
875 878 link_prefix = "link "
876 879 if data.startswith(link_prefix):
877 880 data = data[len(link_prefix):]
878 881 return data, mode
879 882
def _find_children(self, path, revnum):
    """List the entries found under path at revision revnum.

    The listing comes from svn.client.ls() called with its third
    argument set to True. Each returned item is the entry name prefixed
    with path (stripped of surrounding slashes) and a '/'.
    """
    stripped = path.strip('/')
    pool = Pool()
    url = (self.baseurl + '/' + urllib.quote(stripped)).strip('/')
    listing = svn.client.ls(url, optrev(revnum), True, self.ctx, pool)
    children = []
    for name in listing.keys():
        children.append('%s/%s' % (stripped, name))
    return children
886 889
def getrelpath(self, path, module=None):
    """Return path relative to module, or None if it lies outside it.

    module defaults to self.module. For example, with module
    "/CMFPlone/branches/Plone-2_0-branch" the log path
    "/CMFPlone/branches/Plone-2_0-branch/tests/PloneTestCase.py"
    yields "tests/PloneTestCase.py". The module itself yields ''.
    """
    if module is None:
        module = self.module
    if path.startswith(module):
        tail = path.rstrip('/')[len(module):]
        if tail[:1] == '/':
            return tail[1:]
        if not tail:
            return tail

    # Either the prefix did not match, or it matched only part of a path
    # component: the path is outside our tracked tree.
    self.ui.debug(_('%r is not under %r, ignoring\n') % (path, module))
    return None
906 909
def _checkpath(self, path, revnum):
    """Return svn.ra.check_path()'s result for path at revision revnum."""
    # Surrounding slashes are removed first: ra.check_path does not like
    # leading slashes very much, they lead to PROPFIND subversion errors.
    relpath = path.strip('/')
    return svn.ra.check_path(self.ra, relpath, revnum)
911 914
def _getlog(self, paths, start, end, limit=0, discover_changed_paths=True,
            strict_node_history=False):
    """Start an "hg debugsvnlog" child process and return its log stream.

    The request (base URL, normalized paths, revision range and options)
    is serialized with encodeargs() and written to the child's stdin;
    the child's stdout is returned wrapped in a logstream.
    """
    def normalize(p):
        # svn >= 1.5 only wants paths relative to the supplied URL:
        # anchor non-absolute names on the current module, then drop the
        # surrounding slashes.
        if not p.startswith('/'):
            p = self.module + '/' + p
        return p.strip('/')

    relpaths = [normalize(p) for p in paths]
    payload = encodeargs([self.baseurl, relpaths, start, end, limit,
                          discover_changed_paths, strict_node_history])
    cmd = '%s debugsvnlog' % util.shellquote(util.hgexecutable())
    stdin, stdout = util.popen2(cmd, 'b')
    stdin.write(payload)
    stdin.close()
    return logstream(stdout)
930 933
# Shell script written by svn_sink.__init__ to the 'pre-revprop-change'
# hook of a repository it has just created. It exits 0 only for
# modifications of svn:log and additions of the hg:convert-branch and
# hg:convert-rev revision properties; any other change is refused with
# exit status 1.
pre_revprop_change = '''#!/bin/sh

REPOS="$1"
REV="$2"
USER="$3"
PROPNAME="$4"
ACTION="$5"

if [ "$ACTION" = "M" -a "$PROPNAME" = "svn:log" ]; then exit 0; fi
if [ "$ACTION" = "A" -a "$PROPNAME" = "hg:convert-branch" ]; then exit 0; fi
if [ "$ACTION" = "A" -a "$PROPNAME" = "hg:convert-rev" ]; then exit 0; fi

echo "Changing prohibited revision property" >&2
exit 1
'''
946 949
class svn_sink(converter_sink, commandline):
    """Conversion sink that commits changesets through the svn command line.

    Changes are applied to a Subversion working copy (self.wc) and then
    committed with "svn commit". Pending deletions, copies and
    svn:executable property changes are queued on the instance and
    flushed by putcommit().
    """

    # Extracts the new revision number from "svn commit" output.
    commit_re = re.compile(r'Committed revision (\d+).', re.M)

    def prerun(self):
        """Change into the working copy, if one is set."""
        if self.wc:
            os.chdir(self.wc)

    def postrun(self):
        """Change back to the directory recorded at construction time."""
        if self.wc:
            os.chdir(self.cwd)

    def join(self, name):
        """Return the path of name inside the working copy's .svn directory."""
        return os.path.join(self.wc, '.svn', name)

    def revmapfile(self):
        return self.join('hg-shamap')

    def authorfile(self):
        return self.join('hg-authormap')

    def __init__(self, ui, path):
        """Prepare the working copy used as the conversion target.

        If path is already a working copy (it has a .svn/entries file) it
        is updated and used directly. Otherwise a working copy named
        "<basename of path>-wc" is checked out in the current directory;
        when path is a local location without a db/fs-type file, a
        repository is first created there with "svnadmin create" and its
        pre-revprop-change hook is installed.
        """
        converter_sink.__init__(self, ui, path)
        commandline.__init__(self, ui, 'svn')
        self.delete = []
        self.setexec = []
        self.delexec = []
        self.copies = []
        self.wc = None
        self.cwd = os.getcwd()

        path = os.path.realpath(path)

        created = False
        if os.path.isfile(os.path.join(path, '.svn', 'entries')):
            self.wc = path
            self.run0('update')
        else:
            wcpath = os.path.join(os.getcwd(), os.path.basename(path) + '-wc')

            if os.path.isdir(os.path.dirname(path)):
                if not os.path.exists(os.path.join(path, 'db', 'fs-type')):
                    ui.status(_('initializing svn repo %r\n') %
                              os.path.basename(path))
                    commandline(ui, 'svnadmin').run0('create', path)
                    created = path
                # Turn the local path into a file:// URL for checkout.
                path = util.normpath(path)
                if not path.startswith('/'):
                    path = '/' + path
                path = 'file://' + path

            ui.status(_('initializing svn wc %r\n') % os.path.basename(wcpath))
            self.run0('checkout', path, wcpath)

            self.wc = wcpath
        self.opener = util.opener(self.wc)
        self.wopener = util.opener(self.wc)
        self.childmap = mapfile(ui, self.join('hg-childmap'))
        self.is_exec = util.checkexec(self.wc) and util.is_exec or None

        if created:
            hook = os.path.join(created, 'hooks', 'pre-revprop-change')
            fp = open(hook, 'w')
            try:
                fp.write(pre_revprop_change)
            finally:
                fp.close()
            util.set_flags(hook, False, True)

        xport = transport.SvnRaTransport(url=geturl(path))
        self.uuid = svn.ra.get_uuid(xport.ra)

    def wjoin(self, *names):
        """Join names onto the working copy root."""
        return os.path.join(self.wc, *names)

    def putfile(self, filename, flags, data):
        """Write data to filename in the working copy.

        With 'l' in flags a symlink pointing at data is created instead.
        For regular files, the needed svn:executable change (if any) is
        queued in self.setexec / self.delexec for putcommit().
        """
        if 'l' in flags:
            self.wopener.symlink(data, filename)
        else:
            wpath = self.wjoin(filename)
            try:
                if os.path.islink(wpath):
                    # Remove the link that was just tested: filename alone
                    # would be resolved against the current directory
                    # rather than the working copy.
                    os.unlink(wpath)
            except OSError:
                pass
            fp = self.wopener(filename, 'w')
            try:
                fp.write(data)
            finally:
                fp.close()

            if self.is_exec:
                was_exec = self.is_exec(wpath)
            else:
                # On filesystems not supporting execute-bit, there is no way
                # to know if it is set but asking subversion. Setting it
                # systematically is just as expensive and much simpler.
                was_exec = 'x' not in flags

            util.set_flags(wpath, False, 'x' in flags)
            if was_exec:
                if 'x' not in flags:
                    self.delexec.append(filename)
            else:
                if 'x' in flags:
                    self.setexec.append(filename)

    def _copyfile(self, source, dest):
        """Record a copy from source to dest with "svn copy"."""
        # SVN's copy command pukes if the destination file exists, but
        # our copyfile method expects to record a copy that has
        # already occurred. Cross the semantic gap.
        wdest = self.wjoin(dest)
        exists = os.path.exists(wdest)
        if exists:
            # Reserve a unique name next to the destination and move the
            # existing file out of the way.
            fd, tempname = tempfile.mkstemp(
                prefix='hg-copy-', dir=os.path.dirname(wdest))
            os.close(fd)
            os.unlink(tempname)
            os.rename(wdest, tempname)
        try:
            self.run0('copy', source, dest)
        finally:
            if exists:
                # Put the already-written content back in place of
                # whatever "svn copy" created.
                try:
                    os.unlink(wdest)
                except OSError:
                    pass
                os.rename(tempname, wdest)

    def dirs_of(self, files):
        """Return the set of directories in files plus all their ancestors."""
        dirs = util.set()
        for f in files:
            if os.path.isdir(self.wjoin(f)):
                dirs.add(f)
            for i in strutil.rfindall(f, '/'):
                dirs.add(f[:i])
        return dirs

    def add_dirs(self, files):
        """"svn add" the directories of files lacking .svn/entries.

        Returns the list of directories that were added.
        """
        add_dirs = [d for d in util.sort(self.dirs_of(files))
                    if not os.path.exists(self.wjoin(d, '.svn', 'entries'))]
        if add_dirs:
            self.xargs(add_dirs, 'add', non_recursive=True, quiet=True)
        return add_dirs

    def add_files(self, files):
        """"svn add" files (if any) and return them."""
        if files:
            self.xargs(files, 'add', quiet=True)
        return files

    def tidy_dirs(self, names):
        """"svn delete" directories of names that only contain '.svn'.

        Directories are visited in reverse sorted order, so children come
        before their parents. Returns the list of deleted directories.
        """
        dirs = util.sort(self.dirs_of(names))
        dirs.reverse()
        deleted = []
        for d in dirs:
            wd = self.wjoin(d)
            # os.listdir() returns a list, so it must be compared with a
            # list; comparing with the bare string never matched.
            if os.listdir(wd) == ['.svn']:
                self.run0('delete', d)
                deleted.append(d)
        return deleted

    def addchild(self, parent, child):
        self.childmap[parent] = child

    def revid(self, rev):
        """Return the sink revision identifier for svn revision rev."""
        return u"svn:%s@%s" % (self.uuid, rev)

    def putcommit(self, files, copies, parents, commit, source):
        """Apply a changeset to the working copy and commit it.

        files is a sequence of (name, version) pairs fetched from source;
        a file whose retrieval raises IOError is scheduled for deletion.
        If one of parents already has a recorded child, that child's
        revision id is returned and nothing is committed. Otherwise the
        queued adds, copies, deletes and property changes are run,
        "svn commit" is invoked and the new revision id is returned.

        Raises util.Abort when the commit output cannot be parsed.
        """
        # Apply changes to working copy
        for f, v in files:
            try:
                data = source.getfile(f, v)
            except IOError:
                self.delete.append(f)
            else:
                e = source.getmode(f, v)
                self.putfile(f, e, data)
                if f in copies:
                    self.copies.append([copies[f], f])
        files = [f[0] for f in files]

        for parent in parents:
            try:
                return self.revid(self.childmap[parent])
            except KeyError:
                pass
        entries = util.set(self.delete)
        files = util.frozenset(files)
        entries.update(self.add_dirs(files.difference(entries)))
        if self.copies:
            for s, d in self.copies:
                self._copyfile(s, d)
            self.copies = []
        if self.delete:
            self.xargs(self.delete, 'delete')
            self.delete = []
        entries.update(self.add_files(files.difference(entries)))
        entries.update(self.tidy_dirs(entries))
        if self.delexec:
            self.xargs(self.delexec, 'propdel', 'svn:executable')
            self.delexec = []
        if self.setexec:
            self.xargs(self.setexec, 'propset', 'svn:executable', '*')
            self.setexec = []

        # The commit message is handed to svn through a temporary file.
        fd, messagefile = tempfile.mkstemp(prefix='hg-convert-')
        fp = os.fdopen(fd, 'w')
        fp.write(commit.desc)
        fp.close()
        try:
            output = self.run0('commit',
                               username=util.shortuser(commit.author),
                               file=messagefile,
                               encoding='utf-8')
            try:
                rev = self.commit_re.search(output).group(1)
            except AttributeError:
                # search() returned None: no "Committed revision" line.
                self.ui.warn(_('unexpected svn output:\n'))
                self.ui.warn(output)
                raise util.Abort(_('unable to cope with svn output'))
            if commit.rev:
                self.run('propset', 'hg:convert-rev', commit.rev,
                         revprop=True, revision=rev)
            if commit.branch and commit.branch != 'default':
                self.run('propset', 'hg:convert-branch', commit.branch,
                         revprop=True, revision=rev)
            for parent in parents:
                self.addchild(parent, rev)
            return self.revid(rev)
        finally:
            os.unlink(messagefile)

    def puttags(self, tags):
        self.ui.warn(_('XXX TAGS NOT IMPLEMENTED YET\n'))
General Comments 0
You need to be logged in to leave comments. Login now