##// END OF EJS Templates
convert: avoid svn log retrieval process cleanup...
Patrick Mezard -
r6397:e1402cf5 default
parent child Browse files
Show More
@@ -1,1073 +1,1077
1 1 # Subversion 1.4/1.5 Python API backend
2 2 #
3 3 # Copyright(C) 2007 Daniel Holth et al
4 4 #
5 5 # Configuration options:
6 6 #
7 7 # convert.svn.trunk
8 8 # Relative path to the trunk (default: "trunk")
9 9 # convert.svn.branches
10 10 # Relative path to tree of branches (default: "branches")
11 11 # convert.svn.tags
12 12 # Relative path to tree of tags (default: "tags")
13 13 #
14 14 # Set these in a hgrc, or on the command line as follows:
15 15 #
16 16 # hg convert --config convert.svn.trunk=wackoname [...]
17 17
18 18 import locale
19 19 import os
20 20 import re
21 21 import sys
22 22 import cPickle as pickle
23 23 import tempfile
24 24
25 25 from mercurial import strutil, util
26 26 from mercurial.i18n import _
27 27
28 28 # Subversion stuff. Works best with very recent Python SVN bindings
29 29 # e.g. SVN 1.5 or backports. Thanks to the bzr folks for enhancing
30 30 # these bindings.
31 31
32 32 from cStringIO import StringIO
33 33
34 34 from common import NoRepo, commit, converter_source, encodeargs, decodeargs
35 35 from common import commandline, converter_sink, mapfile
36 36
37 37 try:
38 38 from svn.core import SubversionException, Pool
39 39 import svn
40 40 import svn.client
41 41 import svn.core
42 42 import svn.ra
43 43 import svn.delta
44 44 import transport
45 45 except ImportError:
46 46 pass
47 47
def geturl(path):
    """Return a Subversion URL for ``path``.

    The Subversion client bindings are asked first to derive a URL from the
    canonicalized path.  If they raise SubversionException, an existing
    local directory is converted to a ``file://`` URL (with a leading slash
    and normalized separators on Windows); anything else is returned
    unchanged.
    """
    try:
        canonical = svn.core.svn_path_canonicalize(path)
        return svn.client.url_from_path(canonical)
    except SubversionException:
        pass
    if not os.path.isdir(path):
        return path
    local = os.path.normpath(os.path.abspath(path))
    if os.name == 'nt':
        local = '/' + util.normpath(local)
    return 'file://%s' % local
59 59
def optrev(number):
    """Build an ``svn_opt_revision_t`` that designates revision ``number``."""
    revision = svn.core.svn_opt_revision_t()
    revision.kind = svn.core.svn_opt_revision_number
    revision.value.number = number
    return revision
65 65
class changedpath(object):
    """Plain-Python copy of a changed-path log entry.

    Only ``copyfrom_path``, ``copyfrom_rev`` and ``action`` are copied from
    the source object ``p``; get_log_child() pickles instances of this
    class in place of the objects handed to its log receiver.
    """
    def __init__(self, p):
        for field in ('copyfrom_path', 'copyfrom_rev', 'action'):
            setattr(self, field, getattr(p, field))
71 71
72 72 def get_log_child(fp, url, paths, start, end, limit=0, discover_changed_paths=True,
73 73 strict_node_history=False):
74 74 protocol = -1
75 75 def receiver(orig_paths, revnum, author, date, message, pool):
76 76 if orig_paths is not None:
77 77 for k, v in orig_paths.iteritems():
78 78 orig_paths[k] = changedpath(v)
79 79 pickle.dump((orig_paths, revnum, author, date, message),
80 80 fp, protocol)
81 81
82 82 try:
83 83 # Use an ra of our own so that our parent can consume
84 84 # our results without confusing the server.
85 85 t = transport.SvnRaTransport(url=url)
86 86 svn.ra.get_log(t.ra, paths, start, end, limit,
87 87 discover_changed_paths,
88 88 strict_node_history,
89 89 receiver)
90 90 except SubversionException, (inst, num):
91 91 pickle.dump(num, fp, protocol)
92 92 except IOError:
93 93 # Caller may interrupt the iteration
94 94 pickle.dump(None, fp, protocol)
95 95 else:
96 96 pickle.dump(None, fp, protocol)
97 97 fp.close()
98 # With large history, cleanup process goes crazy and suddenly
99 # consumes *huge* amount of memory. The output file being closed,
100 # there is no need for clean termination.
101 os._exit(0)
98 102
def debugsvnlog(ui, **opts):
    """Fetch SVN log in a subprocess and channel them back to parent to
    avoid memory collection issues.

    The encoded argument list is read from stdin and the pickled log
    entries are written to stdout; both streams are switched to binary
    mode first.
    """
    for stream in (sys.stdin, sys.stdout):
        util.set_binary(stream)
    request = sys.stdin.read()
    get_log_child(sys.stdout, *decodeargs(request))
107 111
class logstream:
    """Interruptible revision log iterator.

    Wraps the file-like object ``stdout`` from which pickled log entries
    are read.  Iterating yields each
    ``(orig_paths, revnum, author, date, message)`` tuple until a pickled
    ``None`` end marker is read.  Any other non-entry value is reported by
    raising SubversionException("child raised exception", value).

    NOTE: entries are unpickled, so ``stdout`` must come from a trusted
    producer (get_log() feeds it from our own child process).
    """
    def __init__(self, stdout):
        self._stdout = stdout

    def __iter__(self):
        while True:
            entry = pickle.load(self._stdout)
            if entry is None:
                # end-of-stream marker
                break
            try:
                orig_paths, revnum, author, date, message = entry
            except (TypeError, ValueError):
                # Not a 5-item entry: the child sent an error value. Only
                # unpacking failures are caught so that KeyboardInterrupt
                # and friends are never converted into a log error.
                raise SubversionException("child raised exception", entry)
            yield entry

    def close(self):
        """Close the underlying stream; calling it again is a no-op."""
        if self._stdout:
            self._stdout.close()
            self._stdout = None
128 132
def get_log(url, paths, start, end, limit=0, discover_changed_paths=True,
            strict_node_history=False):
    """Start an ``hg debugsvnlog`` child process and return its log stream.

    The arguments are encoded and written to the child's stdin, which is
    then closed; the child's stdout is wrapped in a logstream that the
    caller iterates over and must close().
    """
    payload = encodeargs([url, paths, start, end, limit,
                          discover_changed_paths, strict_node_history])
    cmd = '%s debugsvnlog' % util.shellquote(util.hgexecutable())
    child_in, child_out = os.popen2(cmd, 'b')
    child_in.write(payload)
    child_in.close()
    return logstream(child_out)
140 144
141 145 # SVN conversion code stolen from bzr-svn and tailor
142 146 #
143 147 # Subversion looks like a versioned filesystem, branches structures
144 148 # are defined by conventions and not enforced by the tool. First,
145 149 # we define the potential branches (modules) as "trunk" and "branches"
146 150 # children directories. Revisions are then identified by their
147 151 # module and revision number (and a repository identifier).
148 152 #
149 153 # The revision graph is really a tree (or a forest). By default, a
150 154 # revision parent is the previous revision in the same module. If the
151 155 # module directory is copied/moved from another module then the
152 156 # revision is the module root and its parent the source revision in
153 157 # the parent module. A revision has at most one parent.
154 158 #
155 159 class svn_source(converter_source):
def __init__(self, ui, url, rev=None):
    """Open the Subversion repository at ``url`` as a conversion source.

    ``url`` may carry a trailing ``@REV`` which is used as the revision
    to start looking for the head from; an explicit ``rev`` overrides it.
    Raises NoRepo when the bindings are missing or the URL cannot be
    opened, and util.Abort when ``rev`` or the ``convert.svn.startrev``
    setting is not an integer or no revision is found in the module.
    """
    super(svn_source, self).__init__(ui, url, rev=rev)

    # The name only exists if the guarded import at the top succeeded.
    try:
        SubversionException
    except NameError:
        raise NoRepo('Subversion python bindings could not be loaded')

    self.encoding = locale.getpreferredencoding()
    self.lastrevs = {}

    # Support file://path@rev syntax. Useful e.g. to convert
    # deleted branches.
    latest = None
    sep = url.rfind('@')
    if sep >= 0:
        try:
            latest = int(url[sep+1:])
            url = url[:sep]
        except ValueError:
            # what follows '@' is not a revision number: keep url whole
            pass
    self.url = geturl(url)
    self.encoding = 'UTF-8' # Subversion is always nominal UTF-8
    try:
        self.transport = transport.SvnRaTransport(url=self.url)
        self.ra = self.transport.ra
        self.ctx = self.transport.client
        self.base = svn.ra.get_repos_root(self.ra)
        self.module = self.url[len(self.base):]
        self.rootmodule = self.module
        self.commits = {}
        self.paths = {}
        self.uuid = svn.ra.get_uuid(self.ra).decode(self.encoding)
    except SubversionException:
        ui.print_exc()
        raise NoRepo("%s does not look like a Subversion repo" % self.url)

    if rev:
        try:
            latest = int(rev)
        except ValueError:
            raise util.Abort('svn: revision %s is not an integer' % rev)

    # Keep the raw setting on self so the error message can show it.
    self.startrev = self.ui.config('convert', 'svn.startrev', default=0)
    try:
        self.startrev = max(int(self.startrev), 0)
    except ValueError:
        raise util.Abort(_('svn: start revision %s is not an integer')
                         % self.startrev)

    # The blacklist file is optional.
    try:
        self.get_blacklist()
    except IOError:
        pass

    self.head = self.latest(self.module, latest)
    if not self.head:
        raise util.Abort(_('no revision found in module %s') %
                         self.module.encode(self.encoding))
    self.last_changed = self.revnum(self.head)

    self._changescache = None

    # Remember the working copy directory when url points at one.
    self.wc = None
    if os.path.exists(os.path.join(url, '.svn/entries')):
        self.wc = url
    self.convertfp = None
226 230
def setrevmap(self, revmap):
    """Record, per module, the highest revision number found in ``revmap``.

    The keys of ``revmap`` are revision ids understood by revsplit(); the
    result is stored in ``self.lastrevs``.
    """
    newest = {}
    for revid in revmap.iterkeys():
        _uuid, module, revnum = self.revsplit(revid)
        if module not in newest or revnum > newest[module]:
            newest[module] = revnum
    self.lastrevs = newest
235 239
def exists(self, path, optrev):
    """Return True if ``path`` (relative to self.url) can be listed at
    revision ``optrev``, False when the listing raises SubversionException.
    """
    target = self.url.rstrip('/') + '/' + path
    try:
        svn.client.ls(target, optrev, False, self.ctx)
    except SubversionException:
        return False
    return True
243 247
def getheads(self):
    """Return the list of head revision ids to convert.

    The trunk, tags and branches locations are read from the
    ``convert.svn.*`` settings (defaulting to the setting name itself).
    When a trunk exists, ``self.module`` and ``self.head`` are moved to
    it.  ``self.heads`` starts with the module head and gains one entry
    per non-empty branch found under the branches directory;
    ``self.tags`` is set to the tags directory path.

    Raises util.Abort if an explicitly configured location is missing, if
    the trunk has no revision, or if a start revision is combined with
    more than one head or lies after the only head.
    """

    def getcfgpath(name, rev):
        # An explicitly empty setting disables the location; a missing
        # setting falls back to the conventional name.
        cfgpath = self.ui.config('convert', 'svn.' + name)
        if cfgpath is not None and cfgpath.strip() == '':
            return None
        path = (cfgpath or name).strip('/')
        if not self.exists(path, rev):
            if cfgpath:
                raise util.Abort(_('expected %s to be at %r, but not found')
                                 % (name, path))
            return None
        self.ui.note(_('found %s at %r\n') % (name, path))
        return path

    rev = optrev(self.last_changed)
    oldmodule = ''
    trunk = getcfgpath('trunk', rev)
    tags = getcfgpath('tags', rev)
    branches = getcfgpath('branches', rev)

    # If the project has a trunk or branches, we will extract heads
    # from them. We keep the project root otherwise.
    if trunk:
        oldmodule = self.module or ''
        self.module += '/' + trunk
        self.head = self.latest(self.module, self.last_changed)
        if not self.head:
            raise util.Abort(_('no revision found in module %s') %
                             self.module.encode(self.encoding))

    # First head in the list is the module's head
    self.heads = [self.head]
    self.tags = '%s/%s' % (oldmodule , (tags or 'tags'))

    # Check if branches bring a few more heads to the list
    if branches:
        rpath = self.url.strip('/')
        branchnames = svn.client.ls(rpath + '/' + branches, rev, False,
                                    self.ctx)
        for branch in branchnames.keys():
            module = '%s/%s/%s' % (oldmodule, branches, branch)
            brevid = self.latest(module, self.last_changed)
            if not brevid:
                self.ui.note(_('ignoring empty branch %s\n') %
                             branch.encode(self.encoding))
                continue
            self.ui.note('found branch %s at %d\n' %
                         (branch, self.revnum(brevid)))
            self.heads.append(brevid)

    if self.startrev and self.heads:
        if len(self.heads) > 1:
            # The original message contained a duplicated "with".
            raise util.Abort(_('svn: start revision is not supported '
                               'with more than one branch'))
        revnum = self.revnum(self.heads[0])
        if revnum < self.startrev:
            raise util.Abort(_('svn: no revision found after start revision %d')
                             % self.startrev)

    return self.heads
305 309
def getfile(self, file, rev):
    """Return the content of ``file`` at ``rev``.

    The mode reported by _getfile() is remembered in ``self.modecache``
    under the key ``(file, rev)`` for a later getmode() call.
    """
    content, flags = self._getfile(file, rev)
    self.modecache[(file, rev)] = flags
    return content
310 314
def getmode(self, file, rev):
    """Return the mode cached by getfile() for ``(file, rev)``.

    Raises KeyError if getfile() has not been called for that pair.
    """
    key = (file, rev)
    return self.modecache[key]
313 317
def getchanges(self, rev):
    """Return ``(files, copies)`` for revision ``rev``.

    ``files`` is a sorted list of ``(name, rev)`` pairs.  A result stored
    by getchangedfiles() for the same ``rev`` is returned as is.
    Otherwise the mode cache is reset, the entry for ``rev`` is consumed
    from ``self.paths`` and either expanded through expandpaths() or, for
    a revision without parents, replaced by a full recursive listing of
    the module's files.
    """
    cached = self._changescache
    if cached and cached[0] == rev:
        return cached[1]
    self._changescache = None
    self.modecache = {}
    paths, parents = self.paths[rev]
    if not parents:
        # Perform a full checkout on roots
        uuid, module, revnum = self.revsplit(rev)
        listing = svn.client.ls(self.base + module, optrev(revnum),
                                True, self.ctx)
        files = [name for name, ent in listing.iteritems()
                 if ent.kind == svn.core.svn_node_file]
        copies = {}
    else:
        files, copies = self.expandpaths(rev, paths, parents)

    files.sort()
    files = [(name, rev) for name in files]

    # caller caches the result, so free it here to release memory
    del self.paths[rev]
    return (files, copies)
337 341
def getchangedfiles(self, rev, i):
    """Return the names of the files changed in ``rev``.

    The full getchanges() result is kept in ``self._changescache`` so the
    next getchanges() call for the same revision can reuse it.  ``i`` is
    not used.
    """
    result = self.getchanges(rev)
    self._changescache = (rev, result)
    files = result[0]
    return [entry[0] for entry in files]
342 346
def getcommit(self, rev):
    """Return the commit object for ``rev``, fetching the log if needed.

    The commit is removed from ``self.commits`` before being returned.
    Raises KeyError if fetching did not produce an entry for ``rev``.
    """
    if rev not in self.commits:
        uuid, module, revnum = self.revsplit(rev)
        self.module = module
        self.reparent(module)
        # We assume that:
        # - requests for revisions after "stop" come from the
        # revision graph backward traversal. Cache all of them
        # down to stop, they will be used eventually.
        # - requests for revisions before "stop" come to get
        # isolated branches parents. Just fetch what is needed.
        stop = self.lastrevs.get(module, 0)
        if revnum < stop:
            stop = revnum + 1
        self._fetch_revisions(revnum, stop)
    # caller caches the result, so free it here to release memory
    return self.commits.pop(rev)
362 366
def gettags(self):
    """Return a dict mapping tag names to the revision ids they point to.

    The log of the tags directory is read from ``self.startrev`` up to
    the head revision.  Every changed path below the tags directory
    contributes one entry, named after its last path component and
    pointing at the copy source of that path.  An empty dict is returned
    when ``self.tags`` is None; a SubversionException while reading the
    log is reported through ui.note() and the tags collected so far are
    returned.
    """
    tags = {}
    if self.tags is None:
        return tags

    start = self.revnum(self.head)
    prefix = self.tags + '/'
    try:
        stream = get_log(self.url, [self.tags], self.startrev, start)
        try:
            for entry in stream:
                orig_paths, revnum, author, date, message = entry
                if not orig_paths:
                    # a log entry may carry no changed paths at all
                    continue
                for path in orig_paths:
                    if not path.startswith(prefix):
                        continue
                    ent = orig_paths[path]
                    # NOTE(review): entries that are not copies have no
                    # copyfrom_path, in which case revid() falls back to
                    # self.module -- confirm this is intended.
                    source = ent.copyfrom_path
                    rev = ent.copyfrom_rev
                    tag = path.split('/')[-1]
                    tags[tag] = self.revid(rev, module=source)
        finally:
            # always release the child process pipe, as the other
            # get_log() callers do
            stream.close()
    except SubversionException:
        self.ui.note('no tags found at revision %d\n' % start)
    return tags
383 387
def converted(self, rev, destrev):
    """Append ``destrev`` and the revision number of ``rev`` to the
    ``.svn/hg-shamap`` file of the working copy.

    Does nothing when the source is not a working copy.  The file is
    opened in append mode on first use and flushed after every write.
    """
    if not self.wc:
        return
    if self.convertfp is None:
        mapname = os.path.join(self.wc, '.svn', 'hg-shamap')
        self.convertfp = open(mapname, 'a')
    fp = self.convertfp
    fp.write('%s %d\n' % (destrev, self.revnum(rev)))
    fp.flush()
392 396
393 397 # -- helper functions --
394 398
def revid(self, revnum, module=None):
    """Return the unicode revision id ``svn:<uuid><module>@<revnum>``.

    ``module`` defaults to ``self.module`` when empty or None and is
    decoded using ``self.encoding``.
    """
    module = module or self.module
    decoded = module.decode(self.encoding)
    return u"svn:%s%s@%s" % (self.uuid, decoded, revnum)
400 404
def revnum(self, rev):
    """Return the integer that follows the last ``@`` in ``rev``."""
    # rfind() yields -1 when there is no '@', so the whole string is used
    tail = rev[rev.rfind('@') + 1:]
    return int(tail)
403 407
def revsplit(self, rev):
    """Split a revision id into ``(uuid, module, revnum)``.

    ``rev`` is encoded with ``self.encoding`` and cut at its first ``@``.
    The first four characters of the left part (the ``svn:`` prefix
    written by revid()) are dropped from the uuid; ``module`` is the
    remainder starting at the first ``/``, or ``''`` when there is none.
    """
    encoded = rev.encode(self.encoding)
    url, revtext = encoded.split('@', 1)
    number = int(revtext)
    pieces = url.split('/', 1)
    uuid = pieces[0][4:]
    if len(pieces) > 1:
        module = '/' + pieces[1]
    else:
        module = ''
    return uuid, module, number
413 417
def latest(self, path, stop=0):
    """Find the latest revid affecting path, up to stop. It may return
    a revision in a different module, since a branch may be moved without
    a change being reported. Return None if computed module does not
    belong to rootmodule subtree.

    ``stop`` defaults to the latest revision number of the repository.
    Raises util.Abort when ``path`` cannot be stat'ed at ``stop``.
    """
    if not path.startswith(self.rootmodule):
        # Requests on foreign branches may be forbidden at server level
        self.ui.debug(_('ignoring foreign branch %r\n') % path)
        return None

    if not stop:
        stop = svn.ra.get_latest_revnum(self.ra)
    try:
        self.reparent('')
        try:
            dirent = svn.ra.stat(self.ra, path.strip('/'), stop)
        finally:
            # Always move the session back to the current module: some
            # callers swallow the Abort raised below and keep using
            # module-relative paths on the same session.
            self.reparent(self.module)
    except SubversionException:
        dirent = None
    if not dirent:
        raise util.Abort('%s not found up to revision %d' % (path, stop))

    # stat() gives us the previous revision on this line of development, but
    # it might be in *another module*. Fetch the log and detect renames down
    # to the latest revision.
    stream = get_log(self.url, [path], stop, dirent.created_rev)
    try:
        for entry in stream:
            paths, revnum, author, date, message = entry
            if revnum <= dirent.created_rev:
                break

            for p in paths:
                if not path.startswith(p) or not paths[p].copyfrom_path:
                    continue
                newpath = paths[p].copyfrom_path + path[len(p):]
                self.ui.debug("branch renamed from %s to %s at %d\n" %
                              (path, newpath, revnum))
                path = newpath
                break
    finally:
        stream.close()

    if not path.startswith(self.rootmodule):
        self.ui.debug(_('ignoring foreign branch %r\n') % path)
        return None
    return self.revid(dirent.created_rev, path)
461 465
def get_blacklist(self):
    """Avoid certain revision numbers.
    It is not uncommon for two nearby revisions to cancel each other
    out, e.g. 'I copied trunk into a subdirectory of itself instead
    of making a branch'. The converted repository is significantly
    smaller if we ignore such revisions.

    Revision numbers are read, one per line, from ``blacklist.txt`` in
    the current directory into ``self.blacklist``.  Lines starting with
    ``#`` and lines that are not integers are ignored.  Raises IOError
    when the file cannot be opened; ``self.blacklist`` is then left as an
    empty set.
    """
    self.blacklist = util.set()
    blacklist = self.blacklist
    fp = open("blacklist.txt", "r")
    try:
        for line in fp:
            if line.startswith("#"):
                continue
            try:
                blacklist.add(int(line.strip()))
            except ValueError:
                pass # not an integer or a comment
    finally:
        # do not rely on garbage collection to release the handle
        fp.close()
477 481
def is_blacklisted(self, svn_rev):
    """Return True if ``svn_rev`` is in the set built by get_blacklist()."""
    skipped = self.blacklist
    return svn_rev in skipped
480 484
def reparent(self, module):
    """Point the RA session at ``self.base + module``.

    The target URL is encoded with ``self.encoding`` and logged through
    ui.debug() before the session is moved.
    """
    target = (self.base + module).encode(self.encoding)
    self.ui.debug("reparent to %s\n" % target)
    svn.ra.reparent(self.ra, target)
485 489
486 490 def expandpaths(self, rev, paths, parents):
# Expand the changed paths of revision `rev` into the file entries it
# touches. `paths` is iterated as (path, changed-path entry) pairs and
# `parents[0]` is consulted for paths that no longer exist in `rev`.
# Returns (util.unique(entries), copies), where `copies` maps a destination
# entry to the entry it was copied from.
# NOTE: the default value of `module` below is bound when this def statement
# runs, i.e. before self.module may be reassigned a few lines further down;
# the calls that omit `module` therefore use the module that was current on
# entry.
487 491 def get_entry_from_path(path, module=self.module):
488 492 # Given the repository url of this wc, say
489 493 # "http://server/plone/CMFPlone/branches/Plone-2_0-branch"
490 494 # extract the "entry" portion (a relative path) from what
491 495 # svn log --xml says, ie
492 496 # "/CMFPlone/branches/Plone-2_0-branch/tests/PloneTestCase.py"
493 497 # that is to say "tests/PloneTestCase.py"
494 498 if path.startswith(module):
495 499 relative = path[len(module):]
496 500 if relative.startswith('/'):
497 501 return relative[1:]
498 502 else:
499 503 return relative
500 504
501 505 # The path is outside our tracked tree...
502 506 self.ui.debug('%r is not under %r, ignoring\n' % (path, module))
503 507 return None
504 508
505 509 entries = []
506 510 copyfrom = {} # Map of entrypath, revision for finding source of deleted revisions.
507 511 copies = {}
508 512
# Make sure the RA session is parented at the module of `rev`, since the
# check_path() calls below use module-relative paths.
509 513 new_module, revnum = self.revsplit(rev)[1:]
510 514 if new_module != self.module:
511 515 self.module = new_module
512 516 self.reparent(self.module)
513 517
514 518 for path, ent in paths:
# NOTE(review): get_entry_from_path() returns None for a path outside the
# module, which would make the decode() below fail; presumably the caller
# only passes paths under self.module -- confirm.
515 519 entrypath = get_entry_from_path(path, module=self.module)
516 520 entry = entrypath.decode(self.encoding)
517 521
518 522 kind = svn.ra.check_path(self.ra, entrypath, revnum)
519 523 if kind == svn.core.svn_node_file:
520 524 if ent.copyfrom_path:
521 525 copyfrom_path = get_entry_from_path(ent.copyfrom_path)
522 526 if copyfrom_path:
523 527 self.ui.debug("Copied to %s from %s@%s\n" %
524 528 (entrypath, copyfrom_path,
525 529 ent.copyfrom_rev))
526 530 # It's probably important for hg that the source
527 531 # exists in the revision's parent, not just the
528 532 # ent.copyfrom_rev
529 533 fromkind = svn.ra.check_path(self.ra, copyfrom_path, ent.copyfrom_rev)
530 534 if fromkind != 0:
531 535 copies[self.recode(entry)] = self.recode(copyfrom_path)
532 536 entries.append(self.recode(entry))
533 537 elif kind == 0: # gone, but had better be a deleted *file*
534 538 self.ui.debug("gone from %s\n" % ent.copyfrom_rev)
535 539
536 540 # if a branch is created but entries are removed in the same
537 541 # changeset, get the right fromrev
538 542 # parents cannot be empty here, you cannot remove things from
539 543 # a root revision.
540 544 uuid, old_module, fromrev = self.revsplit(parents[0])
541 545
# basepath and entrypath are computed from the same expression; only
# entrypath is modified afterwards.
542 546 basepath = old_module + "/" + get_entry_from_path(path, module=self.module)
543 547 entrypath = old_module + "/" + get_entry_from_path(path, module=self.module)
544 548
# Look every proper prefix of p up in `copyfrom`; the longest recorded
# prefix wins because later matches overwrite rc.
545 549 def lookup_parts(p):
546 550 rc = None
547 551 parts = p.split("/")
548 552 for i in range(len(parts)):
549 553 part = "/".join(parts[:i])
550 554 info = part, copyfrom.get(part, None)
551 555 if info[1] is not None:
552 556 self.ui.debug("Found parent directory %s\n" % info[1])
553 557 rc = info
554 558 return rc
555 559
556 560 self.ui.debug("base, entry %s %s\n" % (basepath, entrypath))
557 561
558 562 frompath, froment = lookup_parts(entrypath) or (None, revnum - 1)
559 563
560 564 # need to remove fragment from lookup_parts and replace with copyfrom_path
561 565 if frompath is not None:
562 566 self.ui.debug("munge-o-matic\n")
563 567 self.ui.debug(entrypath + '\n')
564 568 self.ui.debug(entrypath[len(frompath):] + '\n')
565 569 entrypath = froment.copyfrom_path + entrypath[len(frompath):]
566 570 fromrev = froment.copyfrom_rev
567 571 self.ui.debug("Info: %s %s %s %s\n" % (frompath, froment, ent, entrypath))
568 572
569 573 # We can avoid the reparent calls if the module has not changed
570 574 # but it probably does not worth the pain.
571 575 self.reparent('')
572 576 fromkind = svn.ra.check_path(self.ra, entrypath.strip('/'), fromrev)
573 577 self.reparent(self.module)
574 578
575 579 if fromkind == svn.core.svn_node_file: # a deleted file
576 580 entries.append(self.recode(entry))
577 581 elif fromkind == svn.core.svn_node_dir:
578 582 # print "Deleted/moved non-file:", revnum, path, ent
579 583 # children = self._find_children(path, revnum - 1)
580 584 # print "find children %s@%d from %d action %s" % (path, revnum, ent.copyfrom_rev, ent.action)
581 585 # Sometimes this is tricky. For example: in
582 586 # The Subversion Repository revision 6940 a dir
583 587 # was copied and one of its files was deleted
584 588 # from the new location in the same commit. This
585 589 # code can't deal with that yet.
586 590 if ent.action == 'C':
587 591 children = self._find_children(path, fromrev)
588 592 else:
589 593 oroot = entrypath.strip('/')
590 594 nroot = path.strip('/')
591 595 children = self._find_children(oroot, fromrev)
592 596 children = [s.replace(oroot,nroot) for s in children]
593 597 # Mark all [files, not directories] as deleted.
594 598 for child in children:
595 599 # Can we move a child directory and its
596 600 # parent in the same commit? (probably can). Could
597 601 # cause problems if instead of revnum -1,
598 602 # we have to look in (copyfrom_path, revnum - 1)
599 603 entrypath = get_entry_from_path("/" + child, module=old_module)
600 604 if entrypath:
601 605 entry = self.recode(entrypath.decode(self.encoding))
602 606 if entry in copies:
603 607 # deleted file within a copy
604 608 del copies[entry]
605 609 else:
606 610 entries.append(entry)
607 611 else:
608 612 self.ui.debug('unknown path in revision %d: %s\n' % \
609 613 (revnum, path))
610 614 elif kind == svn.core.svn_node_dir:
611 615 # Should probably synthesize normal file entries
612 616 # and handle as above to clean up copy/rename handling.
613 617
614 618 # If the directory just had a prop change,
615 619 # then we shouldn't need to look for its children.
616 620 if ent.action == 'M':
617 621 continue
618 622
619 623 # Also this could create duplicate entries. Not sure
620 624 # whether this will matter. Maybe should make entries a set.
621 625 # print "Changed directory", revnum, path, ent.action, ent.copyfrom_path, ent.copyfrom_rev
622 626 # This will fail if a directory was copied
623 627 # from another branch and then some of its files
624 628 # were deleted in the same transaction.
625 629 children = self._find_children(path, revnum)
626 630 children.sort()
627 631 for child in children:
628 632 # Can we move a child directory and its
629 633 # parent in the same commit? (probably can). Could
630 634 # cause problems if instead of revnum -1,
631 635 # we have to look in (copyfrom_path, revnum - 1)
632 636 entrypath = get_entry_from_path("/" + child, module=self.module)
633 637 # print child, self.module, entrypath
634 638 if entrypath:
635 639 # Need to filter out directories here...
636 640 kind = svn.ra.check_path(self.ra, entrypath, revnum)
637 641 if kind != svn.core.svn_node_dir:
638 642 entries.append(self.recode(entrypath))
639 643
640 644 # Copies here (must copy all from source)
641 645 # Probably not a real problem for us if
642 646 # source does not exist
643 647
644 648 # Can do this with the copy command "hg copy"
645 649 # if ent.copyfrom_path:
646 650 # copyfrom_entry = get_entry_from_path(ent.copyfrom_path.decode(self.encoding),
647 651 # module=self.module)
648 652 # copyto_entry = entrypath
649 653 #
650 654 # print "copy directory", copyfrom_entry, 'to', copyto_entry
651 655 #
652 656 # copies.append((copyfrom_entry, copyto_entry))
653 657
654 658 if ent.copyfrom_path:
655 659 copyfrom_path = ent.copyfrom_path.decode(self.encoding)
656 660 copyfrom_entry = get_entry_from_path(copyfrom_path, module=self.module)
657 661 if copyfrom_entry:
# Remember the copied directory so that later deletions below it can be
# resolved by lookup_parts().
658 662 copyfrom[path] = ent
659 663 self.ui.debug("mark %s came from %s\n" % (path, copyfrom[path]))
660 664
661 665 # Good, /probably/ a regular copy. Really should check
662 666 # to see whether the parent revision actually contains
663 667 # the directory in question.
664 668 children = self._find_children(self.recode(copyfrom_path), ent.copyfrom_rev)
665 669 children.sort()
666 670 for child in children:
667 671 entrypath = get_entry_from_path("/" + child, module=self.module)
668 672 if entrypath:
669 673 entry = entrypath.decode(self.encoding)
670 674 # print "COPY COPY From", copyfrom_entry, entry
671 675 copyto_path = path + entry[len(copyfrom_entry):]
672 676 copyto_entry = get_entry_from_path(copyto_path, module=self.module)
673 677 # print "COPY", entry, "COPY To", copyto_entry
674 678 copies[self.recode(copyto_entry)] = self.recode(entry)
675 679 # copy from quux splort/quuxfile
676 680
677 681 return (util.unique(entries), copies)
678 682
679 683 def _fetch_revisions(self, from_revnum, to_revnum):
# Read the log of self.module between the two revision numbers and record
# every parsed revision in self.commits (commit objects) and self.paths
# ((changed paths, parents) tuples). The bounds are swapped if needed so that
# from_revnum >= to_revnum. Reading stops early at a branch root or at a
# revision older than self.startrev. Raises util.Abort when Subversion
# reports SVN_ERR_FS_NO_SUCH_REVISION; other SubversionExceptions propagate.
680 684 if from_revnum < to_revnum:
681 685 from_revnum, to_revnum = to_revnum, from_revnum
682 686
# The most recently parsed changeset; parselogentry() gives it the next
# parsed revision as parent when it has none.
683 687 self.child_cset = None
684 688 def parselogentry(orig_paths, revnum, author, date, message):
685 689 """Return the parsed commit object or None, and True if
686 690 the revision is a branch root.
687 691 """
688 692 self.ui.debug("parsing revision %d (%d changes)\n" %
689 693 (revnum, len(orig_paths)))
690 694
691 695 branched = False
692 696 rev = self.revid(revnum)
693 697 # branch log might return entries for a parent we already have
694 698
695 699 if (rev in self.commits or revnum < to_revnum):
696 700 return None, branched
697 701
698 702 parents = []
699 703 # check whether this revision is the start of a branch or part
700 704 # of a branch renaming
701 705 orig_paths = orig_paths.items()
702 706 orig_paths.sort()
# Changed paths that are a prefix of the module path; after sorting, the
# last one is used to decide whether the module was copied in this revision.
703 707 root_paths = [(p,e) for p,e in orig_paths if self.module.startswith(p)]
704 708 if root_paths:
705 709 path, ent = root_paths[-1]
706 710 if ent.copyfrom_path:
707 711 branched = True
708 712 newpath = ent.copyfrom_path + self.module[len(path):]
709 713 # ent.copyfrom_rev may not be the actual last revision
710 714 previd = self.latest(newpath, ent.copyfrom_rev)
711 715 if previd is not None:
712 716 prevmodule, prevnum = self.revsplit(previd)[1:]
713 717 if prevnum >= self.startrev:
714 718 parents = [previd]
715 719 self.ui.note('found parent of branch %s at %d: %s\n' %
716 720 (self.module, prevnum, prevmodule))
717 721 else:
718 722 self.ui.debug("No copyfrom path, don't know what to do.\n")
719 723
720 724 paths = []
721 725 # filter out unrelated paths
722 726 for path, ent in orig_paths:
723 727 if not path.startswith(self.module):
724 728 self.ui.debug("boring@%s: %s\n" % (revnum, path))
725 729 continue
726 730 paths.append((path, ent))
727 731
728 732 # Example SVN datetime. Includes microseconds.
729 733 # ISO-8601 conformant
730 734 # '2007-01-04T17:35:00.902377Z'
731 735 date = util.parsedate(date[:19] + " UTC", ["%Y-%m-%dT%H:%M:%S"])
732 736
733 737 log = message and self.recode(message) or ''
734 738 author = author and self.recode(author) or ''
# The branch name is the last component of the module path; 'trunk' is
# mapped to the empty branch name.
735 739 try:
736 740 branch = self.module.split("/")[-1]
737 741 if branch == 'trunk':
738 742 branch = ''
739 743 except IndexError:
740 744 branch = None
741 745
742 746 cset = commit(author=author,
743 747 date=util.datestr(date),
744 748 desc=log,
745 749 parents=parents,
746 750 branch=branch,
747 751 rev=rev.encode('utf-8'))
748 752
749 753 self.commits[rev] = cset
750 754 # The parents list is *shared* among self.paths and the
751 755 # commit object. Both will be updated below.
752 756 self.paths[rev] = (paths, cset.parents)
753 757 if self.child_cset and not self.child_cset.parents:
754 758 self.child_cset.parents[:] = [rev]
755 759 self.child_cset = cset
756 760 return cset, branched
757 761
758 762 self.ui.note('fetching revision log for "%s" from %d to %d\n' %
759 763 (self.module, from_revnum, to_revnum))
760 764
761 765 try:
762 766 firstcset = None
763 767 lastonbranch = False
764 768 stream = get_log(self.url, [self.module], from_revnum, to_revnum)
765 769 try:
766 770 for entry in stream:
767 771 paths, revnum, author, date, message = entry
768 772 if revnum < self.startrev:
769 773 lastonbranch = True
770 774 break
771 775 if self.is_blacklisted(revnum):
772 776 self.ui.note('skipping blacklisted revision %d\n'
773 777 % revnum)
774 778 continue
775 779 if paths is None:
776 780 self.ui.debug('revision %d has no entries\n' % revnum)
777 781 continue
778 782 cset, lastonbranch = parselogentry(paths, revnum, author,
779 783 date, message)
780 784 if cset:
781 785 firstcset = cset
782 786 if lastonbranch:
783 787 break
784 788 finally:
785 789 stream.close()
786 790
787 791 if not lastonbranch and firstcset and not firstcset.parents:
788 792 # The first revision of the sequence (the last fetched one)
789 793 # has invalid parents if not a branch root. Find the parent
790 794 # revision now, if any.
791 795 try:
792 796 firstrevnum = self.revnum(firstcset.rev)
793 797 if firstrevnum > 1:
794 798 latest = self.latest(self.module, firstrevnum - 1)
795 799 if latest:
796 800 firstcset.parents.append(latest)
797 801 except util.Abort:
798 802 pass
799 803 except SubversionException, (inst, num):
800 804 if num == svn.core.SVN_ERR_FS_NO_SUCH_REVISION:
801 805 raise util.Abort('svn: branch has no revision %s' % to_revnum)
802 806 raise
803 807
804 808 def _getfile(self, file, rev):
805 809 io = StringIO()
806 810 # TODO: ra.get_file transmits the whole file instead of diffs.
807 811 mode = ''
808 812 try:
809 813 new_module, revnum = self.revsplit(rev)[1:]
810 814 if self.module != new_module:
811 815 self.module = new_module
812 816 self.reparent(self.module)
813 817 info = svn.ra.get_file(self.ra, file, revnum, io)
814 818 if isinstance(info, list):
815 819 info = info[-1]
816 820 mode = ("svn:executable" in info) and 'x' or ''
817 821 mode = ("svn:special" in info) and 'l' or mode
818 822 except SubversionException, e:
819 823 notfound = (svn.core.SVN_ERR_FS_NOT_FOUND,
820 824 svn.core.SVN_ERR_RA_DAV_PATH_NOT_FOUND)
821 825 if e.apr_err in notfound: # File not found
822 826 raise IOError()
823 827 raise
824 828 data = io.getvalue()
825 829 if mode == 'l':
826 830 link_prefix = "link "
827 831 if data.startswith(link_prefix):
828 832 data = data[len(link_prefix):]
829 833 return data, mode
830 834
def _find_children(self, path, revnum):
    """Return the recursive listing of ``path`` at ``revnum``.

    Each name returned by the client listing is prefixed with ``path``
    (stripped of surrounding slashes) and a ``/``.
    """
    path = path.strip('/')
    pool = Pool()
    rpath = '/'.join([self.base, path]).strip('/')
    listing = svn.client.ls(rpath, optrev(revnum), True, self.ctx, pool)
    return ['%s/%s' % (path, name) for name in listing.keys()]
836 840
# Text of a Subversion pre-revprop-change hook script. It exits 0 when
# svn:log is modified or when hg:convert-branch / hg:convert-rev is added,
# and rejects every other revision property change with exit status 1.
# Presumably written into the hooks directory of a repository created by
# svn_sink.__init__ -- the installing code is outside this view.
837 841 pre_revprop_change = '''#!/bin/sh
838 842
839 843 REPOS="$1"
840 844 REV="$2"
841 845 USER="$3"
842 846 PROPNAME="$4"
843 847 ACTION="$5"
844 848
845 849 if [ "$ACTION" = "M" -a "$PROPNAME" = "svn:log" ]; then exit 0; fi
846 850 if [ "$ACTION" = "A" -a "$PROPNAME" = "hg:convert-branch" ]; then exit 0; fi
847 851 if [ "$ACTION" = "A" -a "$PROPNAME" = "hg:convert-rev" ]; then exit 0; fi
848 852
849 853 echo "Changing prohibited revision property" >&2
850 854 exit 1
851 855 '''
852 856
class svn_sink(converter_sink, commandline):
    """Conversion sink committing changesets through the svn command line.

    Files are written into a Subversion working copy; deletions, copies
    and svn:executable changes are queued and applied by putcommit()
    just before running "svn commit".
    """

    # Matches the revision number announced by "svn commit".
    commit_re = re.compile(r'Committed revision (\d+).', re.M)

    def prerun(self):
        """Change into the working copy, if one is set.

        Presumably called by commandline before each command -- confirm
        against common.commandline.
        """
        if self.wc:
            os.chdir(self.wc)

    def postrun(self):
        """Change back to the directory that was current at construction."""
        if self.wc:
            os.chdir(self.cwd)

    def join(self, name):
        """Return the path of name in the working copy's .svn directory."""
        return os.path.join(self.wc, '.svn', name)

    def revmapfile(self):
        """Return the path of the revision map file (.svn/hg-shamap)."""
        return self.join('hg-shamap')

    def authorfile(self):
        """Return the path of the author map file (.svn/hg-authormap)."""
        return self.join('hg-authormap')

    def __init__(self, ui, path):
        """Set up the working copy used to commit into path.

        If path is already a working copy (it has .svn/entries) it is
        updated and used directly. Otherwise a working copy named
        "<basename of path>-wc" is checked out in the current directory;
        when the parent directory of path exists and path does not hold
        a repository yet (no db/fs-type), one is created with svnadmin
        and given a pre-revprop-change hook.
        """
        converter_sink.__init__(self, ui, path)
        commandline.__init__(self, ui, 'svn')
        # Operations queued by putfile/delfile/copyfile until putcommit.
        self.delete = []
        self.setexec = []
        self.delexec = []
        self.copies = []
        self.wc = None
        self.cwd = os.getcwd()

        path = os.path.realpath(path)

        created = False
        if os.path.isfile(os.path.join(path, '.svn', 'entries')):
            self.wc = path
            self.run0('update')
        else:
            wcpath = os.path.join(os.getcwd(), os.path.basename(path) + '-wc')

            if os.path.isdir(os.path.dirname(path)):
                if not os.path.exists(os.path.join(path, 'db', 'fs-type')):
                    ui.status(_('initializing svn repo %r\n') %
                              os.path.basename(path))
                    commandline(ui, 'svnadmin').run0('create', path)
                    created = path
                # Turn the local repository path into a file:// URL.
                path = util.normpath(path)
                if not path.startswith('/'):
                    path = '/' + path
                path = 'file://' + path

            ui.status(_('initializing svn wc %r\n') % os.path.basename(wcpath))
            self.run0('checkout', path, wcpath)

            self.wc = wcpath
        self.opener = util.opener(self.wc)
        self.wopener = util.opener(self.wc)
        self.childmap = mapfile(ui, self.join('hg-childmap'))
        # None when the working copy filesystem has no usable exec bit.
        self.is_exec = util.checkexec(self.wc) and util.is_exec or None

        if created:
            hook = os.path.join(created, 'hooks', 'pre-revprop-change')
            fp = open(hook, 'w')
            try:
                fp.write(pre_revprop_change)
            finally:
                # Close explicitly so the hook is flushed before chmod.
                fp.close()
            util.set_flags(hook, "x")

        xport = transport.SvnRaTransport(url=geturl(path))
        self.uuid = svn.ra.get_uuid(xport.ra)

    def wjoin(self, *names):
        """Join names onto the working copy root."""
        return os.path.join(self.wc, *names)

    def putfile(self, filename, flags, data):
        """Write data to filename in the working copy.

        With 'l' in flags a symlink pointing at data is created instead.
        For regular files the required svn:executable change is queued
        in self.setexec / self.delexec for the next putcommit().
        """
        if 'l' in flags:
            self.wopener.symlink(data, filename)
            return

        wpath = self.wjoin(filename)
        try:
            # Remove a stale symlink so the write does not go through it.
            # The unlink must target the working copy path: the bare
            # relative name would be resolved against the process cwd.
            if os.path.islink(wpath):
                os.unlink(wpath)
        except OSError:
            pass
        fp = self.wopener(filename, 'w')
        try:
            fp.write(data)
        finally:
            # Do not rely on garbage collection to flush and close.
            fp.close()

        if self.is_exec:
            was_exec = self.is_exec(wpath)
        else:
            # On filesystems not supporting execute-bit, there is no way
            # to know if it is set but asking subversion. Setting it
            # systematically is just as expensive and much simpler.
            was_exec = 'x' not in flags

        util.set_flags(wpath, flags)
        if was_exec:
            if 'x' not in flags:
                self.delexec.append(filename)
        else:
            if 'x' in flags:
                self.setexec.append(filename)

    def delfile(self, name):
        """Queue name for deletion at the next putcommit()."""
        self.delete.append(name)

    def copyfile(self, source, dest):
        """Queue a copy of source to dest for the next putcommit()."""
        self.copies.append([source, dest])

    def _copyfile(self, source, dest):
        """Run "svn copy source dest", keeping an existing dest's content."""
        # SVN's copy command pukes if the destination file exists, but
        # our copyfile method expects to record a copy that has
        # already occurred. Cross the semantic gap.
        wdest = self.wjoin(dest)
        exists = os.path.exists(wdest)
        if exists:
            # Reserve a unique name next to dest and move dest aside.
            fd, tempname = tempfile.mkstemp(
                prefix='hg-copy-', dir=os.path.dirname(wdest))
            os.close(fd)
            os.unlink(tempname)
            os.rename(wdest, tempname)
        try:
            self.run0('copy', source, dest)
        finally:
            if exists:
                # Put the already converted content back in place.
                try:
                    os.unlink(wdest)
                except OSError:
                    pass
                os.rename(tempname, wdest)

    def dirs_of(self, files):
        """Return the set of directories involved in files.

        That is every name in files which is a directory in the working
        copy, plus every '/'-separated ancestor of every name.
        """
        dirs = util.set()
        for f in files:
            if os.path.isdir(self.wjoin(f)):
                dirs.add(f)
            for i in strutil.rfindall(f, '/'):
                dirs.add(f[:i])
        return dirs

    def add_dirs(self, files):
        """"svn add" the directories of files lacking .svn/entries.

        Returns the sorted list of directories that were added.
        """
        add_dirs = [d for d in self.dirs_of(files)
                    if not os.path.exists(self.wjoin(d, '.svn', 'entries'))]
        if add_dirs:
            # Sorted so that parents are added before their children.
            add_dirs.sort()
            self.xargs(add_dirs, 'add', non_recursive=True, quiet=True)
        return add_dirs

    def add_files(self, files):
        """"svn add" files (if any) and return them."""
        if files:
            self.xargs(files, 'add', quiet=True)
        return files

    def tidy_dirs(self, names):
        """"svn delete" directories of names left with only .svn inside.

        Returns the list of deleted directories.
        """
        dirs = list(self.dirs_of(names))
        # Reverse order: visit children before their parents.
        dirs.sort()
        dirs.reverse()
        deleted = []
        for d in dirs:
            wd = self.wjoin(d)
            # os.listdir() returns a list, so it has to be compared with
            # a list. Comparing it with the string '.svn' is never true,
            # which meant empty directories were never removed.
            if os.listdir(wd) == ['.svn']:
                self.run0('delete', d)
                deleted.append(d)
        return deleted

    def addchild(self, parent, child):
        """Record child as the committed revision for parent."""
        self.childmap[parent] = child

    def revid(self, rev):
        """Return the converter revision id of svn revision rev."""
        return u"svn:%s@%s" % (self.uuid, rev)

    def putcommit(self, files, parents, commit):
        """Apply queued operations and commit; return the new revision id.

        If one of parents already has a child recorded in the child map,
        that child's id is returned and nothing is committed.

        Raises util.Abort when the output of "svn commit" does not
        contain a revision number.
        """
        for parent in parents:
            try:
                return self.revid(self.childmap[parent])
            except KeyError:
                pass
        entries = util.set(self.delete)
        files = util.frozenset(files)
        entries.update(self.add_dirs(files.difference(entries)))
        if self.copies:
            for s, d in self.copies:
                self._copyfile(s, d)
            self.copies = []
        if self.delete:
            self.xargs(self.delete, 'delete')
            self.delete = []
        entries.update(self.add_files(files.difference(entries)))
        entries.update(self.tidy_dirs(entries))
        if self.delexec:
            self.xargs(self.delexec, 'propdel', 'svn:executable')
            self.delexec = []
        if self.setexec:
            self.xargs(self.setexec, 'propset', 'svn:executable', '*')
            self.setexec = []

        fd, messagefile = tempfile.mkstemp(prefix='hg-convert-')
        try:
            # Written inside the try block so that the temporary file is
            # removed even if writing the message fails.
            fp = os.fdopen(fd, 'w')
            try:
                fp.write(commit.desc)
            finally:
                fp.close()
            output = self.run0('commit',
                               username=util.shortuser(commit.author),
                               file=messagefile,
                               encoding='utf-8')
            try:
                rev = self.commit_re.search(output).group(1)
            except AttributeError:
                # search() returned None: no revision number in output.
                self.ui.warn(_('unexpected svn output:\n'))
                self.ui.warn(output)
                raise util.Abort(_('unable to cope with svn output'))
            if commit.rev:
                self.run('propset', 'hg:convert-rev', commit.rev,
                         revprop=True, revision=rev)
            if commit.branch and commit.branch != 'default':
                self.run('propset', 'hg:convert-branch', commit.branch,
                         revprop=True, revision=rev)
            for parent in parents:
                self.addchild(parent, rev)
            return self.revid(rev)
        finally:
            os.unlink(messagefile)

    def puttags(self, tags):
        """Tags are not supported by this sink yet; only warn."""
        self.ui.warn(_('XXX TAGS NOT IMPLEMENTED YET\n'))
General Comments 0
You need to be logged in to leave comments. Login now