##// END OF EJS Templates
convert: fetch less revisions when looking for a branch parent
Patrick Mezard -
r5875:f1504d33 default
parent child Browse files
Show More
@@ -1,958 +1,966
1 1 # Subversion 1.4/1.5 Python API backend
2 2 #
3 3 # Copyright(C) 2007 Daniel Holth et al
4 4 #
5 5 # Configuration options:
6 6 #
7 7 # convert.svn.trunk
8 8 # Relative path to the trunk (default: "trunk")
9 9 # convert.svn.branches
10 10 # Relative path to tree of branches (default: "branches")
11 11 # convert.svn.tags
12 12 # Relative path to tree of tags (default: "tags")
13 13 #
14 14 # Set these in a hgrc, or on the command line as follows:
15 15 #
16 16 # hg convert --config convert.svn.trunk=wackoname [...]
17 17
18 18 import locale
19 19 import os
20 20 import re
21 21 import sys
22 22 import cPickle as pickle
23 23 import tempfile
24 24
25 25 from mercurial import strutil, util
26 26 from mercurial.i18n import _
27 27
28 28 # Subversion stuff. Works best with very recent Python SVN bindings
29 29 # e.g. SVN 1.5 or backports. Thanks to the bzr folks for enhancing
30 30 # these bindings.
31 31
32 32 from cStringIO import StringIO
33 33
34 34 from common import NoRepo, commit, converter_source, encodeargs, decodeargs
35 35 from common import commandline, converter_sink, mapfile
36 36
37 37 try:
38 38 from svn.core import SubversionException, Pool
39 39 import svn
40 40 import svn.client
41 41 import svn.core
42 42 import svn.ra
43 43 import svn.delta
44 44 import transport
45 45 except ImportError:
46 46 pass
47 47
def geturl(path):
    """Return a Subversion URL for `path`.

    The Subversion client bindings are asked first (via
    svn.client.url_from_path on the canonicalized path). If they raise
    SubversionException and `path` is an existing directory, a file:// URL
    built from its absolute, normalized location is returned. Anything
    else is handed back unchanged.
    """
    try:
        canonical = svn.core.svn_path_canonicalize(path)
        return svn.client.url_from_path(canonical)
    except SubversionException:
        # The bindings could not map it; fall through to the manual cases.
        pass
    if not os.path.isdir(path):
        return path
    path = os.path.normpath(os.path.abspath(path))
    if os.name == 'nt':
        # Prefix a slash so the result reads file:///<normalized path>.
        path = '/' + util.normpath(path)
    return 'file://%s' % path
59 59
def optrev(number):
    """Build an svn_opt_revision_t designating revision `number`."""
    revision = svn.core.svn_opt_revision_t()
    revision.kind = svn.core.svn_opt_revision_number
    revision.value.number = number
    return revision
65 65
class changedpath(object):
    """Plain-Python copy of one changed-path entry of a log message.

    Only the copyfrom_path, copyfrom_rev and action attributes of the
    source object are kept; get_log_child pickles these copies instead of
    the objects handed to its log receiver.
    """
    def __init__(self, p):
        for field in ('copyfrom_path', 'copyfrom_rev', 'action'):
            setattr(self, field, getattr(p, field))
71 71
72 72 def get_log_child(fp, url, paths, start, end, limit=0, discover_changed_paths=True,
73 73 strict_node_history=False):
74 74 protocol = -1
75 75 def receiver(orig_paths, revnum, author, date, message, pool):
76 76 if orig_paths is not None:
77 77 for k, v in orig_paths.iteritems():
78 78 orig_paths[k] = changedpath(v)
79 79 pickle.dump((orig_paths, revnum, author, date, message),
80 80 fp, protocol)
81 81
82 82 try:
83 83 # Use an ra of our own so that our parent can consume
84 84 # our results without confusing the server.
85 85 t = transport.SvnRaTransport(url=url)
86 86 svn.ra.get_log(t.ra, paths, start, end, limit,
87 87 discover_changed_paths,
88 88 strict_node_history,
89 89 receiver)
90 90 except SubversionException, (inst, num):
91 91 pickle.dump(num, fp, protocol)
92 92 except IOError:
93 93 # Caller may interrupt the iteration
94 94 pickle.dump(None, fp, protocol)
95 95 else:
96 96 pickle.dump(None, fp, protocol)
97 97 fp.close()
98 98
def debugsvnlog(ui, **opts):
    """Fetch SVN log in a subprocess and channel them back to parent to
    avoid memory collection issues.

    The encoded get_log_child arguments are read from stdin; the pickled
    log entries are written to stdout. Both streams are switched to
    binary mode first.
    """
    for stream in (sys.stdin, sys.stdout):
        util.set_binary(stream)
    encoded = sys.stdin.read()
    get_log_child(sys.stdout, *decodeargs(encoded))
107 107
class logstream:
    """Interruptible revision log iterator.

    Wraps a file-like object holding a sequence of pickled objects, as
    written by get_log_child: 5-item log entries followed by a terminator.
    """
    def __init__(self, stdout):
        # stdout: readable binary stream positioned at the first pickle.
        self._stdout = stdout

    def __iter__(self):
        """Yield log entries until the terminator is read.

        A None terminator ends the iteration. Any other object that is
        not a 5-item sequence is reported by raising SubversionException
        with that object as its second argument.
        """
        while True:
            entry = pickle.load(self._stdout)
            try:
                orig_paths, revnum, author, date, message = entry
            except (TypeError, ValueError):
                # TypeError: entry is not iterable (None or an error code).
                # ValueError: entry has the wrong number of items.
                # A bare except here would also trap KeyboardInterrupt.
                if entry is None:
                    break
                raise SubversionException("child raised exception", entry)
            yield entry

    def close(self):
        """Close the underlying stream; calling it again is a no-op."""
        if self._stdout:
            self._stdout.close()
            self._stdout = None
128 128
def get_log(url, paths, start, end, limit=0, discover_changed_paths=True,
            strict_node_history=False):
    """Start an 'hg debugsvnlog' child process and return its log stream.

    The arguments are encoded and written to the child's stdin, which is
    then closed. The returned logstream reads the entries the child
    pickles onto its stdout.
    """
    payload = encodeargs([url, paths, start, end, limit,
                          discover_changed_paths, strict_node_history])
    cmd = '%s debugsvnlog' % util.shellquote(util.hgexecutable())
    child_in, child_out = os.popen2(cmd, 'b')
    child_in.write(payload)
    child_in.close()
    return logstream(child_out)
140 140
141 141 # SVN conversion code stolen from bzr-svn and tailor
142 142 class svn_source(converter_source):
143 143 def __init__(self, ui, url, rev=None):
144 144 super(svn_source, self).__init__(ui, url, rev=rev)
145 145
146 146 try:
147 147 SubversionException
148 148 except NameError:
149 149 raise NoRepo('Subversion python bindings could not be loaded')
150 150
151 151 self.encoding = locale.getpreferredencoding()
152 152 self.lastrevs = {}
153 153
154 154 latest = None
155 155 try:
156 156 # Support file://path@rev syntax. Useful e.g. to convert
157 157 # deleted branches.
158 158 at = url.rfind('@')
159 159 if at >= 0:
160 160 latest = int(url[at+1:])
161 161 url = url[:at]
162 162 except ValueError, e:
163 163 pass
164 164 self.url = geturl(url)
165 165 self.encoding = 'UTF-8' # Subversion is always nominal UTF-8
166 166 try:
167 167 self.transport = transport.SvnRaTransport(url=self.url)
168 168 self.ra = self.transport.ra
169 169 self.ctx = self.transport.client
170 170 self.base = svn.ra.get_repos_root(self.ra)
171 171 self.module = self.url[len(self.base):]
172 172 self.commits = {}
173 173 self.paths = {}
174 174 self.uuid = svn.ra.get_uuid(self.ra).decode(self.encoding)
175 175 except SubversionException, e:
176 176 ui.print_exc()
177 177 raise NoRepo("%s does not look like a Subversion repo" % self.url)
178 178
179 179 if rev:
180 180 try:
181 181 latest = int(rev)
182 182 except ValueError:
183 183 raise util.Abort('svn: revision %s is not an integer' % rev)
184 184
185 185 try:
186 186 self.get_blacklist()
187 187 except IOError, e:
188 188 pass
189 189
190 190 self.last_changed = self.latest(self.module, latest)
191 191
192 192 self.head = self.revid(self.last_changed)
193 193 self._changescache = None
194 194
195 195 if os.path.exists(os.path.join(url, '.svn/entries')):
196 196 self.wc = url
197 197 else:
198 198 self.wc = None
199 199 self.convertfp = None
200 200
201 201 def setrevmap(self, revmap):
202 202 lastrevs = {}
203 203 for revid in revmap.iterkeys():
204 204 uuid, module, revnum = self.revsplit(revid)
205 205 lastrevnum = lastrevs.setdefault(module, revnum)
206 206 if revnum > lastrevnum:
207 207 lastrevs[module] = revnum
208 208 self.lastrevs = lastrevs
209 209
210 210 def exists(self, path, optrev):
211 211 try:
212 212 svn.client.ls(self.url.rstrip('/') + '/' + path,
213 213 optrev, False, self.ctx)
214 214 return True
215 215 except SubversionException, err:
216 216 return False
217 217
218 218 def getheads(self):
219 219
220 220 def getcfgpath(name, rev):
221 221 cfgpath = self.ui.config('convert', 'svn.' + name)
222 222 path = (cfgpath or name).strip('/')
223 223 if not self.exists(path, rev):
224 224 if cfgpath:
225 225 raise util.Abort(_('expected %s to be at %r, but not found')
226 226 % (name, path))
227 227 return None
228 228 self.ui.note(_('found %s at %r\n') % (name, path))
229 229 return path
230 230
231 231 rev = optrev(self.last_changed)
232 232 oldmodule = ''
233 233 trunk = getcfgpath('trunk', rev)
234 234 tags = getcfgpath('tags', rev)
235 235 branches = getcfgpath('branches', rev)
236 236
237 237 # If the project has a trunk or branches, we will extract heads
238 238 # from them. We keep the project root otherwise.
239 239 if trunk:
240 240 oldmodule = self.module or ''
241 241 self.module += '/' + trunk
242 242 lt = self.latest(self.module, self.last_changed)
243 243 self.head = self.revid(lt)
244 244
245 245 # First head in the list is the module's head
246 246 self.heads = [self.head]
247 247 self.tags = '%s/%s' % (oldmodule , (tags or 'tags'))
248 248
249 249 # Check if branches bring a few more heads to the list
250 250 if branches:
251 251 rpath = self.url.strip('/')
252 252 branchnames = svn.client.ls(rpath + '/' + branches, rev, False,
253 253 self.ctx)
254 254 for branch in branchnames.keys():
255 255 module = '%s/%s/%s' % (oldmodule, branches, branch)
256 256 brevnum = self.latest(module, self.last_changed)
257 257 brev = self.revid(brevnum, module)
258 258 self.ui.note('found branch %s at %d\n' % (branch, brevnum))
259 259 self.heads.append(brev)
260 260
261 261 return self.heads
262 262
263 263 def getfile(self, file, rev):
264 264 data, mode = self._getfile(file, rev)
265 265 self.modecache[(file, rev)] = mode
266 266 return data
267 267
268 268 def getmode(self, file, rev):
269 269 return self.modecache[(file, rev)]
270 270
271 271 def getchanges(self, rev):
272 272 if self._changescache and self._changescache[0] == rev:
273 273 return self._changescache[1]
274 274 self._changescache = None
275 275 self.modecache = {}
276 276 (paths, parents) = self.paths[rev]
277 277 files, copies = self.expandpaths(rev, paths, parents)
278 278 files.sort()
279 279 files = zip(files, [rev] * len(files))
280 280
281 281 # caller caches the result, so free it here to release memory
282 282 del self.paths[rev]
283 283 return (files, copies)
284 284
285 285 def getchangedfiles(self, rev, i):
286 286 changes = self.getchanges(rev)
287 287 self._changescache = (rev, changes)
288 288 return [f[0] for f in changes[0]]
289 289
290 290 def getcommit(self, rev):
291 291 if rev not in self.commits:
292 292 uuid, module, revnum = self.revsplit(rev)
293 293 self.module = module
294 294 self.reparent(module)
295 # We assume that:
296 # - requests for revisions after "stop" come from the
297 # revision graph backward traversal. Cache all of them
298 # down to stop, they will be used eventually.
299 # - requests for revisions before "stop" come to get
300 # isolated branches parents. Just fetch what is needed.
295 301 stop = self.lastrevs.get(module, 0)
302 if revnum < stop:
303 stop = revnum + 1
296 304 self._fetch_revisions(revnum, stop)
297 305 commit = self.commits[rev]
298 306 # caller caches the result, so free it here to release memory
299 307 del self.commits[rev]
300 308 return commit
301 309
302 310 def gettags(self):
303 311 tags = {}
304 312 start = self.revnum(self.head)
305 313 try:
306 314 for entry in get_log(self.url, [self.tags], 0, start):
307 315 orig_paths, revnum, author, date, message = entry
308 316 for path in orig_paths:
309 317 if not path.startswith(self.tags+'/'):
310 318 continue
311 319 ent = orig_paths[path]
312 320 source = ent.copyfrom_path
313 321 rev = ent.copyfrom_rev
314 322 tag = path.split('/')[-1]
315 323 tags[tag] = self.revid(rev, module=source)
316 324 except SubversionException, (inst, num):
317 325 self.ui.note('no tags found at revision %d\n' % start)
318 326 return tags
319 327
320 328 def converted(self, rev, destrev):
321 329 if not self.wc:
322 330 return
323 331 if self.convertfp is None:
324 332 self.convertfp = open(os.path.join(self.wc, '.svn', 'hg-shamap'),
325 333 'a')
326 334 self.convertfp.write('%s %d\n' % (destrev, self.revnum(rev)))
327 335 self.convertfp.flush()
328 336
329 337 # -- helper functions --
330 338
331 339 def revid(self, revnum, module=None):
332 340 if not module:
333 341 module = self.module
334 342 return u"svn:%s%s@%s" % (self.uuid, module.decode(self.encoding),
335 343 revnum)
336 344
337 345 def revnum(self, rev):
338 346 return int(rev.split('@')[-1])
339 347
340 348 def revsplit(self, rev):
341 349 url, revnum = rev.encode(self.encoding).split('@', 1)
342 350 revnum = int(revnum)
343 351 parts = url.split('/', 1)
344 352 uuid = parts.pop(0)[4:]
345 353 mod = ''
346 354 if parts:
347 355 mod = '/' + parts[0]
348 356 return uuid, mod, revnum
349 357
350 358 def latest(self, path, stop=0):
351 359 'find the latest revision affecting path, up to stop'
352 360 if not stop:
353 361 stop = svn.ra.get_latest_revnum(self.ra)
354 362 try:
355 363 self.reparent('')
356 364 dirent = svn.ra.stat(self.ra, path.strip('/'), stop)
357 365 self.reparent(self.module)
358 366 except SubversionException:
359 367 dirent = None
360 368 if not dirent:
361 369 raise util.Abort('%s not found up to revision %d' % (path, stop))
362 370
363 371 return dirent.created_rev
364 372
365 373 def get_blacklist(self):
366 374 """Avoid certain revision numbers.
367 375 It is not uncommon for two nearby revisions to cancel each other
368 376 out, e.g. 'I copied trunk into a subdirectory of itself instead
369 377 of making a branch'. The converted repository is significantly
370 378 smaller if we ignore such revisions."""
371 379 self.blacklist = util.set()
372 380 blacklist = self.blacklist
373 381 for line in file("blacklist.txt", "r"):
374 382 if not line.startswith("#"):
375 383 try:
376 384 svn_rev = int(line.strip())
377 385 blacklist.add(svn_rev)
378 386 except ValueError, e:
379 387 pass # not an integer or a comment
380 388
381 389 def is_blacklisted(self, svn_rev):
382 390 return svn_rev in self.blacklist
383 391
384 392 def reparent(self, module):
385 393 svn_url = self.base + module
386 394 self.ui.debug("reparent to %s\n" % svn_url.encode(self.encoding))
387 395 svn.ra.reparent(self.ra, svn_url.encode(self.encoding))
388 396
389 397 def expandpaths(self, rev, paths, parents):
390 398 def get_entry_from_path(path, module=self.module):
391 399 # Given the repository url of this wc, say
392 400 # "http://server/plone/CMFPlone/branches/Plone-2_0-branch"
393 401 # extract the "entry" portion (a relative path) from what
394 402 # svn log --xml says, ie
395 403 # "/CMFPlone/branches/Plone-2_0-branch/tests/PloneTestCase.py"
396 404 # that is to say "tests/PloneTestCase.py"
397 405 if path.startswith(module):
398 406 relative = path[len(module):]
399 407 if relative.startswith('/'):
400 408 return relative[1:]
401 409 else:
402 410 return relative
403 411
404 412 # The path is outside our tracked tree...
405 413 self.ui.debug('%r is not under %r, ignoring\n' % (path, module))
406 414 return None
407 415
408 416 entries = []
409 417 copyfrom = {} # Map of entrypath, revision for finding source of deleted revisions.
410 418 copies = {}
411 419
412 420 new_module, revnum = self.revsplit(rev)[1:]
413 421 if new_module != self.module:
414 422 self.module = new_module
415 423 self.reparent(self.module)
416 424
417 425 for path, ent in paths:
418 426 entrypath = get_entry_from_path(path, module=self.module)
419 427 entry = entrypath.decode(self.encoding)
420 428
421 429 kind = svn.ra.check_path(self.ra, entrypath, revnum)
422 430 if kind == svn.core.svn_node_file:
423 431 if ent.copyfrom_path:
424 432 copyfrom_path = get_entry_from_path(ent.copyfrom_path)
425 433 if copyfrom_path:
426 434 self.ui.debug("Copied to %s from %s@%s\n" %
427 435 (entrypath, copyfrom_path,
428 436 ent.copyfrom_rev))
429 437 # It's probably important for hg that the source
430 438 # exists in the revision's parent, not just the
431 439 # ent.copyfrom_rev
432 440 fromkind = svn.ra.check_path(self.ra, copyfrom_path, ent.copyfrom_rev)
433 441 if fromkind != 0:
434 442 copies[self.recode(entry)] = self.recode(copyfrom_path)
435 443 entries.append(self.recode(entry))
436 444 elif kind == 0: # gone, but had better be a deleted *file*
437 445 self.ui.debug("gone from %s\n" % ent.copyfrom_rev)
438 446
439 447 # if a branch is created but entries are removed in the same
440 448 # changeset, get the right fromrev
441 449 # parents cannot be empty here, you cannot remove things from
442 450 # a root revision.
443 451 uuid, old_module, fromrev = self.revsplit(parents[0])
444 452
445 453 basepath = old_module + "/" + get_entry_from_path(path, module=self.module)
446 454 entrypath = old_module + "/" + get_entry_from_path(path, module=self.module)
447 455
448 456 def lookup_parts(p):
449 457 rc = None
450 458 parts = p.split("/")
451 459 for i in range(len(parts)):
452 460 part = "/".join(parts[:i])
453 461 info = part, copyfrom.get(part, None)
454 462 if info[1] is not None:
455 463 self.ui.debug("Found parent directory %s\n" % info[1])
456 464 rc = info
457 465 return rc
458 466
459 467 self.ui.debug("base, entry %s %s\n" % (basepath, entrypath))
460 468
461 469 frompath, froment = lookup_parts(entrypath) or (None, revnum - 1)
462 470
463 471 # need to remove fragment from lookup_parts and replace with copyfrom_path
464 472 if frompath is not None:
465 473 self.ui.debug("munge-o-matic\n")
466 474 self.ui.debug(entrypath + '\n')
467 475 self.ui.debug(entrypath[len(frompath):] + '\n')
468 476 entrypath = froment.copyfrom_path + entrypath[len(frompath):]
469 477 fromrev = froment.copyfrom_rev
470 478 self.ui.debug("Info: %s %s %s %s\n" % (frompath, froment, ent, entrypath))
471 479
472 480 fromkind = svn.ra.check_path(self.ra, entrypath, fromrev)
473 481 if fromkind == svn.core.svn_node_file: # a deleted file
474 482 entries.append(self.recode(entry))
475 483 elif fromkind == svn.core.svn_node_dir:
476 484 # print "Deleted/moved non-file:", revnum, path, ent
477 485 # children = self._find_children(path, revnum - 1)
478 486 # print "find children %s@%d from %d action %s" % (path, revnum, ent.copyfrom_rev, ent.action)
479 487 # Sometimes this is tricky. For example: in
480 488 # The Subversion Repository revision 6940 a dir
481 489 # was copied and one of its files was deleted
482 490 # from the new location in the same commit. This
483 491 # code can't deal with that yet.
484 492 if ent.action == 'C':
485 493 children = self._find_children(path, fromrev)
486 494 else:
487 495 oroot = entrypath.strip('/')
488 496 nroot = path.strip('/')
489 497 children = self._find_children(oroot, fromrev)
490 498 children = [s.replace(oroot,nroot) for s in children]
491 499 # Mark all [files, not directories] as deleted.
492 500 for child in children:
493 501 # Can we move a child directory and its
494 502 # parent in the same commit? (probably can). Could
495 503 # cause problems if instead of revnum -1,
496 504 # we have to look in (copyfrom_path, revnum - 1)
497 505 entrypath = get_entry_from_path("/" + child, module=old_module)
498 506 if entrypath:
499 507 entry = self.recode(entrypath.decode(self.encoding))
500 508 if entry in copies:
501 509 # deleted file within a copy
502 510 del copies[entry]
503 511 else:
504 512 entries.append(entry)
505 513 else:
506 514 self.ui.debug('unknown path in revision %d: %s\n' % \
507 515 (revnum, path))
508 516 elif kind == svn.core.svn_node_dir:
509 517 # Should probably synthesize normal file entries
510 518 # and handle as above to clean up copy/rename handling.
511 519
512 520 # If the directory just had a prop change,
513 521 # then we shouldn't need to look for its children.
514 522 if ent.action == 'M':
515 523 continue
516 524
517 525 # Also this could create duplicate entries. Not sure
518 526 # whether this will matter. Maybe should make entries a set.
519 527 # print "Changed directory", revnum, path, ent.action, ent.copyfrom_path, ent.copyfrom_rev
520 528 # This will fail if a directory was copied
521 529 # from another branch and then some of its files
522 530 # were deleted in the same transaction.
523 531 children = self._find_children(path, revnum)
524 532 children.sort()
525 533 for child in children:
526 534 # Can we move a child directory and its
527 535 # parent in the same commit? (probably can). Could
528 536 # cause problems if instead of revnum -1,
529 537 # we have to look in (copyfrom_path, revnum - 1)
530 538 entrypath = get_entry_from_path("/" + child, module=self.module)
531 539 # print child, self.module, entrypath
532 540 if entrypath:
533 541 # Need to filter out directories here...
534 542 kind = svn.ra.check_path(self.ra, entrypath, revnum)
535 543 if kind != svn.core.svn_node_dir:
536 544 entries.append(self.recode(entrypath))
537 545
538 546 # Copies here (must copy all from source)
539 547 # Probably not a real problem for us if
540 548 # source does not exist
541 549
542 550 # Can do this with the copy command "hg copy"
543 551 # if ent.copyfrom_path:
544 552 # copyfrom_entry = get_entry_from_path(ent.copyfrom_path.decode(self.encoding),
545 553 # module=self.module)
546 554 # copyto_entry = entrypath
547 555 #
548 556 # print "copy directory", copyfrom_entry, 'to', copyto_entry
549 557 #
550 558 # copies.append((copyfrom_entry, copyto_entry))
551 559
552 560 if ent.copyfrom_path:
553 561 copyfrom_path = ent.copyfrom_path.decode(self.encoding)
554 562 copyfrom_entry = get_entry_from_path(copyfrom_path, module=self.module)
555 563 if copyfrom_entry:
556 564 copyfrom[path] = ent
557 565 self.ui.debug("mark %s came from %s\n" % (path, copyfrom[path]))
558 566
559 567 # Good, /probably/ a regular copy. Really should check
560 568 # to see whether the parent revision actually contains
561 569 # the directory in question.
562 570 children = self._find_children(self.recode(copyfrom_path), ent.copyfrom_rev)
563 571 children.sort()
564 572 for child in children:
565 573 entrypath = get_entry_from_path("/" + child, module=self.module)
566 574 if entrypath:
567 575 entry = entrypath.decode(self.encoding)
568 576 # print "COPY COPY From", copyfrom_entry, entry
569 577 copyto_path = path + entry[len(copyfrom_entry):]
570 578 copyto_entry = get_entry_from_path(copyto_path, module=self.module)
571 579 # print "COPY", entry, "COPY To", copyto_entry
572 580 copies[self.recode(copyto_entry)] = self.recode(entry)
573 581 # copy from quux splort/quuxfile
574 582
575 583 return (entries, copies)
576 584
577 585 def _fetch_revisions(self, from_revnum, to_revnum):
578 586 if from_revnum < to_revnum:
579 587 from_revnum, to_revnum = to_revnum, from_revnum
580 588
581 589 self.child_cset = None
582 590 def parselogentry(orig_paths, revnum, author, date, message):
583 591 """Return the parsed commit object or None, and True if
584 592 the revision is a branch root.
585 593 """
586 594 self.ui.debug("parsing revision %d (%d changes)\n" %
587 595 (revnum, len(orig_paths)))
588 596
589 597 rev = self.revid(revnum)
590 598 # branch log might return entries for a parent we already have
591 599
592 600 if (rev in self.commits or revnum < to_revnum):
593 601 return None, False
594 602
595 603 parents = []
596 604 # check whether this revision is the start of a branch
597 605 if self.module in orig_paths:
598 606 ent = orig_paths[self.module]
599 607 if ent.copyfrom_path:
600 608 # ent.copyfrom_rev may not be the actual last revision
601 609 prev = self.latest(ent.copyfrom_path, ent.copyfrom_rev)
602 610 parents = [self.revid(prev, ent.copyfrom_path)]
603 611 self.ui.note('found parent of branch %s at %d: %s\n' % \
604 612 (self.module, prev, ent.copyfrom_path))
605 613 else:
606 614 self.ui.debug("No copyfrom path, don't know what to do.\n")
607 615
608 616 orig_paths = orig_paths.items()
609 617 orig_paths.sort()
610 618 paths = []
611 619 # filter out unrelated paths
612 620 for path, ent in orig_paths:
613 621 if not path.startswith(self.module):
614 622 self.ui.debug("boring@%s: %s\n" % (revnum, path))
615 623 continue
616 624 paths.append((path, ent))
617 625
618 626 # Example SVN datetime. Includes microseconds.
619 627 # ISO-8601 conformant
620 628 # '2007-01-04T17:35:00.902377Z'
621 629 date = util.parsedate(date[:19] + " UTC", ["%Y-%m-%dT%H:%M:%S"])
622 630
623 631 log = message and self.recode(message)
624 632 author = author and self.recode(author) or ''
625 633 try:
626 634 branch = self.module.split("/")[-1]
627 635 if branch == 'trunk':
628 636 branch = ''
629 637 except IndexError:
630 638 branch = None
631 639
632 640 cset = commit(author=author,
633 641 date=util.datestr(date),
634 642 desc=log,
635 643 parents=parents,
636 644 branch=branch,
637 645 rev=rev.encode('utf-8'))
638 646
639 647 self.commits[rev] = cset
640 648 # The parents list is *shared* among self.paths and the
641 649 # commit object. Both will be updated below.
642 650 self.paths[rev] = (paths, cset.parents)
643 651 if self.child_cset and not self.child_cset.parents:
644 652 self.child_cset.parents[:] = [rev]
645 653 self.child_cset = cset
646 654 return cset, len(parents) > 0
647 655
648 656 self.ui.note('fetching revision log for "%s" from %d to %d\n' %
649 657 (self.module, from_revnum, to_revnum))
650 658
651 659 try:
652 660 firstcset = None
653 661 stream = get_log(self.url, [self.module], from_revnum, to_revnum)
654 662 try:
655 663 for entry in stream:
656 664 paths, revnum, author, date, message = entry
657 665 if self.is_blacklisted(revnum):
658 666 self.ui.note('skipping blacklisted revision %d\n'
659 667 % revnum)
660 668 continue
661 669 if paths is None:
662 670 self.ui.debug('revision %d has no entries\n' % revnum)
663 671 continue
664 672 cset, branched = parselogentry(paths, revnum, author,
665 673 date, message)
666 674 if cset:
667 675 firstcset = cset
668 676 if branched:
669 677 break
670 678 finally:
671 679 stream.close()
672 680
673 681 if firstcset and not firstcset.parents:
674 682 # The first revision of the sequence (the last fetched one)
675 683 # has invalid parents if not a branch root. Find the parent
676 684 # revision now, if any.
677 685 try:
678 686 firstrevnum = self.revnum(firstcset.rev)
679 687 if firstrevnum > 1:
680 688 latest = self.latest(self.module, firstrevnum - 1)
681 689 firstcset.parents.append(self.revid(latest))
682 690 except util.Abort:
683 691 pass
684 692 except SubversionException, (inst, num):
685 693 if num == svn.core.SVN_ERR_FS_NO_SUCH_REVISION:
686 694 raise NoSuchRevision(branch=self,
687 695 revision="Revision number %d" % to_revnum)
688 696 raise
689 697
690 698 def _getfile(self, file, rev):
691 699 io = StringIO()
692 700 # TODO: ra.get_file transmits the whole file instead of diffs.
693 701 mode = ''
694 702 try:
695 703 new_module, revnum = self.revsplit(rev)[1:]
696 704 if self.module != new_module:
697 705 self.module = new_module
698 706 self.reparent(self.module)
699 707 info = svn.ra.get_file(self.ra, file, revnum, io)
700 708 if isinstance(info, list):
701 709 info = info[-1]
702 710 mode = ("svn:executable" in info) and 'x' or ''
703 711 mode = ("svn:special" in info) and 'l' or mode
704 712 except SubversionException, e:
705 713 notfound = (svn.core.SVN_ERR_FS_NOT_FOUND,
706 714 svn.core.SVN_ERR_RA_DAV_PATH_NOT_FOUND)
707 715 if e.apr_err in notfound: # File not found
708 716 raise IOError()
709 717 raise
710 718 data = io.getvalue()
711 719 if mode == 'l':
712 720 link_prefix = "link "
713 721 if data.startswith(link_prefix):
714 722 data = data[len(link_prefix):]
715 723 return data, mode
716 724
717 725 def _find_children(self, path, revnum):
718 726 path = path.strip('/')
719 727 pool = Pool()
720 728 rpath = '/'.join([self.base, path]).strip('/')
721 729 return ['%s/%s' % (path, x) for x in svn.client.ls(rpath, optrev(revnum), True, self.ctx, pool).keys()]
722 730
# Shell script written by svn_sink as the hooks/pre-revprop-change hook of
# the repositories it creates. It accepts modifications of svn:log and the
# addition of the hg:convert-branch and hg:convert-rev revision properties;
# every other revision property change is rejected with exit status 1.
pre_revprop_change = '''#!/bin/sh

REPOS="$1"
REV="$2"
USER="$3"
PROPNAME="$4"
ACTION="$5"

if [ "$ACTION" = "M" -a "$PROPNAME" = "svn:log" ]; then exit 0; fi
if [ "$ACTION" = "A" -a "$PROPNAME" = "hg:convert-branch" ]; then exit 0; fi
if [ "$ACTION" = "A" -a "$PROPNAME" = "hg:convert-rev" ]; then exit 0; fi

echo "Changing prohibited revision property" >&2
exit 1
'''
738 746
class svn_sink(converter_sink, commandline):
    """Conversion sink committing into a Subversion working copy by
    driving the 'svn' command line client."""

    # Extracts the new revision number from the output of "svn commit".
    commit_re = re.compile(r'Committed revision (\d+).', re.M)

    def prerun(self):
        """Change to the working copy directory, when there is one."""
        if self.wc:
            os.chdir(self.wc)

    def postrun(self):
        """Change back to the directory recorded at construction time."""
        if self.wc:
            os.chdir(self.cwd)

    def join(self, name):
        """Return the path of `name` inside the working copy's .svn
        directory."""
        return os.path.join(self.wc, '.svn', name)

    def revmapfile(self):
        """Return the path of the revision map file."""
        return self.join('hg-shamap')

    def authorfile(self):
        """Return the path of the author map file."""
        return self.join('hg-authormap')

    def __init__(self, ui, path):
        """Prepare the working copy used for the conversion.

        If `path` is a working copy it is updated and used as is.
        Otherwise a working copy named "<basename of path>-wc" is checked
        out in the current directory; when the parent directory of `path`
        exists, `path` is used as a local repository, which is created
        first (together with its pre-revprop-change hook) if it is not
        one already.
        """
        converter_sink.__init__(self, ui, path)
        commandline.__init__(self, ui, 'svn')
        self.delete = []
        self.setexec = []
        self.delexec = []
        self.copies = []
        self.wc = None
        self.cwd = os.getcwd()

        path = os.path.realpath(path)

        created = False
        if os.path.isfile(os.path.join(path, '.svn', 'entries')):
            self.wc = path
            self.run0('update')
        else:
            wcpath = os.path.join(os.getcwd(), os.path.basename(path) + '-wc')

            if os.path.isdir(os.path.dirname(path)):
                if not os.path.exists(os.path.join(path, 'db', 'fs-type')):
                    ui.status(_('initializing svn repo %r\n') %
                              os.path.basename(path))
                    commandline(ui, 'svnadmin').run0('create', path)
                    created = path
                path = util.normpath(path)
                if not path.startswith('/'):
                    path = '/' + path
                path = 'file://' + path

            ui.status(_('initializing svn wc %r\n') % os.path.basename(wcpath))
            self.run0('checkout', path, wcpath)

            self.wc = wcpath
        self.opener = util.opener(self.wc)
        self.wopener = util.opener(self.wc)
        self.childmap = mapfile(ui, self.join('hg-childmap'))
        # None when the filesystem holding the working copy does not
        # support the execute bit.
        self.is_exec = util.checkexec(self.wc) and util.is_exec or None

        if created:
            hook = os.path.join(created, 'hooks', 'pre-revprop-change')
            fp = open(hook, 'w')
            fp.write(pre_revprop_change)
            fp.close()
            util.set_flags(hook, "x")

        xport = transport.SvnRaTransport(url=geturl(path))
        self.uuid = svn.ra.get_uuid(xport.ra)

    def wjoin(self, *names):
        """Join `names` onto the working copy directory."""
        return os.path.join(self.wc, *names)

    def putfile(self, filename, flags, data):
        """Write `data` to `filename` in the working copy.

        With 'l' in `flags` a symlink pointing at `data` is created.
        Otherwise a regular file is written and, depending on 'x' in
        `flags`, queued for a svn:executable change applied by putcommit.
        """
        if 'l' in flags:
            self.wopener.symlink(data, filename)
        else:
            wpath = self.wjoin(filename)
            try:
                # Remove the symlink that was tested, i.e. the one in the
                # working copy, not a same-named path under the current
                # directory.
                if os.path.islink(wpath):
                    os.unlink(wpath)
            except OSError:
                pass
            fp = self.wopener(filename, 'w')
            try:
                fp.write(data)
            finally:
                fp.close()

            if self.is_exec:
                was_exec = self.is_exec(wpath)
            else:
                # On filesystems not supporting execute-bit, there is no way
                # to know if it is set but asking subversion. Setting it
                # systematically is just as expensive and much simpler.
                was_exec = 'x' not in flags

            util.set_flags(wpath, flags)
            if was_exec:
                if 'x' not in flags:
                    self.delexec.append(filename)
            else:
                if 'x' in flags:
                    self.setexec.append(filename)

    def delfile(self, name):
        """Queue `name` for deletion at the next putcommit."""
        self.delete.append(name)

    def copyfile(self, source, dest):
        """Queue a copy of `source` to `dest` for the next putcommit."""
        self.copies.append([source, dest])

    def _copyfile(self, source, dest):
        """Run "svn copy source dest", keeping the current content of an
        already existing `dest`."""
        # SVN's copy command pukes if the destination file exists, but
        # our copyfile method expects to record a copy that has
        # already occurred. Cross the semantic gap.
        wdest = self.wjoin(dest)
        exists = os.path.exists(wdest)
        if exists:
            # Move the existing file out of the way under a fresh name.
            fd, tempname = tempfile.mkstemp(
                prefix='hg-copy-', dir=os.path.dirname(wdest))
            os.close(fd)
            os.unlink(tempname)
            os.rename(wdest, tempname)
        try:
            self.run0('copy', source, dest)
        finally:
            if exists:
                # Put the saved content back over whatever svn created.
                try:
                    os.unlink(wdest)
                except OSError:
                    pass
                os.rename(tempname, wdest)

    def dirs_of(self, files):
        """Return the set of directories involved in `files`: the names
        that are directories in the working copy plus every '/'-delimited
        leading part of each name."""
        dirs = set()
        for f in files:
            if os.path.isdir(self.wjoin(f)):
                dirs.add(f)
            for i in strutil.rfindall(f, '/'):
                dirs.add(f[:i])
        return dirs

    def add_dirs(self, files):
        """"svn add" (non-recursively) the directories of `files` that
        have no .svn/entries yet; return them sorted."""
        add_dirs = [d for d in self.dirs_of(files)
                    if not os.path.exists(self.wjoin(d, '.svn', 'entries'))]
        if add_dirs:
            add_dirs.sort()
            self.xargs(add_dirs, 'add', non_recursive=True, quiet=True)
        return add_dirs

    def add_files(self, files):
        """"svn add" `files`, if any, and return them."""
        if files:
            self.xargs(files, 'add', quiet=True)
        return files

    def tidy_dirs(self, names):
        """"svn delete" the directories of `names` whose only remaining
        entry is '.svn'; return the list of deleted directories."""
        dirs = list(self.dirs_of(names))
        dirs.sort(reverse=True)
        deleted = []
        for d in dirs:
            wd = self.wjoin(d)
            # os.listdir returns a list: comparing it with the string
            # '.svn' could never be true.
            if os.listdir(wd) == ['.svn']:
                self.run0('delete', d)
                deleted.append(d)
        return deleted

    def addchild(self, parent, child):
        """Record `child` as the revision committed on top of `parent`."""
        self.childmap[parent] = child

    def revid(self, rev):
        """Return the revision id of revision number `rev`."""
        return u"svn:%s@%s" % (self.uuid, rev)

    def putcommit(self, files, parents, commit):
        """Commit the pending changes and return the new revision id.

        If a child is already recorded for one of `parents`, its id is
        returned and nothing is committed. Otherwise the queued copies,
        deletions and svn:executable changes are applied, `files` and
        their directories are added, and "svn commit" is run with the
        description and author of `commit`. Raises util.Abort when no
        revision number can be found in the output of svn.
        """
        for parent in parents:
            try:
                return self.revid(self.childmap[parent])
            except KeyError:
                pass
        entries = set(self.delete)
        files = util.frozenset(files)
        entries.update(self.add_dirs(files.difference(entries)))
        if self.copies:
            for s, d in self.copies:
                self._copyfile(s, d)
            self.copies = []
        if self.delete:
            self.xargs(self.delete, 'delete')
            self.delete = []
        entries.update(self.add_files(files.difference(entries)))
        entries.update(self.tidy_dirs(entries))
        if self.delexec:
            self.xargs(self.delexec, 'propdel', 'svn:executable')
            self.delexec = []
        if self.setexec:
            self.xargs(self.setexec, 'propset', 'svn:executable', '*')
            self.setexec = []

        fd, messagefile = tempfile.mkstemp(prefix='hg-convert-')
        try:
            # Written inside the try block so that the temporary file is
            # removed even when writing the message fails.
            fp = os.fdopen(fd, 'w')
            try:
                fp.write(commit.desc)
            finally:
                fp.close()
            output = self.run0('commit',
                               username=util.shortuser(commit.author),
                               file=messagefile,
                               encoding='utf-8')
            try:
                rev = self.commit_re.search(output).group(1)
            except AttributeError:
                # search() returned None: no "Committed revision" line.
                self.ui.warn(_('unexpected svn output:\n'))
                self.ui.warn(output)
                raise util.Abort(_('unable to cope with svn output'))
            if commit.rev:
                self.run('propset', 'hg:convert-rev', commit.rev,
                         revprop=True, revision=rev)
            if commit.branch and commit.branch != 'default':
                self.run('propset', 'hg:convert-branch', commit.branch,
                         revprop=True, revision=rev)
            for parent in parents:
                self.addchild(parent, rev)
            return self.revid(rev)
        finally:
            os.unlink(messagefile)

    def puttags(self, tags):
        """Not implemented: only emits a warning."""
        self.ui.warn(_('XXX TAGS NOT IMPLEMENTED YET\n'))
General Comments 0
You need to be logged in to leave comments. Login now