##// END OF EJS Templates
convert: Fix bug of limit_arglist() losing file on limit boundary.
Shun-ichi GOTO -
r5806:a3a380af default
parent child Browse files
Show More
@@ -1,952 +1,952
1 1 # Subversion 1.4/1.5 Python API backend
2 2 #
3 3 # Copyright(C) 2007 Daniel Holth et al
4 4 #
5 5 # Configuration options:
6 6 #
7 7 # convert.svn.trunk
8 8 # Relative path to the trunk (default: "trunk")
9 9 # convert.svn.branches
10 10 # Relative path to tree of branches (default: "branches")
11 11 # convert.svn.tags
12 12 # Relative path to tree of tags (default: "tags")
13 13 #
14 14 # Set these in a hgrc, or on the command line as follows:
15 15 #
16 16 # hg convert --config convert.svn.trunk=wackoname [...]
17 17
18 18 import locale
19 19 import os
20 20 import re
21 21 import sys
22 22 import cPickle as pickle
23 23 import tempfile
24 24
25 25 from mercurial import strutil, util
26 26 from mercurial.i18n import _
27 27
28 28 # Subversion stuff. Works best with very recent Python SVN bindings
29 29 # e.g. SVN 1.5 or backports. Thanks to the bzr folks for enhancing
30 30 # these bindings.
31 31
32 32 from cStringIO import StringIO
33 33
34 34 from common import NoRepo, commit, converter_source, encodeargs, decodeargs
35 35 from common import commandline, converter_sink, mapfile
36 36
37 37 try:
38 38 from svn.core import SubversionException, Pool
39 39 import svn
40 40 import svn.client
41 41 import svn.core
42 42 import svn.ra
43 43 import svn.delta
44 44 import transport
45 45 except ImportError:
46 46 pass
47 47
def geturl(path):
    """Return a Subversion URL for ``path``.

    The Subversion client is asked first to map the canonicalized path
    to a URL.  If it raises SubversionException, an existing local
    directory is converted into a ``file://`` URL and any other value
    is returned unchanged.
    """
    try:
        canonical = svn.core.svn_path_canonicalize(path)
        return svn.client.url_from_path(canonical)
    except SubversionException:
        # fall back to the manual handling below
        pass

    if not os.path.isdir(path):
        return path

    fullpath = os.path.normpath(os.path.abspath(path))
    if os.name == 'nt':
        # turn the drive-letter path into the path component of a URL
        fullpath = '/' + fullpath.replace('\\', '/')
    return 'file://%s' % fullpath
59 59
def optrev(number):
    """Build an ``svn_opt_revision_t`` designating revision ``number``."""
    revision = svn.core.svn_opt_revision_t()
    revision.kind = svn.core.svn_opt_revision_number
    revision.value.number = number
    return revision
65 65
class changedpath(object):
    """Plain copy of the three attributes read from a changed-path entry.

    get_log_child() replaces each log entry value with one of these
    before pickling the entry.
    """

    def __init__(self, p):
        (self.copyfrom_path,
         self.copyfrom_rev,
         self.action) = (p.copyfrom_path, p.copyfrom_rev, p.action)
71 71
72 72 def get_log_child(fp, url, paths, start, end, limit=0, discover_changed_paths=True,
73 73 strict_node_history=False):
74 74 protocol = -1
75 75 def receiver(orig_paths, revnum, author, date, message, pool):
76 76 if orig_paths is not None:
77 77 for k, v in orig_paths.iteritems():
78 78 orig_paths[k] = changedpath(v)
79 79 pickle.dump((orig_paths, revnum, author, date, message),
80 80 fp, protocol)
81 81
82 82 try:
83 83 # Use an ra of our own so that our parent can consume
84 84 # our results without confusing the server.
85 85 t = transport.SvnRaTransport(url=url)
86 86 svn.ra.get_log(t.ra, paths, start, end, limit,
87 87 discover_changed_paths,
88 88 strict_node_history,
89 89 receiver)
90 90 except SubversionException, (inst, num):
91 91 pickle.dump(num, fp, protocol)
92 92 else:
93 93 pickle.dump(None, fp, protocol)
94 94 fp.close()
95 95
def debugsvnlog(ui, **opts):
    """Fetch SVN log in a subprocess and channel them back to parent to
    avoid memory collection issues.

    The encoded get_log_child() arguments are read from stdin and the
    pickled log entries are written to stdout; both streams are switched
    to binary mode first.
    """
    for stream in (sys.stdin, sys.stdout):
        util.set_binary(stream)
    child_args = decodeargs(sys.stdin.read())
    get_log_child(sys.stdout, *child_args)
104 104
105 105 # SVN conversion code stolen from bzr-svn and tailor
106 106 class svn_source(converter_source):
107 107 def __init__(self, ui, url, rev=None):
108 108 super(svn_source, self).__init__(ui, url, rev=rev)
109 109
110 110 try:
111 111 SubversionException
112 112 except NameError:
113 113 raise NoRepo('Subversion python bindings could not be loaded')
114 114
115 115 self.encoding = locale.getpreferredencoding()
116 116 self.lastrevs = {}
117 117
118 118 latest = None
119 119 try:
120 120 # Support file://path@rev syntax. Useful e.g. to convert
121 121 # deleted branches.
122 122 at = url.rfind('@')
123 123 if at >= 0:
124 124 latest = int(url[at+1:])
125 125 url = url[:at]
126 126 except ValueError, e:
127 127 pass
128 128 self.url = geturl(url)
129 129 self.encoding = 'UTF-8' # Subversion is always nominal UTF-8
130 130 try:
131 131 self.transport = transport.SvnRaTransport(url=self.url)
132 132 self.ra = self.transport.ra
133 133 self.ctx = self.transport.client
134 134 self.base = svn.ra.get_repos_root(self.ra)
135 135 self.module = self.url[len(self.base):]
136 136 self.modulemap = {} # revision, module
137 137 self.commits = {}
138 138 self.paths = {}
139 139 self.uuid = svn.ra.get_uuid(self.ra).decode(self.encoding)
140 140 except SubversionException, e:
141 141 ui.print_exc()
142 142 raise NoRepo("%s does not look like a Subversion repo" % self.url)
143 143
144 144 if rev:
145 145 try:
146 146 latest = int(rev)
147 147 except ValueError:
148 148 raise util.Abort('svn: revision %s is not an integer' % rev)
149 149
150 150 try:
151 151 self.get_blacklist()
152 152 except IOError, e:
153 153 pass
154 154
155 155 self.last_changed = self.latest(self.module, latest)
156 156
157 157 self.head = self.revid(self.last_changed)
158 158 self._changescache = None
159 159
160 160 if os.path.exists(os.path.join(url, '.svn/entries')):
161 161 self.wc = url
162 162 else:
163 163 self.wc = None
164 164 self.convertfp = None
165 165
166 166 def setrevmap(self, revmap):
167 167 lastrevs = {}
168 168 for revid in revmap.iterkeys():
169 169 uuid, module, revnum = self.revsplit(revid)
170 170 lastrevnum = lastrevs.setdefault(module, revnum)
171 171 if revnum > lastrevnum:
172 172 lastrevs[module] = revnum
173 173 self.lastrevs = lastrevs
174 174
175 175 def exists(self, path, optrev):
176 176 try:
177 177 svn.client.ls(self.url.rstrip('/') + '/' + path,
178 178 optrev, False, self.ctx)
179 179 return True
180 180 except SubversionException, err:
181 181 return False
182 182
183 183 def getheads(self):
184 184 # detect standard /branches, /tags, /trunk layout
185 185 rev = optrev(self.last_changed)
186 186 rpath = self.url.strip('/')
187 187 cfgtrunk = self.ui.config('convert', 'svn.trunk')
188 188 cfgbranches = self.ui.config('convert', 'svn.branches')
189 189 cfgtags = self.ui.config('convert', 'svn.tags')
190 190 trunk = (cfgtrunk or 'trunk').strip('/')
191 191 branches = (cfgbranches or 'branches').strip('/')
192 192 tags = (cfgtags or 'tags').strip('/')
193 193 if self.exists(trunk, rev) and self.exists(branches, rev) and self.exists(tags, rev):
194 194 self.ui.note('found trunk at %r, branches at %r and tags at %r\n' %
195 195 (trunk, branches, tags))
196 196 oldmodule = self.module
197 197 self.module += '/' + trunk
198 198 lt = self.latest(self.module, self.last_changed)
199 199 self.head = self.revid(lt)
200 200 self.heads = [self.head]
201 201 branchnames = svn.client.ls(rpath + '/' + branches, rev, False,
202 202 self.ctx)
203 203 for branch in branchnames.keys():
204 204 if oldmodule:
205 205 module = oldmodule + '/' + branches + '/' + branch
206 206 else:
207 207 module = '/' + branches + '/' + branch
208 208 brevnum = self.latest(module, self.last_changed)
209 209 brev = self.revid(brevnum, module)
210 210 self.ui.note('found branch %s at %d\n' % (branch, brevnum))
211 211 self.heads.append(brev)
212 212
213 213 if oldmodule:
214 214 self.tags = '%s/%s' % (oldmodule, tags)
215 215 else:
216 216 self.tags = '/%s' % tags
217 217
218 218 elif cfgtrunk or cfgbranches or cfgtags:
219 219 raise util.Abort('trunk/branch/tags layout expected, but not found')
220 220 else:
221 221 self.ui.note('working with one branch\n')
222 222 self.heads = [self.head]
223 223 self.tags = tags
224 224 return self.heads
225 225
226 226 def getfile(self, file, rev):
227 227 data, mode = self._getfile(file, rev)
228 228 self.modecache[(file, rev)] = mode
229 229 return data
230 230
231 231 def getmode(self, file, rev):
232 232 return self.modecache[(file, rev)]
233 233
234 234 def getchanges(self, rev):
235 235 if self._changescache and self._changescache[0] == rev:
236 236 return self._changescache[1]
237 237 self._changescache = None
238 238 self.modecache = {}
239 239 (paths, parents) = self.paths[rev]
240 240 files, copies = self.expandpaths(rev, paths, parents)
241 241 files.sort()
242 242 files = zip(files, [rev] * len(files))
243 243
244 244 # caller caches the result, so free it here to release memory
245 245 del self.paths[rev]
246 246 return (files, copies)
247 247
248 248 def getchangedfiles(self, rev, i):
249 249 changes = self.getchanges(rev)
250 250 self._changescache = (rev, changes)
251 251 return [f[0] for f in changes[0]]
252 252
253 253 def getcommit(self, rev):
254 254 if rev not in self.commits:
255 255 uuid, module, revnum = self.revsplit(rev)
256 256 self.module = module
257 257 self.reparent(module)
258 258 stop = self.lastrevs.get(module, 0)
259 259 self._fetch_revisions(from_revnum=revnum, to_revnum=stop)
260 260 commit = self.commits[rev]
261 261 # caller caches the result, so free it here to release memory
262 262 del self.commits[rev]
263 263 return commit
264 264
265 265 def get_log(self, paths, start, end, limit=0, discover_changed_paths=True,
266 266 strict_node_history=False):
267 267
268 268 def parent(fp):
269 269 while True:
270 270 entry = pickle.load(fp)
271 271 try:
272 272 orig_paths, revnum, author, date, message = entry
273 273 except:
274 274 if entry is None:
275 275 break
276 276 raise SubversionException("child raised exception", entry)
277 277 yield entry
278 278
279 279 args = [self.url, paths, start, end, limit, discover_changed_paths,
280 280 strict_node_history]
281 281 arg = encodeargs(args)
282 282 hgexe = util.hgexecutable()
283 283 cmd = '%s debugsvnlog' % util.shellquote(hgexe)
284 284 stdin, stdout = os.popen2(cmd, 'b')
285 285
286 286 stdin.write(arg)
287 287 stdin.close()
288 288
289 289 for p in parent(stdout):
290 290 yield p
291 291
292 292 def gettags(self):
293 293 tags = {}
294 294 start = self.revnum(self.head)
295 295 try:
296 296 for entry in self.get_log([self.tags], 0, start):
297 297 orig_paths, revnum, author, date, message = entry
298 298 for path in orig_paths:
299 299 if not path.startswith(self.tags+'/'):
300 300 continue
301 301 ent = orig_paths[path]
302 302 source = ent.copyfrom_path
303 303 rev = ent.copyfrom_rev
304 304 tag = path.split('/')[-1]
305 305 tags[tag] = self.revid(rev, module=source)
306 306 except SubversionException, (inst, num):
307 307 self.ui.note('no tags found at revision %d\n' % start)
308 308 return tags
309 309
310 310 def converted(self, rev, destrev):
311 311 if not self.wc:
312 312 return
313 313 if self.convertfp is None:
314 314 self.convertfp = open(os.path.join(self.wc, '.svn', 'hg-shamap'),
315 315 'a')
316 316 self.convertfp.write('%s %d\n' % (destrev, self.revnum(rev)))
317 317 self.convertfp.flush()
318 318
319 319 # -- helper functions --
320 320
321 321 def revid(self, revnum, module=None):
322 322 if not module:
323 323 module = self.module
324 324 return u"svn:%s%s@%s" % (self.uuid, module.decode(self.encoding),
325 325 revnum)
326 326
327 327 def revnum(self, rev):
328 328 return int(rev.split('@')[-1])
329 329
330 330 def revsplit(self, rev):
331 331 url, revnum = rev.encode(self.encoding).split('@', 1)
332 332 revnum = int(revnum)
333 333 parts = url.split('/', 1)
334 334 uuid = parts.pop(0)[4:]
335 335 mod = ''
336 336 if parts:
337 337 mod = '/' + parts[0]
338 338 return uuid, mod, revnum
339 339
340 340 def latest(self, path, stop=0):
341 341 'find the latest revision affecting path, up to stop'
342 342 if not stop:
343 343 stop = svn.ra.get_latest_revnum(self.ra)
344 344 try:
345 345 self.reparent('')
346 346 dirent = svn.ra.stat(self.ra, path.strip('/'), stop)
347 347 self.reparent(self.module)
348 348 except SubversionException:
349 349 dirent = None
350 350 if not dirent:
351 351 raise util.Abort('%s not found up to revision %d' % (path, stop))
352 352
353 353 return dirent.created_rev
354 354
355 355 def get_blacklist(self):
356 356 """Avoid certain revision numbers.
357 357 It is not uncommon for two nearby revisions to cancel each other
358 358 out, e.g. 'I copied trunk into a subdirectory of itself instead
359 359 of making a branch'. The converted repository is significantly
360 360 smaller if we ignore such revisions."""
361 361 self.blacklist = util.set()
362 362 blacklist = self.blacklist
363 363 for line in file("blacklist.txt", "r"):
364 364 if not line.startswith("#"):
365 365 try:
366 366 svn_rev = int(line.strip())
367 367 blacklist.add(svn_rev)
368 368 except ValueError, e:
369 369 pass # not an integer or a comment
370 370
371 371 def is_blacklisted(self, svn_rev):
372 372 return svn_rev in self.blacklist
373 373
374 374 def reparent(self, module):
375 375 svn_url = self.base + module
376 376 self.ui.debug("reparent to %s\n" % svn_url.encode(self.encoding))
377 377 svn.ra.reparent(self.ra, svn_url.encode(self.encoding))
378 378
379 379 def expandpaths(self, rev, paths, parents):
380 380 def get_entry_from_path(path, module=self.module):
381 381 # Given the repository url of this wc, say
382 382 # "http://server/plone/CMFPlone/branches/Plone-2_0-branch"
383 383 # extract the "entry" portion (a relative path) from what
384 384 # svn log --xml says, ie
385 385 # "/CMFPlone/branches/Plone-2_0-branch/tests/PloneTestCase.py"
386 386 # that is to say "tests/PloneTestCase.py"
387 387 if path.startswith(module):
388 388 relative = path[len(module):]
389 389 if relative.startswith('/'):
390 390 return relative[1:]
391 391 else:
392 392 return relative
393 393
394 394 # The path is outside our tracked tree...
395 395 self.ui.debug('%r is not under %r, ignoring\n' % (path, module))
396 396 return None
397 397
398 398 entries = []
399 399 copyfrom = {} # Map of entrypath, revision for finding source of deleted revisions.
400 400 copies = {}
401 401 revnum = self.revnum(rev)
402 402
403 403 if revnum in self.modulemap:
404 404 new_module = self.modulemap[revnum]
405 405 if new_module != self.module:
406 406 self.module = new_module
407 407 self.reparent(self.module)
408 408
409 409 for path, ent in paths:
410 410 entrypath = get_entry_from_path(path, module=self.module)
411 411 entry = entrypath.decode(self.encoding)
412 412
413 413 kind = svn.ra.check_path(self.ra, entrypath, revnum)
414 414 if kind == svn.core.svn_node_file:
415 415 if ent.copyfrom_path:
416 416 copyfrom_path = get_entry_from_path(ent.copyfrom_path)
417 417 if copyfrom_path:
418 418 self.ui.debug("Copied to %s from %s@%s\n" % (entry, copyfrom_path, ent.copyfrom_rev))
419 419 # It's probably important for hg that the source
420 420 # exists in the revision's parent, not just the
421 421 # ent.copyfrom_rev
422 422 fromkind = svn.ra.check_path(self.ra, copyfrom_path, ent.copyfrom_rev)
423 423 if fromkind != 0:
424 424 copies[self.recode(entry)] = self.recode(copyfrom_path)
425 425 entries.append(self.recode(entry))
426 426 elif kind == 0: # gone, but had better be a deleted *file*
427 427 self.ui.debug("gone from %s\n" % ent.copyfrom_rev)
428 428
429 429 # if a branch is created but entries are removed in the same
430 430 # changeset, get the right fromrev
431 431 if parents:
432 432 uuid, old_module, fromrev = self.revsplit(parents[0])
433 433 else:
434 434 fromrev = revnum - 1
435 435 # might always need to be revnum - 1 in these 3 lines?
436 436 old_module = self.modulemap.get(fromrev, self.module)
437 437
438 438 basepath = old_module + "/" + get_entry_from_path(path, module=self.module)
439 439 entrypath = old_module + "/" + get_entry_from_path(path, module=self.module)
440 440
441 441 def lookup_parts(p):
442 442 rc = None
443 443 parts = p.split("/")
444 444 for i in range(len(parts)):
445 445 part = "/".join(parts[:i])
446 446 info = part, copyfrom.get(part, None)
447 447 if info[1] is not None:
448 448 self.ui.debug("Found parent directory %s\n" % info[1])
449 449 rc = info
450 450 return rc
451 451
452 452 self.ui.debug("base, entry %s %s\n" % (basepath, entrypath))
453 453
454 454 frompath, froment = lookup_parts(entrypath) or (None, revnum - 1)
455 455
456 456 # need to remove fragment from lookup_parts and replace with copyfrom_path
457 457 if frompath is not None:
458 458 self.ui.debug("munge-o-matic\n")
459 459 self.ui.debug(entrypath + '\n')
460 460 self.ui.debug(entrypath[len(frompath):] + '\n')
461 461 entrypath = froment.copyfrom_path + entrypath[len(frompath):]
462 462 fromrev = froment.copyfrom_rev
463 463 self.ui.debug("Info: %s %s %s %s\n" % (frompath, froment, ent, entrypath))
464 464
465 465 fromkind = svn.ra.check_path(self.ra, entrypath, fromrev)
466 466 if fromkind == svn.core.svn_node_file: # a deleted file
467 467 entries.append(self.recode(entry))
468 468 elif fromkind == svn.core.svn_node_dir:
469 469 # print "Deleted/moved non-file:", revnum, path, ent
470 470 # children = self._find_children(path, revnum - 1)
471 471 # print "find children %s@%d from %d action %s" % (path, revnum, ent.copyfrom_rev, ent.action)
472 472 # Sometimes this is tricky. For example: in
473 473 # The Subversion Repository revision 6940 a dir
474 474 # was copied and one of its files was deleted
475 475 # from the new location in the same commit. This
476 476 # code can't deal with that yet.
477 477 if ent.action == 'C':
478 478 children = self._find_children(path, fromrev)
479 479 else:
480 480 oroot = entrypath.strip('/')
481 481 nroot = path.strip('/')
482 482 children = self._find_children(oroot, fromrev)
483 483 children = [s.replace(oroot,nroot) for s in children]
484 484 # Mark all [files, not directories] as deleted.
485 485 for child in children:
486 486 # Can we move a child directory and its
487 487 # parent in the same commit? (probably can). Could
488 488 # cause problems if instead of revnum -1,
489 489 # we have to look in (copyfrom_path, revnum - 1)
490 490 entrypath = get_entry_from_path("/" + child, module=old_module)
491 491 if entrypath:
492 492 entry = self.recode(entrypath.decode(self.encoding))
493 493 if entry in copies:
494 494 # deleted file within a copy
495 495 del copies[entry]
496 496 else:
497 497 entries.append(entry)
498 498 else:
499 499 self.ui.debug('unknown path in revision %d: %s\n' % \
500 500 (revnum, path))
501 501 elif kind == svn.core.svn_node_dir:
502 502 # Should probably synthesize normal file entries
503 503 # and handle as above to clean up copy/rename handling.
504 504
505 505 # If the directory just had a prop change,
506 506 # then we shouldn't need to look for its children.
507 507 # Also this could create duplicate entries. Not sure
508 508 # whether this will matter. Maybe should make entries a set.
509 509 # print "Changed directory", revnum, path, ent.action, ent.copyfrom_path, ent.copyfrom_rev
510 510 # This will fail if a directory was copied
511 511 # from another branch and then some of its files
512 512 # were deleted in the same transaction.
513 513 children = self._find_children(path, revnum)
514 514 children.sort()
515 515 for child in children:
516 516 # Can we move a child directory and its
517 517 # parent in the same commit? (probably can). Could
518 518 # cause problems if instead of revnum -1,
519 519 # we have to look in (copyfrom_path, revnum - 1)
520 520 entrypath = get_entry_from_path("/" + child, module=self.module)
521 521 # print child, self.module, entrypath
522 522 if entrypath:
523 523 # Need to filter out directories here...
524 524 kind = svn.ra.check_path(self.ra, entrypath, revnum)
525 525 if kind != svn.core.svn_node_dir:
526 526 entries.append(self.recode(entrypath))
527 527
528 528 # Copies here (must copy all from source)
529 529 # Probably not a real problem for us if
530 530 # source does not exist
531 531
532 532 # Can do this with the copy command "hg copy"
533 533 # if ent.copyfrom_path:
534 534 # copyfrom_entry = get_entry_from_path(ent.copyfrom_path.decode(self.encoding),
535 535 # module=self.module)
536 536 # copyto_entry = entrypath
537 537 #
538 538 # print "copy directory", copyfrom_entry, 'to', copyto_entry
539 539 #
540 540 # copies.append((copyfrom_entry, copyto_entry))
541 541
542 542 if ent.copyfrom_path:
543 543 copyfrom_path = ent.copyfrom_path.decode(self.encoding)
544 544 copyfrom_entry = get_entry_from_path(copyfrom_path, module=self.module)
545 545 if copyfrom_entry:
546 546 copyfrom[path] = ent
547 547 self.ui.debug("mark %s came from %s\n" % (path, copyfrom[path]))
548 548
549 549 # Good, /probably/ a regular copy. Really should check
550 550 # to see whether the parent revision actually contains
551 551 # the directory in question.
552 552 children = self._find_children(self.recode(copyfrom_path), ent.copyfrom_rev)
553 553 children.sort()
554 554 for child in children:
555 555 entrypath = get_entry_from_path("/" + child, module=self.module)
556 556 if entrypath:
557 557 entry = entrypath.decode(self.encoding)
558 558 # print "COPY COPY From", copyfrom_entry, entry
559 559 copyto_path = path + entry[len(copyfrom_entry):]
560 560 copyto_entry = get_entry_from_path(copyto_path, module=self.module)
561 561 # print "COPY", entry, "COPY To", copyto_entry
562 562 copies[self.recode(copyto_entry)] = self.recode(entry)
563 563 # copy from quux splort/quuxfile
564 564
565 565 return (entries, copies)
566 566
567 567 def _fetch_revisions(self, from_revnum = 0, to_revnum = 347):
568 568 self.child_cset = None
569 569 def parselogentry(orig_paths, revnum, author, date, message):
570 570 self.ui.debug("parsing revision %d (%d changes)\n" %
571 571 (revnum, len(orig_paths)))
572 572
573 573 if revnum in self.modulemap:
574 574 new_module = self.modulemap[revnum]
575 575 if new_module != self.module:
576 576 self.module = new_module
577 577 self.reparent(self.module)
578 578
579 579 rev = self.revid(revnum)
580 580 # branch log might return entries for a parent we already have
581 581 if (rev in self.commits or
582 582 (revnum < self.lastrevs.get(self.module, 0))):
583 583 return
584 584
585 585 parents = []
586 586 # check whether this revision is the start of a branch
587 587 if self.module in orig_paths:
588 588 ent = orig_paths[self.module]
589 589 if ent.copyfrom_path:
590 590 # ent.copyfrom_rev may not be the actual last revision
591 591 prev = self.latest(ent.copyfrom_path, ent.copyfrom_rev)
592 592 self.modulemap[prev] = ent.copyfrom_path
593 593 parents = [self.revid(prev, ent.copyfrom_path)]
594 594 self.ui.note('found parent of branch %s at %d: %s\n' % \
595 595 (self.module, prev, ent.copyfrom_path))
596 596 else:
597 597 self.ui.debug("No copyfrom path, don't know what to do.\n")
598 598
599 599 self.modulemap[revnum] = self.module # track backwards in time
600 600
601 601 orig_paths = orig_paths.items()
602 602 orig_paths.sort()
603 603 paths = []
604 604 # filter out unrelated paths
605 605 for path, ent in orig_paths:
606 606 if not path.startswith(self.module):
607 607 self.ui.debug("boring@%s: %s\n" % (revnum, path))
608 608 continue
609 609 paths.append((path, ent))
610 610
611 611 self.paths[rev] = (paths, parents)
612 612
613 613 # Example SVN datetime. Includes microseconds.
614 614 # ISO-8601 conformant
615 615 # '2007-01-04T17:35:00.902377Z'
616 616 date = util.parsedate(date[:19] + " UTC", ["%Y-%m-%dT%H:%M:%S"])
617 617
618 618 log = message and self.recode(message)
619 619 author = author and self.recode(author) or ''
620 620 try:
621 621 branch = self.module.split("/")[-1]
622 622 if branch == 'trunk':
623 623 branch = ''
624 624 except IndexError:
625 625 branch = None
626 626
627 627 cset = commit(author=author,
628 628 date=util.datestr(date),
629 629 desc=log,
630 630 parents=parents,
631 631 branch=branch,
632 632 rev=rev.encode('utf-8'))
633 633
634 634 self.commits[rev] = cset
635 635 if self.child_cset and not self.child_cset.parents:
636 636 self.child_cset.parents = [rev]
637 637 self.child_cset = cset
638 638
639 639 self.ui.note('fetching revision log for "%s" from %d to %d\n' %
640 640 (self.module, from_revnum, to_revnum))
641 641
642 642 try:
643 643 for entry in self.get_log([self.module], from_revnum, to_revnum):
644 644 orig_paths, revnum, author, date, message = entry
645 645 if self.is_blacklisted(revnum):
646 646 self.ui.note('skipping blacklisted revision %d\n' % revnum)
647 647 continue
648 648 if orig_paths is None:
649 649 self.ui.debug('revision %d has no entries\n' % revnum)
650 650 continue
651 651 parselogentry(orig_paths, revnum, author, date, message)
652 652 except SubversionException, (inst, num):
653 653 if num == svn.core.SVN_ERR_FS_NO_SUCH_REVISION:
654 654 raise NoSuchRevision(branch=self,
655 655 revision="Revision number %d" % to_revnum)
656 656 raise
657 657
658 658 def _getfile(self, file, rev):
659 659 io = StringIO()
660 660 # TODO: ra.get_file transmits the whole file instead of diffs.
661 661 mode = ''
662 662 try:
663 663 revnum = self.revnum(rev)
664 664 if self.module != self.modulemap[revnum]:
665 665 self.module = self.modulemap[revnum]
666 666 self.reparent(self.module)
667 667 info = svn.ra.get_file(self.ra, file, revnum, io)
668 668 if isinstance(info, list):
669 669 info = info[-1]
670 670 mode = ("svn:executable" in info) and 'x' or ''
671 671 mode = ("svn:special" in info) and 'l' or mode
672 672 except SubversionException, e:
673 673 notfound = (svn.core.SVN_ERR_FS_NOT_FOUND,
674 674 svn.core.SVN_ERR_RA_DAV_PATH_NOT_FOUND)
675 675 if e.apr_err in notfound: # File not found
676 676 raise IOError()
677 677 raise
678 678 data = io.getvalue()
679 679 if mode == 'l':
680 680 link_prefix = "link "
681 681 if data.startswith(link_prefix):
682 682 data = data[len(link_prefix):]
683 683 return data, mode
684 684
685 685 def _find_children(self, path, revnum):
686 686 path = path.strip('/')
687 687 pool = Pool()
688 688 rpath = '/'.join([self.base, path]).strip('/')
689 689 return ['%s/%s' % (path, x) for x in svn.client.ls(rpath, optrev(revnum), True, self.ctx, pool).keys()]
690 690
# Shell script written by svn_sink as the pre-revprop-change hook of a
# repository it has just created.  It accepts modification of svn:log
# and addition of the hg:convert-branch and hg:convert-rev revision
# properties, and rejects every other revision property change.
pre_revprop_change = '''#!/bin/sh

REPOS="$1"
REV="$2"
USER="$3"
PROPNAME="$4"
ACTION="$5"

if [ "$ACTION" = "M" -a "$PROPNAME" = "svn:log" ]; then exit 0; fi
if [ "$ACTION" = "A" -a "$PROPNAME" = "hg:convert-branch" ]; then exit 0; fi
if [ "$ACTION" = "A" -a "$PROPNAME" = "hg:convert-rev" ]; then exit 0; fi

echo "Changing prohibited revision property" >&2
exit 1
'''
706 706
class svn_sink(converter_sink, commandline):
    """Conversion sink that commits into a Subversion working copy by
    running the ``svn`` command line client."""

    commit_re = re.compile(r'Committed revision (\d+).', re.M)

    def limit_arglist(self, files):
        """Yield sublists of ``files`` short enough for one command line.

        On platforms other than Windows, ``files`` is yielded once,
        unchanged.  On Windows the names are grouped so that the summed
        length of each group (one extra character per name) stays below
        the limit; a single name that reaches the limit on its own is
        yielded alone.  No empty group is ever produced on Windows.
        """
        if os.name != 'nt':
            yield files
            return
        # When I tested on WinXP, limit = 2500 is NG, 2400 is OK
        limit = 2000
        size = 0
        chunk = []
        for fn in files:
            b = len(fn) + 1
            if size + b < limit:
                chunk.append(fn)
                size += b
            else:
                # fn does not fit: flush what was collected and start
                # the next chunk with fn so that it is not lost
                if chunk:
                    yield chunk
                chunk = [fn]
                size = b
        if chunk:
            yield chunk

    def prerun(self):
        """Change into the working copy, if any."""
        if self.wc:
            os.chdir(self.wc)

    def postrun(self):
        """Change back to the directory the sink was created in."""
        if self.wc:
            os.chdir(self.cwd)

    def join(self, name):
        """Return the path of ``name`` inside the working copy's .svn."""
        return os.path.join(self.wc, '.svn', name)

    def revmapfile(self):
        """Return the path of the revision map file."""
        return self.join('hg-shamap')

    def authorfile(self):
        """Return the path of the author map file."""
        return self.join('hg-authormap')

    def __init__(self, ui, path):
        """Prepare the working copy used for committing.

        If ``path`` is already a working copy it is updated.  Otherwise
        a working copy named ``<basename>-wc`` is checked out in the
        current directory; when the parent directory of ``path`` exists
        and ``path`` does not look like a repository yet, one is created
        with ``svnadmin create`` and gets a pre-revprop-change hook.
        """
        converter_sink.__init__(self, ui, path)
        commandline.__init__(self, ui, 'svn')
        self.delete = []
        self.setexec = []
        self.delexec = []
        self.copies = []
        self.wc = None
        self.cwd = os.getcwd()

        path = os.path.realpath(path)

        created = False
        if os.path.isfile(os.path.join(path, '.svn', 'entries')):
            self.wc = path
            self.run0('update')
        else:
            wcpath = os.path.join(os.getcwd(), os.path.basename(path) + '-wc')

            if os.path.isdir(os.path.dirname(path)):
                if not os.path.exists(os.path.join(path, 'db', 'fs-type')):
                    ui.status(_('initializing svn repo %r\n') %
                              os.path.basename(path))
                    commandline(ui, 'svnadmin').run0('create', path)
                    created = path
                path = path.replace('\\', '/')
                if not path.startswith('/'):
                    path = '/' + path
                path = 'file://' + path

            ui.status(_('initializing svn wc %r\n') % os.path.basename(wcpath))
            self.run0('checkout', path, wcpath)

            self.wc = wcpath
        self.opener = util.opener(self.wc)
        self.wopener = util.opener(self.wc)
        self.childmap = mapfile(ui, self.join('hg-childmap'))
        self.is_exec = util.checkexec(self.wc) and util.is_exec or None

        if created:
            hook = os.path.join(created, 'hooks', 'pre-revprop-change')
            fp = open(hook, 'w')
            try:
                fp.write(pre_revprop_change)
            finally:
                fp.close()
            util.set_flags(hook, "x")

        xport = transport.SvnRaTransport(url=geturl(path))
        self.uuid = svn.ra.get_uuid(xport.ra)

    def wjoin(self, *names):
        """Join ``names`` onto the working copy path."""
        return os.path.join(self.wc, *names)

    def putfile(self, filename, flags, data):
        """Write ``data`` to ``filename`` in the working copy.

        With 'l' in ``flags`` a symlink pointing at ``data`` is created
        instead.  For regular files the name is queued in
        ``self.setexec`` or ``self.delexec`` when its executable state
        has to change at the next commit.
        """
        if 'l' in flags:
            self.wopener.symlink(data, filename)
        else:
            wpath = self.wjoin(filename)
            try:
                # filename is relative to the working copy, so the link
                # has to be removed through its working-copy path
                if os.path.islink(wpath):
                    os.unlink(wpath)
            except OSError:
                pass
            fp = self.wopener(filename, 'w')
            try:
                fp.write(data)
            finally:
                fp.close()

            if self.is_exec:
                was_exec = self.is_exec(wpath)
            else:
                # On filesystems not supporting execute-bit, there is no way
                # to know if it is set but asking subversion. Setting it
                # systematically is just as expensive and much simpler.
                was_exec = 'x' not in flags

            util.set_flags(wpath, flags)
            if was_exec:
                if 'x' not in flags:
                    self.delexec.append(filename)
            else:
                if 'x' in flags:
                    self.setexec.append(filename)

    def delfile(self, name):
        """Queue ``name`` for deletion at the next commit."""
        self.delete.append(name)

    def copyfile(self, source, dest):
        """Queue a copy from ``source`` to ``dest`` for the next commit."""
        self.copies.append([source, dest])

    def _copyfile(self, source, dest):
        """Run ``svn copy source dest`` while keeping any existing
        content of ``dest``."""
        # SVN's copy command pukes if the destination file exists, but
        # our copyfile method expects to record a copy that has
        # already occurred.  Cross the semantic gap.
        wdest = self.wjoin(dest)
        exists = os.path.exists(wdest)
        if exists:
            # move the existing file out of the way under a unique name
            fd, tempname = tempfile.mkstemp(
                prefix='hg-copy-', dir=os.path.dirname(wdest))
            os.close(fd)
            os.unlink(tempname)
            os.rename(wdest, tempname)
        try:
            self.run0('copy', source, dest)
        finally:
            if exists:
                # put the saved file back in place of what svn copied
                try:
                    os.unlink(wdest)
                except OSError:
                    pass
                os.rename(tempname, wdest)

    def dirs_of(self, files):
        """Return the set of directories among ``files`` plus every
        parent directory of each name."""
        dirs = set()
        for f in files:
            if os.path.isdir(self.wjoin(f)):
                dirs.add(f)
            for i in strutil.rfindall(f, '/'):
                dirs.add(f[:i])
        return dirs

    def add_dirs(self, files):
        """``svn add`` the directories of ``files`` that have no
        ``.svn/entries`` yet; return the sorted list of those added."""
        add_dirs = [d for d in self.dirs_of(files)
                    if not os.path.exists(self.wjoin(d, '.svn', 'entries'))]
        if add_dirs:
            add_dirs.sort()
            for fl in self.limit_arglist(add_dirs):
                self.run('add', non_recursive=True, quiet=True, *fl)
        return add_dirs

    def add_files(self, files):
        """``svn add`` ``files`` and return them."""
        if files:
            for fl in self.limit_arglist(files):
                self.run('add', quiet=True, *fl)
        return files

    def tidy_dirs(self, names):
        """``svn delete`` the directories of ``names`` that contain
        nothing but ``.svn``; return the list of deleted directories."""
        dirs = list(self.dirs_of(names))
        dirs.sort(reverse=True)
        deleted = []
        for d in dirs:
            wd = self.wjoin(d)
            # os.listdir() returns a list, so compare against a list
            if os.listdir(wd) == ['.svn']:
                self.run0('delete', d)
                deleted.append(d)
        return deleted

    def addchild(self, parent, child):
        """Record ``child`` as the revision committed for ``parent``."""
        self.childmap[parent] = child

    def revid(self, rev):
        """Build the revision id for revision number ``rev``."""
        return u"svn:%s@%s" % (self.uuid, rev)

    def putcommit(self, files, parents, commit):
        """Commit the queued changes and return the new revision id.

        If one of ``parents`` already has a child recorded in the child
        map, that child's revision id is returned and nothing is
        committed.  Raises util.Abort when the output of ``svn commit``
        does not contain the committed revision number.
        """
        for parent in parents:
            try:
                return self.revid(self.childmap[parent])
            except KeyError:
                pass
        entries = set(self.delete)
        files = util.frozenset(files)
        entries.update(self.add_dirs(files.difference(entries)))
        if self.copies:
            for s, d in self.copies:
                self._copyfile(s, d)
            self.copies = []
        if self.delete:
            for fl in self.limit_arglist(self.delete):
                self.run0('delete', *fl)
            self.delete = []
        entries.update(self.add_files(files.difference(entries)))
        entries.update(self.tidy_dirs(entries))
        if self.delexec:
            for fl in self.limit_arglist(self.delexec):
                self.run0('propdel', 'svn:executable', *fl)
            self.delexec = []
        if self.setexec:
            for fl in self.limit_arglist(self.setexec):
                self.run0('propset', 'svn:executable', '*', *fl)
            self.setexec = []

        fd, messagefile = tempfile.mkstemp(prefix='hg-convert-')
        try:
            fp = os.fdopen(fd, 'w')
            try:
                fp.write(commit.desc)
            finally:
                fp.close()
            output = self.run0('commit',
                               username=util.shortuser(commit.author),
                               file=messagefile,
                               encoding='utf-8')
            try:
                rev = self.commit_re.search(output).group(1)
            except AttributeError:
                self.ui.warn(_('unexpected svn output:\n'))
                self.ui.warn(output)
                raise util.Abort(_('unable to cope with svn output'))
            if commit.rev:
                self.run('propset', 'hg:convert-rev', commit.rev,
                         revprop=True, revision=rev)
            if commit.branch and commit.branch != 'default':
                self.run('propset', 'hg:convert-branch', commit.branch,
                         revprop=True, revision=rev)
            for parent in parents:
                self.addchild(parent, rev)
            return self.revid(rev)
        finally:
            os.unlink(messagefile)

    def puttags(self, tags):
        """Tags are not supported by this sink; only warn about it."""
        self.ui.warn(_('XXX TAGS NOT IMPLEMENTED YET\n'))
General Comments 0
You need to be logged in to leave comments. Login now