##// END OF EJS Templates
convert: bail out in Subversion source if encountering non-ASCII HTTP(S) URL...
Manuel Jacob -
r45559:697212a8 stable
parent child Browse files
Show More
@@ -1,1599 +1,1608 b''
1 1 # Subversion 1.4/1.5 Python API backend
2 2 #
3 3 # Copyright(C) 2007 Daniel Holth et al
4 4 from __future__ import absolute_import
5 5
6 6 import os
7 7 import re
8 8 import xml.dom.minidom
9 9
10 10 from mercurial.i18n import _
11 11 from mercurial.pycompat import open
12 12 from mercurial import (
13 13 encoding,
14 14 error,
15 15 pycompat,
16 16 util,
17 17 vfs as vfsmod,
18 18 )
19 19 from mercurial.utils import (
20 20 dateutil,
21 21 procutil,
22 22 stringutil,
23 23 )
24 24
25 25 from . import common
26 26
27 27 pickle = util.pickle
28 28 stringio = util.stringio
29 29 propertycache = util.propertycache
30 30 urlerr = util.urlerr
31 31 urlreq = util.urlreq
32 32
33 33 commandline = common.commandline
34 34 commit = common.commit
35 35 converter_sink = common.converter_sink
36 36 converter_source = common.converter_source
37 37 decodeargs = common.decodeargs
38 38 encodeargs = common.encodeargs
39 39 makedatetimestamp = common.makedatetimestamp
40 40 mapfile = common.mapfile
41 41 MissingTool = common.MissingTool
42 42 NoRepo = common.NoRepo
43 43
44 44 # Subversion stuff. Works best with very recent Python SVN bindings
45 45 # e.g. SVN 1.5 or backports. Thanks to the bzr folks for enhancing
46 46 # these bindings.
47 47
48 48 try:
49 49 import svn
50 50 import svn.client
51 51 import svn.core
52 52 import svn.ra
53 53 import svn.delta
54 54 from . import transport
55 55 import warnings
56 56
57 57 warnings.filterwarnings(
58 58 'ignore', module='svn.core', category=DeprecationWarning
59 59 )
60 60 svn.core.SubversionException # trigger import to catch error
61 61
62 62 except ImportError:
63 63 svn = None
64 64
65 65
class SvnPathNotFound(Exception):
    """Signal that a requested path could not be found in the repository."""
68 68
69 69
def revsplit(rev):
    """Parse a revision string and return (uuid, path, revnum).
    >>> revsplit(b'svn:a2147622-4a9f-4db4-a8d3-13562ff547b2'
    ...          b'/proj%20B/mytrunk/mytrunk@1')
    ('a2147622-4a9f-4db4-a8d3-13562ff547b2', '/proj%20B/mytrunk/mytrunk', 1)
    >>> revsplit(b'svn:8af66a51-67f5-4354-b62c-98d67cc7be1d@1')
    ('', '', 1)
    >>> revsplit(b'@7')
    ('', '', 7)
    >>> revsplit(b'7')
    ('', '', 0)
    >>> revsplit(b'bad')
    ('', '', 0)
    """
    # Everything after the last '@' is the revision number; without an
    # '@' the revision number defaults to 0 and the whole string is kept.
    base, at, number = rev.rpartition(b'@')
    if at:
        revnum = int(number)
    else:
        base = rev
        revnum = 0

    # uuid and module are only reported for 'svn:<uuid>/<path>' strings.
    prefix, slash, rest = base.partition(b'/')
    if slash and prefix.startswith(b'svn:'):
        return prefix[4:], b'/' + rest, revnum
    return b'', b'', revnum
95 95
96 96
def quote(s):
    """Percent-encode ``s`` the way Subversion encodes URL components."""
    # As of svn 1.7, many svn calls expect "canonical" paths. In
    # theory, we should call svn.core.*canonicalize() on all paths
    # before passing them to the API. Instead, we assume the base url
    # is canonical and copy the behaviour of svn URL encoding function
    # so we can extend it safely with new components. The "safe"
    # characters were taken from the "svn_uri__char_validity" table in
    # libsvn_subr/path.c.
    safechars = b"!$&'()*+,-./:=@_~"
    return urlreq.quote(s, safechars)
106 106
107 107
def geturl(path):
    """Return a canonical Subversion URL for ``path``.

    The Subversion client is asked first; when it raises
    SubversionException, an existing directory is turned into a quoted
    ``file://`` URL, and the result is canonicalized either way.
    """
    try:
        canonical = svn.core.svn_path_canonicalize(path)
        return svn.client.url_from_path(canonical)
    except svn.core.SubversionException:
        # svn.client.url_from_path() fails with local repositories
        pass
    if os.path.isdir(path):
        localpath = os.path.normpath(os.path.abspath(path))
        if pycompat.iswindows:
            localpath = b'/' + util.normpath(localpath)
        # Module URL is later compared with the repository URL returned
        # by svn API, which is UTF-8.
        localpath = encoding.tolocal(localpath)
        path = b'file://%s' % quote(localpath)
    return svn.core.svn_path_canonicalize(path)
123 123
124 124
def optrev(number):
    """Build an ``svn_opt_revision_t`` that designates revision ``number``."""
    revision = svn.core.svn_opt_revision_t()
    revision.kind = svn.core.svn_opt_revision_number
    revision.value.number = number
    return revision
130 130
131 131
class changedpath(object):
    """Copy of the copy-source and action attributes of a changed path."""

    def __init__(self, p):
        # Only these three attributes of the source object are retained.
        self.copyfrom_path, self.copyfrom_rev, self.action = (
            p.copyfrom_path,
            p.copyfrom_rev,
            p.action,
        )
137 137
138 138
def get_log_child(
    fp,
    url,
    paths,
    start,
    end,
    limit=0,
    discover_changed_paths=True,
    strict_node_history=False,
):
    """Pickle the Subversion log of ``paths`` into ``fp``, then exit.

    Each log entry is dumped as a ``(paths, revnum, author, date, message)``
    tuple. A final ``None`` marks normal completion (or an IOError); any
    other exception is dumped as its byte-string rendering. The process
    is terminated with ``os._exit(0)`` afterwards.
    """
    protocol = -1

    def receiver(orig_paths, revnum, author, date, message, pool):
        changes = {}
        if orig_paths is not None:
            changes = {
                k: changedpath(v) for k, v in pycompat.iteritems(orig_paths)
            }
        pickle.dump((changes, revnum, author, date, message), fp, protocol)

    try:
        # Use an ra of our own so that our parent can consume
        # our results without confusing the server.
        ratransport = transport.SvnRaTransport(url=url)
        svn.ra.get_log(
            ratransport.ra,
            paths,
            start,
            end,
            limit,
            discover_changed_paths,
            strict_node_history,
            receiver,
        )
    except IOError:
        # Caller may interrupt the iteration
        terminator = None
    except Exception as inst:
        terminator = stringutil.forcebytestr(inst)
    else:
        terminator = None
    pickle.dump(terminator, fp, protocol)
    fp.flush()
    # With large history, cleanup process goes crazy and suddenly
    # consumes *huge* amount of memory. The output file being closed,
    # there is no need for clean termination.
    os._exit(0)
184 184
185 185
def debugsvnlog(ui, **opts):
    """Fetch SVN log in a subprocess and channel them back to parent to
    avoid memory collection issues.
    """
    with util.with_lc_ctype():
        if svn is None:
            raise error.Abort(
                _(b'debugsvnlog could not load Subversion python bindings')
            )

        # The encoded get_log_child() arguments arrive on ui.fin; the
        # log entries are written back on ui.fout.
        logargs = decodeargs(ui.fin.read())
        get_log_child(ui.fout, *logargs)
198 198
199 199
class logstream(object):
    """Interruptible revision log iterator."""

    def __init__(self, stdout):
        self._stdout = stdout

    def __iter__(self):
        """Yield unpickled log entries until the ``None`` terminator.

        Aborts if the stream ends early or if an entry is not a 5-item
        sequence.
        """
        while True:
            try:
                record = pickle.load(self._stdout)
            except EOFError:
                raise error.Abort(
                    _(
                        b'Mercurial failed to run itself, check'
                        b' hg executable is in PATH'
                    )
                )
            if record is None:
                # End-of-stream marker.
                return
            try:
                # Only checks that the record has five fields.
                _paths, _revnum, _author, _date, _message = record
            except (TypeError, ValueError):
                raise error.Abort(_(b"log stream exception '%s'") % record)
            yield record

    def close(self):
        """Close the underlying stream once and forget it."""
        if not self._stdout:
            return
        self._stdout.close()
        self._stdout = None
229 229
230 230
class directlogstream(list):
    """Direct revision log iterator.
    This can be used for debugging and development but it will probably leak
    memory and is not suitable for real conversions."""

    def __init__(
        self,
        url,
        paths,
        start,
        end,
        limit=0,
        discover_changed_paths=True,
        strict_node_history=False,
    ):
        def receiver(orig_paths, revnum, author, date, message, pool):
            changes = {}
            if orig_paths is not None:
                changes = {
                    k: changedpath(v)
                    for k, v in pycompat.iteritems(orig_paths)
                }
            self.append((changes, revnum, author, date, message))

        # Use an ra of our own so that our parent can consume
        # our results without confusing the server.
        ratransport = transport.SvnRaTransport(url=url)
        svn.ra.get_log(
            ratransport.ra,
            paths,
            start,
            end,
            limit,
            discover_changed_paths,
            strict_node_history,
            receiver,
        )

    def close(self):
        """Nothing to release; present so it can be closed like logstream."""
        pass
269 269
270 270
271 271 # Check to see if the given path is a local Subversion repo. Verify this by
272 272 # looking for several svn-specific files and directories in the given
273 273 # directory.
def filecheck(ui, path, proto):
    """Return True if ``path`` contains all of the expected repo entries.

    ``ui`` and ``proto`` are accepted but not used by this check.
    """
    required = (b'locks', b'hooks', b'format', b'db')
    return all(os.path.exists(os.path.join(path, name)) for name in required)
279 279
280 280
281 281 # Check to see if a given path is the root of an svn repo over http. We verify
282 282 # this by requesting a version-controlled URL we know can't exist and looking
283 283 # for the svn-specific "not found" XML.
def httpcheck(ui, path, proto):
    """Probe ``proto://path`` for the Subversion "not found" XML marker.

    Returns True when the marker appears in the response body (including
    the body of a 404 response), True with a warning on any other HTTP
    error, and False on any other failure.
    """
    marker = b'<m:human-readable errcode="160013">'
    try:
        opener = urlreq.buildopener()
        response = opener.open(
            b'%s://%s/!svn/ver/0/.svn' % (proto, path), b'rb'
        )
        body = response.read()
    except urlerr.httperror as inst:
        if inst.code == 404:
            body = inst.fp.read()
        else:
            # Except for 404 we cannot know for sure this is not an svn repo
            ui.warn(
                _(
                    b'svn: cannot probe remote repository, assume it could '
                    b'be a subversion repository. Use --source-type if you '
                    b'know better.\n'
                )
            )
            return True
    except Exception:
        # Could be urlerr.urlerror if the URL is invalid or anything else.
        return False
    return marker in body
305 305
306 306
307 307 protomap = {
308 308 b'http': httpcheck,
309 309 b'https': httpcheck,
310 310 b'file': filecheck,
311 311 }
312 312
313 313
def issvnurl(ui, url):
    """Return True if ``url`` (bytes) looks like a Subversion repository.

    ``url`` is split into protocol and path; a value without ``://`` is
    treated as a local path with the ``file`` protocol. The protocol's
    check from ``protomap`` is then tried on the path and on each of its
    parent paths. HTTP(S) URLs containing non-ASCII characters are
    rejected with a warning.
    """
    try:
        proto, path = url.split(b'://', 1)
        if proto == b'file':
            if (
                pycompat.iswindows
                and path[:1] == b'/'
                and path[1:2].isalpha()
                and path[2:6].lower() == b'%3a/'
            ):
                path = path[:2] + b':/' + path[6:]
            # pycompat.fsdecode() / pycompat.fsencode() are used so that bytes
            # in the URL roundtrip correctly on Unix. urlreq.url2pathname() on
            # py3 will decode percent-encoded bytes using the utf-8 encoding
            # and the "replace" error handler. This means that it will not
            # preserve non-UTF-8 bytes (https://bugs.python.org/issue40983).
            # url.open() uses the reverse function (urlreq.pathname2url()) and
            # has a similar problem
            # (https://bz.mercurial-scm.org/show_bug.cgi?id=6357). It makes
            # sense to solve both problems together and handle all file URLs
            # consistently. For now, we warn.
            unicodepath = urlreq.url2pathname(pycompat.fsdecode(path))
            if pycompat.ispy3 and u'\N{REPLACEMENT CHARACTER}' in unicodepath:
                ui.warn(
                    _(
                        b'on Python 3, we currently do not support non-UTF-8 '
                        b'percent-encoded bytes in file URLs for Subversion '
                        b'repositories\n'
                    )
                )
            path = pycompat.fsencode(unicodepath)
    except ValueError:
        # No '://' separator: treat the value as a local filesystem path.
        proto = b'file'
        path = os.path.abspath(url)
    if proto == b'file':
        path = util.pconvert(path)
    # Both members must be bytes: proto is bytes, and on Python 3 a bytes
    # value never compares equal to a str, so a str 'https' would silently
    # skip this check for https URLs.
    elif proto in (b'http', b'https'):
        if not encoding.isasciistr(path):
            ui.warn(
                _(
                    b"Subversion sources don't support non-ASCII characters in "
                    b"HTTP(S) URLs. Please percent-encode them.\n"
                )
            )
            return False
    check = protomap.get(proto, lambda *args: False)
    # Walk up the path one component at a time until a check succeeds.
    while b'/' in path:
        if check(ui, path, proto):
            return True
        path = path.rsplit(b'/', 1)[0]
    return False
356 365
357 366
358 367 # SVN conversion code stolen from bzr-svn and tailor
359 368 #
360 369 # Subversion looks like a versioned filesystem, branches structures
361 370 # are defined by conventions and not enforced by the tool. First,
362 371 # we define the potential branches (modules) as "trunk" and "branches"
363 372 # children directories. Revisions are then identified by their
364 373 # module and revision number (and a repository identifier).
365 374 #
366 375 # The revision graph is really a tree (or a forest). By default, a
367 376 # revision parent is the previous revision in the same module. If the
368 377 # module directory is copied/moved from another module then the
369 378 # revision is the module root and its parent the source revision in
370 379 # the parent module. A revision has at most one parent.
371 380 #
372 381 class svn_source(converter_source):
def __init__(self, ui, repotype, url, revs=None):
    """Open ``url`` as a Subversion conversion source.

    Raises NoRepo when ``url`` does not look like a Subversion repository
    (or libsvn cannot open it), MissingTool when the Subversion Python
    bindings are unavailable or older than 1.4, and error.Abort when the
    requested revisions are invalid or no revision is found in the module.
    """
    super(svn_source, self).__init__(ui, repotype, url, revs=revs)

    # Accept svn:// and svn+ssh:// URLs, working copies (a directory
    # containing .svn) and anything issvnurl() recognizes.
    if not (
        url.startswith(b'svn://')
        or url.startswith(b'svn+ssh://')
        or (
            os.path.exists(url)
            and os.path.exists(os.path.join(url, b'.svn'))
        )
        or issvnurl(ui, url)
    ):
        raise NoRepo(
            _(b"%s does not look like a Subversion repository") % url
        )
    if svn is None:
        raise MissingTool(_(b'could not load Subversion python bindings'))

    try:
        version = svn.core.SVN_VER_MAJOR, svn.core.SVN_VER_MINOR
        if version < (1, 4):
            raise MissingTool(
                _(
                    b'Subversion python bindings %d.%d found, '
                    b'1.4 or later required'
                )
                % version
            )
    except AttributeError:
        # The version constants themselves are missing.
        raise MissingTool(
            _(
                b'Subversion python bindings are too old, 1.4 '
                b'or later required'
            )
        )

    self.lastrevs = {}

    latest = None
    try:
        # Support file://path@rev syntax. Useful e.g. to convert
        # deleted branches.
        at = url.rfind(b'@')
        if at >= 0:
            latest = int(url[at + 1 :])
            url = url[:at]
    except ValueError:
        # The text after '@' is not a number: keep the URL untouched.
        pass
    self.url = geturl(url)
    self.encoding = b'UTF-8'  # Subversion is always nominal UTF-8
    try:
        with util.with_lc_ctype():
            self.transport = transport.SvnRaTransport(url=self.url)
            self.ra = self.transport.ra
            self.ctx = self.transport.client
            self.baseurl = svn.ra.get_repos_root(self.ra)
            # Module is either empty or a repository path starting with
            # a slash and not ending with a slash.
            self.module = urlreq.unquote(self.url[len(self.baseurl) :])
            self.prevmodule = None
            self.rootmodule = self.module
            self.commits = {}
            self.paths = {}
            self.uuid = svn.ra.get_uuid(self.ra)
    except svn.core.SubversionException:
        ui.traceback()
        svnversion = b'%d.%d.%d' % (
            svn.core.SVN_VER_MAJOR,
            svn.core.SVN_VER_MINOR,
            svn.core.SVN_VER_MICRO,
        )
        raise NoRepo(
            _(
                b"%s does not look like a Subversion repository "
                b"to libsvn version %s"
            )
            % (self.url, svnversion)
        )

    # An explicit revision argument overrides any @rev taken from the URL.
    if revs:
        if len(revs) > 1:
            raise error.Abort(
                _(
                    b'subversion source does not support '
                    b'specifying multiple revisions'
                )
            )
        try:
            latest = int(revs[0])
        except ValueError:
            raise error.Abort(
                _(b'svn: revision %s is not an integer') % revs[0]
            )

    trunkcfg = self.ui.config(b'convert', b'svn.trunk')
    if trunkcfg is None:
        trunkcfg = b'trunk'
    self.trunkname = trunkcfg.strip(b'/')
    self.startrev = self.ui.config(b'convert', b'svn.startrev')
    try:
        self.startrev = int(self.startrev)
        # Negative start revisions are clamped to 0.
        if self.startrev < 0:
            self.startrev = 0
    except ValueError:
        raise error.Abort(
            _(b'svn: start revision %s is not an integer') % self.startrev
        )

    try:
        with util.with_lc_ctype():
            self.head = self.latest(self.module, latest)
    except SvnPathNotFound:
        self.head = None
    if not self.head:
        raise error.Abort(
            _(b'no revision found in module %s') % self.module
        )
    self.last_changed = self.revnum(self.head)

    self._changescache = (None, None)

    # Remember the working copy, if any; converted() records its map there.
    if os.path.exists(os.path.join(url, b'.svn/entries')):
        self.wc = url
    else:
        self.wc = None
    self.convertfp = None
499 508
def before(self):
    """Enter the ``util.with_lc_ctype()`` context and keep it for after()."""
    ctypectx = util.with_lc_ctype()
    self.with_lc_ctype = ctypectx
    ctypectx.__enter__()
503 512
def after(self):
    """Leave the context stored in ``self.with_lc_ctype`` with no exception."""
    leave = self.with_lc_ctype.__exit__
    leave(None, None, None)
506 515
def setrevmap(self, revmap):
    """Record, per module, the highest revision number found in ``revmap``."""
    newest = {}
    for revid in revmap:
        _uuid, module, revnum = revsplit(revid)
        newest[module] = max(revnum, newest.get(module, revnum))
    self.lastrevs = newest
515 524
def exists(self, path, optrev):
    """Return True if listing ``path`` under ``self.url`` at ``optrev`` works.

    A SubversionException from the listing is reported as False.
    """
    try:
        target = self.url.rstrip(b'/') + b'/' + quote(path)
        svn.client.ls(target, optrev, False, self.ctx)
    except svn.core.SubversionException:
        return False
    else:
        return True
527 536
def getheads(self):
    """Compute and return the list of head revision ids to convert.

    The trunk, tags and branches locations are resolved from the
    ``convert.svn.*`` configuration. The first head is the module head;
    one head is added per non-empty branch directory. Also sets
    ``self.tags``, ``self.heads`` and possibly ``self.module`` and
    ``self.head``. Raises error.Abort when a configured location is
    missing, when no revision is found, or when a start revision is
    combined with several heads.
    """

    def isdir(path, revnum):
        # True when path is a directory at revnum.
        kind = self._checkpath(path, revnum)
        return kind == svn.core.svn_node_dir

    def getcfgpath(name, rev):
        # Resolve the configured (or default) location for name; return
        # None when it is disabled (blank setting) or does not exist.
        cfgpath = self.ui.config(b'convert', b'svn.' + name)
        if cfgpath is not None and cfgpath.strip() == b'':
            return None
        path = (cfgpath or name).strip(b'/')
        if not self.exists(path, rev):
            if self.module.endswith(path) and name == b'trunk':
                # we are converting from inside this directory
                return None
            if cfgpath:
                # An explicitly configured location must exist.
                raise error.Abort(
                    _(b'expected %s to be at %r, but not found')
                    % (name, path)
                )
            return None
        self.ui.note(
            _(b'found %s at %r\n') % (name, pycompat.bytestr(path))
        )
        return path

    rev = optrev(self.last_changed)
    oldmodule = b''
    trunk = getcfgpath(b'trunk', rev)
    self.tags = getcfgpath(b'tags', rev)
    branches = getcfgpath(b'branches', rev)

    # If the project has a trunk or branches, we will extract heads
    # from them. We keep the project root otherwise.
    if trunk:
        oldmodule = self.module or b''
        self.module += b'/' + trunk
        self.head = self.latest(self.module, self.last_changed)
        if not self.head:
            raise error.Abort(
                _(b'no revision found in module %s') % self.module
            )

    # First head in the list is the module's head
    self.heads = [self.head]
    if self.tags is not None:
        self.tags = b'%s/%s' % (oldmodule, (self.tags or b'tags'))

    # Check if branches bring a few more heads to the list
    if branches:
        rpath = self.url.strip(b'/')
        branchnames = svn.client.ls(
            rpath + b'/' + quote(branches), rev, False, self.ctx
        )
        for branch in sorted(branchnames):
            module = b'%s/%s/%s' % (oldmodule, branches, branch)
            # Only directories are considered branches.
            if not isdir(module, self.last_changed):
                continue
            brevid = self.latest(module, self.last_changed)
            if not brevid:
                self.ui.note(_(b'ignoring empty branch %s\n') % branch)
                continue
            self.ui.note(
                _(b'found branch %s at %d\n')
                % (branch, self.revnum(brevid))
            )
            self.heads.append(brevid)

    if self.startrev and self.heads:
        if len(self.heads) > 1:
            raise error.Abort(
                _(
                    b'svn: start revision is not supported '
                    b'with more than one branch'
                )
            )
        revnum = self.revnum(self.heads[0])
        if revnum < self.startrev:
            raise error.Abort(
                _(b'svn: no revision found after start revision %d')
                % self.startrev
            )

    return self.heads
611 620
def _getchanges(self, rev, full):
    """Return ``(files, copies)`` for ``rev``.

    ``files`` is a sorted list of ``(name, rev)`` pairs. Revisions with
    parents are expanded through expandpaths(); parentless revisions, or
    any revision when ``full`` is set, list every file of the module.
    ``self.removed`` is updated as a side effect.
    """
    changedpaths, parents = self.paths[rev]
    copies = {}
    if parents:
        names, self.removed, copies = self.expandpaths(
            rev, changedpaths, parents
        )
    if full or not parents:
        # Perform a full checkout on roots
        _uuid, module, revnum = revsplit(rev)
        listing = svn.client.ls(
            self.baseurl + quote(module), optrev(revnum), True, self.ctx
        )
        names = [
            name
            for name, entry in pycompat.iteritems(listing)
            if entry.kind == svn.core.svn_node_file
        ]
        self.removed = set()

    names.sort()
    files = pycompat.ziplist(names, [rev] * len(names))
    return (files, copies)
633 642
def getchanges(self, rev, full):
    """Return ``(files, copies, set())`` for ``rev``.

    The result cached by getchangedfiles() is used when it is for the
    same revision and ``full`` is not requested; otherwise the changes
    are computed and the revision's entry in ``self.paths`` is dropped.
    """
    cached = self._changescache[0] == rev and not full
    if cached:
        # reuse cache from getchangedfiles
        files, copies = self._changescache[1]
    else:
        files, copies = self._getchanges(rev, full)
        # caller caches the result, so free it here to release memory
        del self.paths[rev]
    return (files, copies, set())
643 652
def getchangedfiles(self, rev, i):
    """Return the names of the files changed in ``rev``.

    The computed ``(files, copies)`` pair is stored in
    ``self._changescache`` so getchanges() can reuse it. ``i`` is unused.
    """
    # called from filemap - cache computed values for reuse in getchanges
    entries, copymap = self._getchanges(rev, False)
    self._changescache = (rev, (entries, copymap))
    return [entry[0] for entry in entries]
649 658
def getcommit(self, rev):
    """Return the commit for ``rev`` and drop it from ``self.commits``.

    When the commit is not cached yet, the transport is reparented to the
    revision's module and the needed revisions are fetched first. Raises
    error.Abort when the revision still cannot be found afterwards.
    """
    if rev not in self.commits:
        uuid, module, revnum = revsplit(rev)
        self.module = module
        self.reparent(module)
        # We assume that:
        # - requests for revisions after "stop" come from the
        # revision graph backward traversal. Cache all of them
        # down to stop, they will be used eventually.
        # - requests for revisions before "stop" come to get
        # isolated branches parents. Just fetch what is needed.
        stop = self.lastrevs.get(module, 0)
        if revnum < stop:
            stop = revnum + 1
        self._fetch_revisions(revnum, stop)
        if rev not in self.commits:
            # revnum is an int, and bytes %-formatting rejects ints for
            # %s on Python 3; render it as bytes first so the abort
            # message is produced instead of a TypeError.
            raise error.Abort(
                _(b'svn: revision %s not found') % (b'%d' % revnum)
            )
    revcommit = self.commits[rev]
    # caller caches the result, so free it here to release memory
    del self.commits[rev]
    return revcommit
671 680
def checkrevformat(self, revstr, mapname=b'splicemap'):
    """Abort unless ``revstr`` has the ``svn:<uuid><path>@<revnum>`` form.

    ``mapname`` only appears in the error message.
    """
    pattern = (
        br'svn:[0-9a-f]{8,8}-[0-9a-f]{4,4}-'
        br'[0-9a-f]{4,4}-[0-9a-f]{4,4}-[0-9a-f]'
        br'{12,12}(.*)@[0-9]+$'
    )
    if re.match(pattern, revstr):
        return
    raise error.Abort(
        _(b'%s entry %s is not a valid revision identifier')
        % (mapname, revstr)
    )
684 693
def numcommits(self):
    """Return the head's revision number minus the start revision."""
    headnum = self.head.rsplit(b'@', 1)[1]
    return int(headnum) - self.startrev
687 696
def gettags(self):
    """Return a dict mapping tag names to the revision ids they point to.

    Returns an empty dict when no tags directory is configured
    (``self.tags`` is None). Otherwise the log of the tags directory is
    walked backwards from the latest revision down to ``self.startrev``.
    """
    tags = {}
    if self.tags is None:
        return tags

    # svn tags are just a convention, project branches left in a
    # 'tags' directory. There is no other relationship than
    # ancestry, which is expensive to discover and makes them hard
    # to update incrementally. Worse, past revisions may be
    # referenced by tags far away in the future, requiring a deep
    # history traversal on every calculation. Current code
    # performs a single backward traversal, tracking moves within
    # the tags directory (tag renaming) and recording a new tag
    # every time a project is copied from outside the tags
    # directory. It also lists deleted tags, this behaviour may
    # change in the future.
    pendings = []
    tagspath = self.tags
    start = svn.ra.get_latest_revnum(self.ra)
    stream = self._getlog([self.tags], start, self.startrev)
    try:
        for entry in stream:
            origpaths, revnum, author, date, message = entry
            if not origpaths:
                # Must stay a mapping: it is iterated with
                # pycompat.iteritems() below, which a list cannot serve.
                origpaths = {}
            copies = [
                (e.copyfrom_path, e.copyfrom_rev, p)
                for p, e in pycompat.iteritems(origpaths)
                if e.copyfrom_path
            ]
            # Apply moves/copies from more specific to general
            copies.sort(reverse=True)

            srctagspath = tagspath
            if copies and copies[-1][2] == tagspath:
                # Track tags directory moves
                srctagspath = copies.pop()[0]

            for source, sourcerev, dest in copies:
                if not dest.startswith(tagspath + b'/'):
                    continue
                for tag in pendings:
                    if tag[0].startswith(dest):
                        # A pending tag was copied from here: follow it
                        # back to its source.
                        tagpath = source + tag[0][len(dest) :]
                        tag[:2] = [tagpath, sourcerev]
                        break
                else:
                    pendings.append([source, sourcerev, dest])

            # Filter out tags with children coming from different
            # parts of the repository like:
            # /tags/tag.1 (from /trunk:10)
            # /tags/tag.1/foo (from /branches/foo:12)
            # Here /tags/tag.1 is discarded as well as its children.
            # It happens with tools like cvs2svn. Such tags cannot
            # be represented in mercurial.
            addeds = {
                p: e.copyfrom_path
                for p, e in pycompat.iteritems(origpaths)
                if e.action == b'A' and e.copyfrom_path
            }
            badroots = set()
            for destroot in addeds:
                for source, sourcerev, dest in pendings:
                    if not dest.startswith(
                        destroot + b'/'
                    ) or source.startswith(addeds[destroot] + b'/'):
                        continue
                    badroots.add(destroot)
                    break

            for badroot in badroots:
                pendings = [
                    p
                    for p in pendings
                    if p[2] != badroot
                    and not p[2].startswith(badroot + b'/')
                ]

            # Tell tag renamings from tag creations
            renamings = []
            for source, sourcerev, dest in pendings:
                tagname = dest.split(b'/')[-1]
                if source.startswith(srctagspath):
                    renamings.append([source, sourcerev, tagname])
                    continue
                if tagname in tags:
                    # Keep the latest tag value
                    continue
                # From revision may be fake, get one with changes
                try:
                    tagid = self.latest(source, sourcerev)
                    if tagid and tagname not in tags:
                        tags[tagname] = tagid
                except SvnPathNotFound:
                    # It happens when we are following directories
                    # we assumed were copied with their parents
                    # but were really created in the tag
                    # directory.
                    pass
            # Only renamings remain pending for older revisions.
            pendings = renamings
            tagspath = srctagspath
    finally:
        stream.close()
    return tags
793 802
def converted(self, rev, destrev):
    """Append ``destrev`` and the revision number of ``rev`` to the
    ``.svn/hg-shamap`` file of the working copy, if there is one.

    The file is opened lazily in append mode and flushed after each
    write. Nothing happens when ``self.wc`` is not set.
    """
    if not self.wc:
        return
    if self.convertfp is None:
        shamap = os.path.join(self.wc, b'.svn', b'hg-shamap')
        self.convertfp = open(shamap, b'ab')
    line = util.tonativeeol(b'%s %d\n' % (destrev, self.revnum(rev)))
    self.convertfp.write(line)
    self.convertfp.flush()
805 814
def revid(self, revnum, module=None):
    """Build the ``svn:<uuid><module>@<revnum>`` identifier.

    ``self.module`` is used when ``module`` is empty or None.
    """
    if not module:
        module = self.module
    return b'svn:%s%s@%d' % (self.uuid, module, revnum)
808 817
def revnum(self, rev):
    """Return the integer after the last ``@`` in ``rev``."""
    # rpartition leaves the whole string in the tail when no '@' is found.
    tail = rev.rpartition(b'@')[2]
    return int(tail)
811 820
def latest(self, path, stop=None):
    """Find the latest revid affecting path, up to stop revision
    number. If stop is None, default to repository latest
    revision. It may return a revision in a different module,
    since a branch may be moved without a change being
    reported. Return None if computed module does not belong to
    rootmodule subtree.

    Raises SvnPathNotFound when path cannot be stat'ed at the stop
    revision.
    """

    def findchanges(path, start, stop=None):
        # Walk the log of path from start down to stop (or revision 1),
        # following copies, and return (revnum, path) for the entry the
        # walk stopped on. revnum is None when that entry has no paths.
        stream = self._getlog([path], start, stop or 1)
        try:
            for entry in stream:
                paths, revnum, author, date, message = entry
                if stop is None and paths:
                    # We do not know the latest changed revision,
                    # keep the first one with changed paths.
                    break
                if stop is not None and revnum <= stop:
                    break

                for p in paths:
                    if not path.startswith(p) or not paths[p].copyfrom_path:
                        continue
                    # path (or a parent of it) was copied: continue
                    # from the copy source.
                    newpath = paths[p].copyfrom_path + path[len(p) :]
                    self.ui.debug(
                        b"branch renamed from %s to %s at %d\n"
                        % (path, newpath, revnum)
                    )
                    path = newpath
                    break
            if not paths:
                revnum = None
            return revnum, path
        finally:
            stream.close()

    if not path.startswith(self.rootmodule):
        # Requests on foreign branches may be forbidden at server level
        self.ui.debug(b'ignoring foreign branch %r\n' % path)
        return None

    if stop is None:
        stop = svn.ra.get_latest_revnum(self.ra)
    try:
        # stat() is issued with the transport reparented to b'', then
        # the previous parent is restored.
        prevmodule = self.reparent(b'')
        dirent = svn.ra.stat(self.ra, path.strip(b'/'), stop)
        self.reparent(prevmodule)
    except svn.core.SubversionException:
        dirent = None
    if not dirent:
        raise SvnPathNotFound(
            _(b'%s not found up to revision %d') % (path, stop)
        )

    # stat() gives us the previous revision on this line of
    # development, but it might be in *another module*. Fetch the
    # log and detect renames down to the latest revision.
    revnum, realpath = findchanges(path, stop, dirent.created_rev)
    if revnum is None:
        # Tools like svnsync can create empty revision, when
        # synchronizing only a subtree for instance. These empty
        # revisions created_rev still have their original values
        # despite all changes having disappeared and can be
        # returned by ra.stat(), at least when stating the root
        # module. In that case, do not trust created_rev and scan
        # the whole history.
        revnum, realpath = findchanges(path, stop)
        if revnum is None:
            self.ui.debug(b'ignoring empty branch %r\n' % realpath)
            return None

    if not realpath.startswith(self.rootmodule):
        self.ui.debug(b'ignoring foreign branch %r\n' % realpath)
        return None
    return self.revid(revnum, realpath)
888 897
def reparent(self, module):
    """Reparent the svn transport and return the previous parent."""
    if self.prevmodule == module:
        # Already parented there: nothing to do.
        return module
    previous = b'' if self.prevmodule is None else self.prevmodule
    target = self.baseurl + quote(module)
    self.ui.debug(b"reparent to %s\n" % target)
    svn.ra.reparent(self.ra, target)
    self.prevmodule = module
    return previous
901 910
def expandpaths(self, rev, paths, parents):
    """Expand the svn paths touched by ``rev`` into per-file changes.

    ``paths`` is a sequence of ``(path, entry)`` pairs and ``parents`` the
    list of parent revision ids.  File entries are recorded directly;
    directory entries are walked with ``_iterfiles`` so that every file
    below them is reported.

    Returns a ``(files, removed, copies)`` tuple: ``files`` is a list of
    all touched file names (removed ones included), ``removed`` is the set
    of removed file names and ``copies`` maps each copy destination to its
    source.  Every name stored in the result goes through ``self.recode``.
    """
    changed, removed = set(), set()
    copies = {}

    new_module, revnum = revsplit(rev)[1:]
    if new_module != self.module:
        self.module = new_module
        self.reparent(self.module)

    progress = self.ui.makeprogress(
        _(b'scanning paths'), unit=_(b'paths'), total=len(paths)
    )
    for i, (path, ent) in enumerate(paths):
        progress.update(i, item=path)
        entrypath = self.getrelpath(path)

        kind = self._checkpath(entrypath, revnum)
        if kind == svn.core.svn_node_file:
            changed.add(self.recode(entrypath))
            if not ent.copyfrom_path or not parents:
                continue
            # Copy sources not in parent revisions cannot be
            # represented, ignore their origin for now
            pmodule, prevnum = revsplit(parents[0])[1:]
            if ent.copyfrom_rev < prevnum:
                continue
            copyfrom_path = self.getrelpath(ent.copyfrom_path, pmodule)
            if not copyfrom_path:
                continue
            self.ui.debug(
                b"copied to %s from %s@%d\n"
                % (entrypath, copyfrom_path, ent.copyfrom_rev)
            )
            copies[self.recode(entrypath)] = self.recode(copyfrom_path)
        elif kind == 0:  # gone, but had better be a deleted *file*
            self.ui.debug(b"gone from %d\n" % ent.copyfrom_rev)
            # NOTE(review): assumes ``parents`` is non-empty on this
            # branch -- confirm against the caller.
            pmodule, prevnum = revsplit(parents[0])[1:]
            parentpath = pmodule + b"/" + entrypath
            fromkind = self._checkpath(entrypath, prevnum, pmodule)

            if fromkind == svn.core.svn_node_file:
                removed.add(self.recode(entrypath))
            elif fromkind == svn.core.svn_node_dir:
                oroot = parentpath.strip(b'/')
                nroot = path.strip(b'/')
                children = self._iterfiles(oroot, prevnum)
                for childpath in children:
                    # _iterfiles() prefixes every result with the root it
                    # was given, so only that leading occurrence must be
                    # swapped; an unbounded replace would also rewrite
                    # the same byte sequence deeper in the path.
                    childpath = childpath.replace(oroot, nroot, 1)
                    childpath = self.getrelpath(b"/" + childpath, pmodule)
                    if childpath:
                        removed.add(self.recode(childpath))
            else:
                self.ui.debug(
                    b'unknown path in revision %d: %s\n' % (revnum, path)
                )
        elif kind == svn.core.svn_node_dir:
            if ent.action == b'M':
                # If the directory just had a prop change,
                # then we shouldn't need to look for its children.
                continue
            if ent.action == b'R' and parents:
                # If a directory is replacing a file, mark the previous
                # file as deleted
                pmodule, prevnum = revsplit(parents[0])[1:]
                pkind = self._checkpath(entrypath, prevnum, pmodule)
                if pkind == svn.core.svn_node_file:
                    removed.add(self.recode(entrypath))
                elif pkind == svn.core.svn_node_dir:
                    # We do not know what files were kept or removed,
                    # mark them all as changed.
                    for childpath in self._iterfiles(pmodule, prevnum):
                        childpath = self.getrelpath(b"/" + childpath)
                        if childpath:
                            changed.add(self.recode(childpath))

            for childpath in self._iterfiles(path, revnum):
                childpath = self.getrelpath(b"/" + childpath)
                if childpath:
                    changed.add(self.recode(childpath))

            # Handle directory copies
            if not ent.copyfrom_path or not parents:
                continue
            # Copy sources not in parent revisions cannot be
            # represented, ignore their origin for now
            pmodule, prevnum = revsplit(parents[0])[1:]
            if ent.copyfrom_rev < prevnum:
                continue
            copyfrompath = self.getrelpath(ent.copyfrom_path, pmodule)
            if not copyfrompath:
                continue
            self.ui.debug(
                b"mark %s came from %s:%d\n"
                % (path, copyfrompath, ent.copyfrom_rev)
            )
            children = self._iterfiles(ent.copyfrom_path, ent.copyfrom_rev)
            for childpath in children:
                childpath = self.getrelpath(b"/" + childpath, pmodule)
                if not childpath:
                    continue
                copytopath = path + childpath[len(copyfrompath) :]
                copytopath = self.getrelpath(copytopath)
                copies[self.recode(copytopath)] = self.recode(childpath)

    progress.complete()
    # Removed files are reported as changed too.
    changed.update(removed)
    return (list(changed), removed, copies)
1009 1018
def _fetch_revisions(self, from_revnum, to_revnum):
    """Read the log of ``self.module`` and record the commits it contains.

    The two revision numbers may be given in either order; the log is
    always requested from the higher one down to the lower one.  Each
    accepted log entry is stored in ``self.commits`` and ``self.paths``
    and chained to the entry read just before it (its child).

    Raises ``error.Abort`` when Subversion reports that the requested
    revision does not exist; other Subversion errors propagate unchanged.
    """
    if from_revnum < to_revnum:
        from_revnum, to_revnum = to_revnum, from_revnum

    self.child_cset = None

    def parselogentry(orig_paths, revnum, author, date, message):
        """Return the parsed commit object or None, and True if
        the revision is a branch root.
        """
        self.ui.debug(
            b"parsing revision %d (%d changes)\n"
            % (revnum, len(orig_paths))
        )

        branched = False
        rev = self.revid(revnum)
        # branch log might return entries for a parent we already have

        if rev in self.commits or revnum < to_revnum:
            return None, branched

        parents = []
        # check whether this revision is the start of a branch or part
        # of a branch renaming
        orig_paths = sorted(pycompat.iteritems(orig_paths))
        root_paths = [
            (p, e) for p, e in orig_paths if self.module.startswith(p)
        ]
        if root_paths:
            path, ent = root_paths[-1]
            if ent.copyfrom_path:
                branched = True
                newpath = ent.copyfrom_path + self.module[len(path) :]
                # ent.copyfrom_rev may not be the actual last revision
                previd = self.latest(newpath, ent.copyfrom_rev)
                if previd is not None:
                    prevmodule, prevnum = revsplit(previd)[1:]
                    if prevnum >= self.startrev:
                        parents = [previd]
                        self.ui.note(
                            _(b'found parent of branch %s at %d: %s\n')
                            % (self.module, prevnum, prevmodule)
                        )
            else:
                self.ui.debug(b"no copyfrom path, don't know what to do.\n")

        # filter out unrelated paths
        paths = [
            (path, ent)
            for path, ent in orig_paths
            if self.getrelpath(path) is not None
        ]

        # Example SVN datetime. Includes microseconds.
        # ISO-8601 conformant
        # '2007-01-04T17:35:00.902377Z'
        date = dateutil.parsedate(
            date[:19] + b" UTC", [b"%Y-%m-%dT%H:%M:%S"]
        )
        if self.ui.configbool(b'convert', b'localtimezone'):
            date = makedatetimestamp(date[0])

        log = self.recode(message) if message else b''
        author = self.recode(author) if author else b''

        # split() always yields at least one element, so the last
        # component can be taken without guarding against IndexError.
        branch = self.module.split(b"/")[-1]
        if branch == self.trunkname:
            branch = None

        cset = commit(
            author=author,
            date=dateutil.datestr(date, b'%Y-%m-%d %H:%M:%S %1%2'),
            desc=log,
            parents=parents,
            branch=branch,
            rev=rev,
        )

        self.commits[rev] = cset
        # The parents list is *shared* among self.paths and the
        # commit object. Both will be updated below.
        self.paths[rev] = (paths, cset.parents)
        if self.child_cset and not self.child_cset.parents:
            self.child_cset.parents[:] = [rev]
        self.child_cset = cset
        return cset, branched

    self.ui.note(
        _(b'fetching revision log for "%s" from %d to %d\n')
        % (self.module, from_revnum, to_revnum)
    )

    try:
        firstcset = None
        lastonbranch = False
        stream = self._getlog([self.module], from_revnum, to_revnum)
        try:
            for entry in stream:
                paths, revnum, author, date, message = entry
                if revnum < self.startrev:
                    lastonbranch = True
                    break
                if not paths:
                    self.ui.debug(b'revision %d has no entries\n' % revnum)
                    # If we ever leave the loop on an empty
                    # revision, do not try to get a parent branch
                    lastonbranch = lastonbranch or revnum == 0
                    continue
                cset, lastonbranch = parselogentry(
                    paths, revnum, author, date, message
                )
                if cset:
                    firstcset = cset
                if lastonbranch:
                    break
        finally:
            stream.close()

        if not lastonbranch and firstcset and not firstcset.parents:
            # The first revision of the sequence (the last fetched one)
            # has invalid parents if not a branch root. Find the parent
            # revision now, if any.
            try:
                firstrevnum = self.revnum(firstcset.rev)
                if firstrevnum > 1:
                    latest = self.latest(self.module, firstrevnum - 1)
                    if latest:
                        firstcset.parents.append(latest)
            except SvnPathNotFound:
                pass
    except svn.core.SubversionException as e:
        if e.apr_err == svn.core.SVN_ERR_FS_NO_SUCH_REVISION:
            # to_revnum is an integer and bytes %-formatting rejects
            # integers for %s, so render it to bytes first.  The message
            # template itself is left untouched.
            raise error.Abort(
                _(b'svn: branch has no revision %s') % (b'%d' % to_revnum)
            )
        raise
1158 1167
def getfile(self, file, rev):
    """Return ``(data, mode)`` for ``file`` at ``rev``.

    ``mode`` is ``b'l'`` when the file carries ``svn:special``, ``b'x'``
    when it carries ``svn:executable`` and ``b''`` otherwise.  For
    ``b'l'`` entries a leading ``b"link "`` is stripped from the data.
    ``(None, None)`` is returned for files listed in ``self.removed`` and
    for files Subversion reports as not found.
    """
    # TODO: ra.get_file transmits the whole file instead of diffs.
    if file in self.removed:
        return None, None
    try:
        module, revnum = revsplit(rev)[1:]
        if module != self.module:
            self.module = module
            self.reparent(module)
        buf = stringio()
        props = svn.ra.get_file(self.ra, file, revnum, buf)
        data = buf.getvalue()
        # ra.get_file() seems to keep a reference on the input buffer
        # preventing collection. Release it explicitly.
        buf.close()
        if isinstance(props, list):
            props = props[-1]
        # svn:special takes precedence over svn:executable.
        if b"svn:special" in props:
            mode = b'l'
        elif b"svn:executable" in props:
            mode = b'x'
        else:
            mode = b''
    except svn.core.SubversionException as e:
        missing = (
            svn.core.SVN_ERR_FS_NOT_FOUND,
            svn.core.SVN_ERR_RA_DAV_PATH_NOT_FOUND,
        )
        if e.apr_err not in missing:
            raise
        return None, None  # File not found
    prefix = b"link "
    if mode == b'l' and data.startswith(prefix):
        data = data[len(prefix) :]
    return data, mode
1191 1200
def _iterfiles(self, path, revnum):
    """Enumerate all files in path at revnum, recursively.

    Returns a generator of file paths, each prefixed with ``path``
    (stripped of surrounding slashes); entries that are not files are
    left out.
    """
    path = path.strip(b'/')
    pool = svn.core.Pool()
    url = b'/'.join([self.baseurl, quote(path)]).strip(b'/')
    listing = svn.client.ls(url, optrev(revnum), True, self.ctx, pool)
    # An empty path means the listing root: no prefix is needed then.
    prefix = path + b'/' if path else path
    return (
        prefix + name
        for name, entry in pycompat.iteritems(listing)
        if entry.kind == svn.core.svn_node_file
    )
1205 1214
def getrelpath(self, path, module=None):
    """Return ``path`` relative to ``module``, or None if it is outside.

    ``module`` defaults to ``self.module``.  Given the repository url of
    this wc, say
      "http://server/plone/CMFPlone/branches/Plone-2_0-branch"
    this extracts the "entry" portion (a relative path) from what
    svn log --xml says, i.e.
      "/CMFPlone/branches/Plone-2_0-branch/tests/PloneTestCase.py"
    that is to say "tests/PloneTestCase.py".  The module path itself maps
    to ``b''``.
    """
    root = self.module if module is None else module
    if path.startswith(root):
        tail = path.rstrip(b'/')[len(root) :]
        if tail.startswith(b'/'):
            return tail[1:]
        if tail == b'':
            return tail
        # Anything else merely shares a name prefix with the module.

    # The path is outside our tracked tree...
    self.ui.debug(
        b'%r is not under %r, ignoring\n'
        % (pycompat.bytestr(path), pycompat.bytestr(root))
    )
    return None
1228 1237
def _checkpath(self, path, revnum, module=None):
    """Return what ``svn.ra.check_path`` reports for ``path`` at ``revnum``.

    Without ``module`` the path is checked against the current session
    parent.  With ``module`` the session is reparented on ``b''`` for the
    duration of the check, ``module + b'/' + path`` is looked up instead,
    and the previous parent is restored afterwards.
    """
    if module is None:
        # ra.check_path does not like leading slashes very much, it leads
        # to PROPFIND subversion errors
        return svn.ra.check_path(self.ra, path.strip(b'/'), revnum)

    saved = self.reparent(b'')
    fullpath = module + b'/' + path
    try:
        # Same leading-slash restriction as above.
        return svn.ra.check_path(self.ra, fullpath.strip(b'/'), revnum)
    finally:
        self.reparent(saved)
1240 1249
def _getlog(
    self,
    paths,
    start,
    end,
    limit=0,
    discover_changed_paths=True,
    strict_node_history=False,
):
    """Return a log stream for ``paths`` between ``start`` and ``end``.

    Paths that do not start with a slash are taken relative to
    ``self.module``.  By default the log comes from ``directlogstream``;
    when the developer option ``convert.svn.debugsvnlog`` is set, a child
    ``hg debugsvnlog`` process is started instead, the encoded arguments
    are written to its stdin and its stdout is wrapped in ``logstream``.

    Raises ``error.Abort`` if closing the child's stdin fails with
    ``IOError``.
    """
    # Normalize path names, svn >= 1.5 only wants paths relative to
    # supplied URL
    relpaths = [
        (p if p.startswith(b'/') else self.module + b'/' + p).strip(b'/')
        for p in paths
    ]
    args = [
        self.baseurl,
        relpaths,
        start,
        end,
        limit,
        discover_changed_paths,
        strict_node_history,
    ]
    # developer config: convert.svn.debugsvnlog
    if self.ui.configbool(b'convert', b'svn.debugsvnlog'):
        payload = encodeargs(args)
        command = b'%s debugsvnlog' % procutil.shellquote(
            procutil.hgexecutable()
        )
        stdin, stdout = procutil.popen2(procutil.quotecommand(command))
        stdin.write(payload)
        try:
            stdin.close()
        except IOError:
            raise error.Abort(
                _(
                    b'Mercurial failed to run itself, check'
                    b' hg executable is in PATH'
                )
            )
        return logstream(stdout)
    return directlogstream(*args)
1284 1293
1285 1294
# Shell script body for a Subversion "pre-revprop-change" hook.  It lets
# svn:log be modified and hg:convert-branch / hg:convert-rev be added, and
# rejects every other revision property change.
pre_revprop_change = b'''#!/bin/sh

REPOS="$1"
REV="$2"
USER="$3"
PROPNAME="$4"
ACTION="$5"

if [ "$ACTION" = "M" -a "$PROPNAME" = "svn:log" ]; then exit 0; fi
if [ "$ACTION" = "A" -a "$PROPNAME" = "hg:convert-branch" ]; then exit 0; fi
if [ "$ACTION" = "A" -a "$PROPNAME" = "hg:convert-rev" ]; then exit 0; fi

echo "Changing prohibited revision property" >&2
exit 1
'''
1301 1310
1302 1311
class svn_sink(converter_sink, commandline):
    """Conversion sink that commits into a Subversion working copy.

    Changes are written into the working copy ``self.wc`` and recorded
    with the ``svn`` command line client.  Pending operations are queued in
    ``self.delete``, ``self.setexec``, ``self.delexec`` and ``self.copies``
    and flushed by ``putcommit``.
    """

    commit_re = re.compile(br'Committed revision (\d+).', re.M)
    uuid_re = re.compile(br'Repository UUID:\s*(\S+)', re.M)

    def prerun(self):
        """Change into the working copy, if one is set."""
        # NOTE(review): presumably called by ``commandline`` before each
        # command -- confirm in common.py.
        if self.wc:
            os.chdir(self.wc)

    def postrun(self):
        """Change back to the directory recorded at construction time."""
        if self.wc:
            os.chdir(self.cwd)

    def join(self, name):
        """Return the path of ``name`` inside the working copy's .svn."""
        return os.path.join(self.wc, b'.svn', name)

    def revmapfile(self):
        """Return the path of the revision map file."""
        return self.join(b'hg-shamap')

    def authorfile(self):
        """Return the path of the author map file."""
        return self.join(b'hg-authormap')

    def __init__(self, ui, repotype, path):
        """Open, or create, the working copy that receives the commits.

        If ``path`` already is a working copy (it has ``.svn/entries``) it
        is updated and used as is.  Otherwise ``path`` names a repository:
        a local one is created with ``svnadmin create`` when missing, and
        a fresh working copy named ``<basename>-wc`` is checked out in the
        current directory.  A newly created repository also gets the
        ``pre-revprop-change`` hook installed.
        """
        converter_sink.__init__(self, ui, repotype, path)
        commandline.__init__(self, ui, b'svn')
        self.delete = []
        self.setexec = []
        self.delexec = []
        self.copies = []
        self.wc = None
        self.cwd = encoding.getcwd()

        # Path of the repository created below, or False if none was.
        created = False
        if os.path.isfile(os.path.join(path, b'.svn', b'entries')):
            self.wc = os.path.realpath(path)
            self.run0(b'update')
        else:
            if not re.search(br'^(file|http|https|svn|svn\+ssh)://', path):
                # Not a URL: treat it as a local repository path.
                path = os.path.realpath(path)
                if os.path.isdir(os.path.dirname(path)):
                    if not os.path.exists(
                        os.path.join(path, b'db', b'fs-type')
                    ):
                        ui.status(
                            _(b"initializing svn repository '%s'\n")
                            % os.path.basename(path)
                        )
                        commandline(ui, b'svnadmin').run0(b'create', path)
                        created = path
                    path = util.normpath(path)
                    if not path.startswith(b'/'):
                        path = b'/' + path
                    path = b'file://' + path

            wcpath = os.path.join(
                encoding.getcwd(), os.path.basename(path) + b'-wc'
            )
            ui.status(
                _(b"initializing svn working copy '%s'\n")
                % os.path.basename(wcpath)
            )
            self.run0(b'checkout', path, wcpath)

            self.wc = wcpath
        self.opener = vfsmod.vfs(self.wc)
        self.wopener = vfsmod.vfs(self.wc)
        self.childmap = mapfile(ui, self.join(b'hg-childmap'))
        # is_exec is None when the working copy cannot hold an exec bit.
        if util.checkexec(self.wc):
            self.is_exec = util.isexec
        else:
            self.is_exec = None

        if created:
            hook = os.path.join(created, b'hooks', b'pre-revprop-change')
            # ``with`` closes the hook file even if the write fails.
            with open(hook, b'wb') as fp:
                fp.write(pre_revprop_change)
            util.setflags(hook, False, True)

        output = self.run0(b'info')
        self.uuid = self.uuid_re.search(output).group(1).strip()

    def wjoin(self, *names):
        """Join ``names`` onto the working copy path."""
        return os.path.join(self.wc, *names)

    @propertycache
    def manifest(self):
        """Set of entry names reported by a recursive ``svn ls``."""
        # As of svn 1.7, the "add" command fails when receiving
        # already tracked entries, so we have to track and filter them
        # ourselves.
        m = set()
        output = self.run0(b'ls', recursive=True, xml=True)
        doc = xml.dom.minidom.parseString(output)
        for e in doc.getElementsByTagName('entry'):
            for n in e.childNodes:
                if n.nodeType != n.ELEMENT_NODE or n.tagName != 'name':
                    continue
                name = ''.join(
                    c.data for c in n.childNodes if c.nodeType == c.TEXT_NODE
                )
                # Entries are compared with names coming from
                # mercurial, so bytes with undefined encoding. Our
                # best bet is to assume they are in local
                # encoding. They will be passed to command line calls
                # later anyway, so they better be.
                m.add(encoding.unitolocal(name))
                # Only the first <name> child of an entry is used.
                break
        return m

    def putfile(self, filename, flags, data):
        """Write ``data`` to ``filename`` in the working copy.

        An ``l`` in ``flags`` creates a symlink whose target is ``data``.
        Otherwise a regular file is written and, when exec bits are
        supported, any change of the ``x`` flag is queued in
        ``self.setexec`` / ``self.delexec``.
        """
        if b'l' in flags:
            self.wopener.symlink(data, filename)
        else:
            wpath = self.wjoin(filename)
            try:
                if os.path.islink(wpath):
                    # Remove the link that was just tested.  ``filename``
                    # is relative to the working copy, so unlinking it
                    # unqualified would hit the current directory instead.
                    os.unlink(wpath)
            except OSError:
                pass

            if self.is_exec:
                # We need to check executability of the file before the change,
                # because `vfs.write` is able to reset exec bit.
                wasexec = False
                if os.path.exists(wpath):
                    wasexec = self.is_exec(wpath)

            self.wopener.write(filename, data)

            if self.is_exec:
                if wasexec:
                    if b'x' not in flags:
                        self.delexec.append(filename)
                else:
                    if b'x' in flags:
                        self.setexec.append(filename)
                util.setflags(wpath, False, b'x' in flags)

    def _copyfile(self, source, dest):
        """Record with ``svn copy`` that ``dest`` was copied from ``source``."""
        # SVN's copy command pukes if the destination file exists, but
        # our copyfile method expects to record a copy that has
        # already occurred. Cross the semantic gap.
        wdest = self.wjoin(dest)
        exists = os.path.lexists(wdest)
        if exists:
            # Reserve a unique name next to the destination and move the
            # existing file out of the way.
            fd, tempname = pycompat.mkstemp(
                prefix=b'hg-copy-', dir=os.path.dirname(wdest)
            )
            os.close(fd)
            os.unlink(tempname)
            os.rename(wdest, tempname)
        try:
            self.run0(b'copy', source, dest)
        finally:
            self.manifest.add(dest)
            if exists:
                # Put the already written content back in place.
                try:
                    os.unlink(wdest)
                except OSError:
                    pass
                os.rename(tempname, wdest)

    def dirs_of(self, files):
        """Return the directories among ``files`` plus all their ancestors."""
        dirs = set()
        for f in files:
            if os.path.isdir(self.wjoin(f)):
                dirs.add(f)
            # Walk the slashes from right to left: every prefix that ends
            # just before one is an ancestor directory.
            end = f.rfind(b'/')
            while end != -1:
                dirs.add(f[:end])
                end = f.rfind(b'/', 0, end)
        return dirs

    def add_dirs(self, files):
        """``svn add`` the directories of ``files`` missing from the manifest.

        Returns the list of directories that were added.
        """
        add_dirs = [
            d for d in sorted(self.dirs_of(files)) if d not in self.manifest
        ]
        if add_dirs:
            self.manifest.update(add_dirs)
            self.xargs(add_dirs, b'add', non_recursive=True, quiet=True)
        return add_dirs

    def add_files(self, files):
        """``svn add`` the entries of ``files`` missing from the manifest.

        Returns the list of files that were added.
        """
        files = [f for f in files if f not in self.manifest]
        if files:
            self.manifest.update(files)
            self.xargs(files, b'add', quiet=True)
        return files

    def addchild(self, parent, child):
        """Remember ``child`` as the revision committed for ``parent``."""
        self.childmap[parent] = child

    def revid(self, rev):
        """Return the sink revision id for svn revision ``rev``."""
        return b"svn:%s@%s" % (self.uuid, rev)

    def putcommit(
        self, files, copies, parents, commit, source, revmap, full, cleanp2
    ):
        """Apply ``files`` to the working copy and commit them.

        ``files`` is a sequence of ``(name, rev)`` pairs whose content is
        obtained from ``source.getfile``; ``copies`` maps destinations to
        copy sources.  With ``full`` set, tracked entries that are neither
        listed in ``files`` nor an ancestor directory of a listed file are
        deleted.  ``revmap`` and ``cleanp2`` are not used here.

        Returns the id of the new revision.  If one of ``parents`` already
        has a recorded child, that child's id is returned and nothing is
        committed.  Raises ``error.Abort`` when svn's output carries no
        revision number although files were involved.
        """
        for parent in parents:
            try:
                return self.revid(self.childmap[parent])
            except KeyError:
                pass

        # Apply changes to working copy
        for f, v in files:
            data, mode = source.getfile(f, v)
            if data is None:
                self.delete.append(f)
            else:
                self.putfile(f, mode, data)
                if f in copies:
                    self.copies.append([copies[f], f])

        # Only the names matter from here on.  They have to be extracted
        # before the manifest is diffed: the manifest holds plain names,
        # which never compare equal to (name, rev) pairs.
        names = [f for f, v in files]
        if full:
            # Directories leading to a kept file must survive as well.
            keep = set(names)
            keep.update(self.dirs_of(names))
            self.delete.extend(sorted(self.manifest.difference(keep)))

        entries = set(self.delete)
        files = frozenset(names)
        entries.update(self.add_dirs(files.difference(entries)))
        if self.copies:
            for s, d in self.copies:
                self._copyfile(s, d)
            self.copies = []
        if self.delete:
            self.xargs(self.delete, b'delete')
            for f in self.delete:
                self.manifest.remove(f)
            self.delete = []
        entries.update(self.add_files(files.difference(entries)))
        if self.delexec:
            self.xargs(self.delexec, b'propdel', b'svn:executable')
            self.delexec = []
        if self.setexec:
            self.xargs(self.setexec, b'propset', b'svn:executable', b'*')
            self.setexec = []

        fd, messagefile = pycompat.mkstemp(prefix=b'hg-convert-')
        try:
            # ``with`` closes the message file even if the write fails,
            # and the outer ``finally`` then still removes it.
            with os.fdopen(fd, 'wb') as fp:
                fp.write(util.tonativeeol(commit.desc))
            output = self.run0(
                b'commit',
                username=stringutil.shortuser(commit.author),
                file=messagefile,
                encoding=b'utf-8',
            )
            try:
                rev = self.commit_re.search(output).group(1)
            except AttributeError:
                # No "Committed revision" line in the output.
                if not files:
                    return parents[0] if parents else b'None'
                self.ui.warn(_(b'unexpected svn output:\n'))
                self.ui.warn(output)
                raise error.Abort(_(b'unable to cope with svn output'))
            if commit.rev:
                self.run(
                    b'propset',
                    b'hg:convert-rev',
                    commit.rev,
                    revprop=True,
                    revision=rev,
                )
            if commit.branch and commit.branch != b'default':
                self.run(
                    b'propset',
                    b'hg:convert-branch',
                    commit.branch,
                    revprop=True,
                    revision=rev,
                )
            for parent in parents:
                self.addchild(parent, rev)
            return self.revid(rev)
        finally:
            os.unlink(messagefile)

    def puttags(self, tags):
        """Warn that tags cannot be written and return ``(None, None)``."""
        self.ui.warn(_(b'writing Subversion tags is not yet implemented\n'))
        return None, None

    def hascommitfrommap(self, rev):
        """Report ``rev`` as present; no lookup is performed."""
        # We trust that revisions referenced in a map are still present
        # TODO: implement something better if necessary and feasible
        return True

    def hascommitforsplicemap(self, rev):
        """Return True if ``rev`` is in the child map, else abort."""
        # This is not correct as one can convert to an existing subversion
        # repository and childmap would not list all revisions. Too bad.
        if rev in self.childmap:
            return True
        raise error.Abort(
            _(
                b'splice map revision %s not found in subversion '
                b'child map (revision lookups are not implemented)'
            )
            % rev
        )
@@ -1,174 +1,184 b''
1 1 #require svn svn-bindings
2 2
3 3 $ cat >> $HGRCPATH <<EOF
4 4 > [extensions]
5 5 > convert =
6 6 > EOF
7 7
8 8 $ svnadmin create svn-repo
9 9 $ svnadmin load -q svn-repo < "$TESTDIR/svn/encoding.svndump"
10 10
11 11 Convert while testing all possible outputs
12 12
13 13 $ hg --debug convert svn-repo A-hg --config progress.debug=1
14 14 initializing destination A-hg repository
15 15 reparent to file:/*/$TESTTMP/svn-repo (glob)
16 16 run hg sink pre-conversion action
17 17 scanning source...
18 18 found trunk at 'trunk'
19 19 found tags at 'tags'
20 20 found branches at 'branches'
21 21 found branch branch\xc3\xa9 at 5 (esc)
22 22 found branch branch\xc3\xa9e at 6 (esc)
23 23 scanning: 1/4 revisions (25.00%)
24 24 reparent to file:/*/$TESTTMP/svn-repo/trunk (glob)
25 25 fetching revision log for "/trunk" from 4 to 0
26 26 parsing revision 4 (2 changes)
27 27 parsing revision 3 (4 changes)
28 28 parsing revision 2 (3 changes)
29 29 parsing revision 1 (3 changes)
30 30 no copyfrom path, don't know what to do.
31 31 '/branches' is not under '/trunk', ignoring
32 32 '/tags' is not under '/trunk', ignoring
33 33 scanning: 2/4 revisions (50.00%)
34 34 reparent to file:/*/$TESTTMP/svn-repo/branches/branch%C3%A9 (glob)
35 35 fetching revision log for "/branches/branch\xc3\xa9" from 5 to 0 (esc)
36 36 parsing revision 5 (1 changes)
37 37 reparent to file:/*/$TESTTMP/svn-repo (glob)
38 38 reparent to file:/*/$TESTTMP/svn-repo/branches/branch%C3%A9 (glob)
39 39 found parent of branch /branches/branch\xc3\xa9 at 4: /trunk (esc)
40 40 scanning: 3/4 revisions (75.00%)
41 41 reparent to file:/*/$TESTTMP/svn-repo/branches/branch%C3%A9e (glob)
42 42 fetching revision log for "/branches/branch\xc3\xa9e" from 6 to 0 (esc)
43 43 parsing revision 6 (1 changes)
44 44 reparent to file:/*/$TESTTMP/svn-repo (glob)
45 45 reparent to file:/*/$TESTTMP/svn-repo/branches/branch%C3%A9e (glob)
46 46 found parent of branch /branches/branch\xc3\xa9e at 5: /branches/branch\xc3\xa9 (esc)
47 47 scanning: 4/4 revisions (100.00%)
48 48 scanning: 5/4 revisions (125.00%)
49 49 scanning: 6/4 revisions (150.00%)
50 50 sorting...
51 51 converting...
52 52 5 init projA
53 53 source: svn:afeb9c47-92ff-4c0c-9f72-e1f6eb8ac9af/trunk@1
54 54 converting: 0/6 revisions (0.00%)
55 55 reusing manifest from p1 (no file change)
56 56 committing changelog
57 57 updating the branch cache
58 58 4 hello
59 59 source: svn:afeb9c47-92ff-4c0c-9f72-e1f6eb8ac9af/trunk@2
60 60 converting: 1/6 revisions (16.67%)
61 61 reparent to file:/*/$TESTTMP/svn-repo/trunk (glob)
62 62 scanning paths: /trunk/\xc3\xa0 0/3 paths (0.00%) (esc)
63 63 scanning paths: /trunk/\xc3\xa0/e\xcc\x81 1/3 paths (33.33%) (esc)
64 64 scanning paths: /trunk/\xc3\xa9 2/3 paths (66.67%) (esc)
65 65 committing files:
66 66 \xc3\xa0/e\xcc\x81 (esc)
67 67 getting files: \xc3\xa0/e\xcc\x81 1/2 files (50.00%) (esc)
68 68 \xc3\xa9 (esc)
69 69 getting files: \xc3\xa9 2/2 files (100.00%) (esc)
70 70 committing manifest
71 71 committing changelog
72 72 updating the branch cache
73 73 3 copy files
74 74 source: svn:afeb9c47-92ff-4c0c-9f72-e1f6eb8ac9af/trunk@3
75 75 converting: 2/6 revisions (33.33%)
76 76 scanning paths: /trunk/\xc3\xa0 0/4 paths (0.00%) (esc)
77 77 gone from -1
78 78 reparent to file:/*/$TESTTMP/svn-repo (glob)
79 79 reparent to file:/*/$TESTTMP/svn-repo/trunk (glob)
80 80 scanning paths: /trunk/\xc3\xa8 1/4 paths (25.00%) (esc)
81 81 copied to \xc3\xa8 from \xc3\xa9@2 (esc)
82 82 scanning paths: /trunk/\xc3\xa9 2/4 paths (50.00%) (esc)
83 83 gone from -1
84 84 reparent to file:/*/$TESTTMP/svn-repo (glob)
85 85 reparent to file:/*/$TESTTMP/svn-repo/trunk (glob)
86 86 scanning paths: /trunk/\xc3\xb9 3/4 paths (75.00%) (esc)
87 87 mark /trunk/\xc3\xb9 came from \xc3\xa0:2 (esc)
88 88 getting files: \xc3\xa0/e\xcc\x81 1/4 files (25.00%) (esc)
89 89 getting files: \xc3\xa9 2/4 files (50.00%) (esc)
90 90 committing files:
91 91 \xc3\xa8 (esc)
92 92 getting files: \xc3\xa8 3/4 files (75.00%) (esc)
93 93 \xc3\xa8: copy \xc3\xa9:6b67ccefd5ce6de77e7ead4f5292843a0255329f (esc)
94 94 \xc3\xb9/e\xcc\x81 (esc)
95 95 getting files: \xc3\xb9/e\xcc\x81 4/4 files (100.00%) (esc)
96 96 \xc3\xb9/e\xcc\x81: copy \xc3\xa0/e\xcc\x81:a9092a3d84a37b9993b5c73576f6de29b7ea50f6 (esc)
97 97 committing manifest
98 98 committing changelog
99 99 updating the branch cache
100 100 2 remove files
101 101 source: svn:afeb9c47-92ff-4c0c-9f72-e1f6eb8ac9af/trunk@4
102 102 converting: 3/6 revisions (50.00%)
103 103 scanning paths: /trunk/\xc3\xa8 0/2 paths (0.00%) (esc)
104 104 gone from -1
105 105 reparent to file:/*/$TESTTMP/svn-repo (glob)
106 106 reparent to file:/*/$TESTTMP/svn-repo/trunk (glob)
107 107 scanning paths: /trunk/\xc3\xb9 1/2 paths (50.00%) (esc)
108 108 gone from -1
109 109 reparent to file:/*/$TESTTMP/svn-repo (glob)
110 110 reparent to file:/*/$TESTTMP/svn-repo/trunk (glob)
111 111 getting files: \xc3\xa8 1/2 files (50.00%) (esc)
112 112 getting files: \xc3\xb9/e\xcc\x81 2/2 files (100.00%) (esc)
113 113 committing files:
114 114 committing manifest
115 115 committing changelog
116 116 updating the branch cache
117 117 1 branch to branch?
118 118 source: svn:afeb9c47-92ff-4c0c-9f72-e1f6eb8ac9af/branches/branch?@5
119 119 converting: 4/6 revisions (66.67%)
120 120 reparent to file:/*/$TESTTMP/svn-repo/branches/branch%C3%A9 (glob)
121 121 scanning paths: /branches/branch\xc3\xa9 0/1 paths (0.00%) (esc)
122 122 reusing manifest from p1 (no file change)
123 123 committing changelog
124 124 updating the branch cache
125 125 0 branch to branch?e
126 126 source: svn:afeb9c47-92ff-4c0c-9f72-e1f6eb8ac9af/branches/branch?e@6
127 127 converting: 5/6 revisions (83.33%)
128 128 reparent to file:/*/$TESTTMP/svn-repo/branches/branch%C3%A9e (glob)
129 129 scanning paths: /branches/branch\xc3\xa9e 0/1 paths (0.00%) (esc)
130 130 reusing manifest from p1 (no file change)
131 131 committing changelog
132 132 updating the branch cache
133 133 reparent to file:/*/$TESTTMP/svn-repo (glob)
134 134 reparent to file:/*/$TESTTMP/svn-repo/branches/branch%C3%A9e (glob)
135 135 reparent to file:/*/$TESTTMP/svn-repo (glob)
136 136 reparent to file:/*/$TESTTMP/svn-repo/branches/branch%C3%A9e (glob)
137 137 updating tags
138 138 committing files:
139 139 .hgtags
140 140 committing manifest
141 141 committing changelog
142 142 updating the branch cache
143 143 run hg sink post-conversion action
144 144 $ cd A-hg
145 145 $ hg up
146 146 1 files updated, 0 files merged, 0 files removed, 0 files unresolved
147 147
148 148 Check tags are in UTF-8
149 149
150 150 $ cat .hgtags
151 151 e94e4422020e715add80525e8f0f46c9968689f1 branch\xc3\xa9e (esc)
152 152 f7e66f98380ed1e53a797c5c7a7a2616a7ab377d branch\xc3\xa9 (esc)
153 153
154 154 $ cd ..
155 155
156 Subversion sources don't support non-ASCII characters in HTTP(S) URLs.
157
158 $ XFF=$($PYTHON -c 'from mercurial.utils.procutil import stdout; stdout.write(b"\xff")')
159 $ hg convert --source-type=svn http://localhost:$HGPORT/$XFF test
160 initializing destination test repository
161 Subversion sources don't support non-ASCII characters in HTTP(S) URLs. Please percent-encode them.
162 http://localhost:$HGPORT/\xff does not look like a Subversion repository (esc)
163 abort: http://localhost:$HGPORT/\xff: missing or unsupported repository (esc)
164 [255]
165
156 166 #if py3
157 167 For now, on Python 3, we abort when encountering non-UTF-8 percent-encoded
158 168 bytes in a filename.
159 169
160 170 $ hg convert file:///%ff test
161 171 initializing destination test repository
162 172 on Python 3, we currently do not support non-UTF-8 percent-encoded bytes in file URLs for Subversion repositories
163 173 file:///%ff does not look like a CVS checkout
164 174 $TESTTMP/file:/%ff does not look like a Git repository
165 175 file:///%ff does not look like a Subversion repository
166 176 file:///%ff is not a local Mercurial repository
167 177 file:///%ff does not look like a darcs repository
168 178 file:///%ff does not look like a monotone repository
169 179 file:///%ff does not look like a GNU Arch repository
170 180 file:///%ff does not look like a Bazaar repository
171 181 file:///%ff does not look like a P4 repository
172 182 abort: file:///%ff: missing or unsupported repository
173 183 [255]
174 184 #endif
General Comments 0
You need to be logged in to leave comments. Login now