##// END OF EJS Templates
py3: pass native string to urlreq.url2pathname()...
Manuel Jacob -
r45495:75b59d22 stable
parent child Browse files
Show More
@@ -1,1565 +1,1584 b''
1 1 # Subversion 1.4/1.5 Python API backend
2 2 #
3 3 # Copyright(C) 2007 Daniel Holth et al
4 4 from __future__ import absolute_import
5 5
6 6 import os
7 7 import re
8 8 import xml.dom.minidom
9 9
10 10 from mercurial.i18n import _
11 11 from mercurial.pycompat import open
12 12 from mercurial import (
13 13 encoding,
14 14 error,
15 15 pycompat,
16 16 util,
17 17 vfs as vfsmod,
18 18 )
19 19 from mercurial.utils import (
20 20 dateutil,
21 21 procutil,
22 22 stringutil,
23 23 )
24 24
25 25 from . import common
26 26
27 27 pickle = util.pickle
28 28 stringio = util.stringio
29 29 propertycache = util.propertycache
30 30 urlerr = util.urlerr
31 31 urlreq = util.urlreq
32 32
33 33 commandline = common.commandline
34 34 commit = common.commit
35 35 converter_sink = common.converter_sink
36 36 converter_source = common.converter_source
37 37 decodeargs = common.decodeargs
38 38 encodeargs = common.encodeargs
39 39 makedatetimestamp = common.makedatetimestamp
40 40 mapfile = common.mapfile
41 41 MissingTool = common.MissingTool
42 42 NoRepo = common.NoRepo
43 43
44 44 # Subversion stuff. Works best with very recent Python SVN bindings
45 45 # e.g. SVN 1.5 or backports. Thanks to the bzr folks for enhancing
46 46 # these bindings.
47 47
48 48 try:
49 49 import svn
50 50 import svn.client
51 51 import svn.core
52 52 import svn.ra
53 53 import svn.delta
54 54 from . import transport
55 55 import warnings
56 56
57 57 warnings.filterwarnings(
58 58 'ignore', module='svn.core', category=DeprecationWarning
59 59 )
60 60 svn.core.SubversionException # trigger import to catch error
61 61
62 62 except ImportError:
63 63 svn = None
64 64
65 65
class SvnPathNotFound(Exception):
    """Signals that a path could not be found in the Subversion repository."""
68 68
69 69
def revsplit(rev):
    """Split a revision identifier into its (uuid, path, revnum) parts.

    The revision number is whatever follows the last ``@`` (0 when there is
    no ``@``). The uuid and path are only filled in when the remainder has
    the form ``svn:<uuid>/<path>``; otherwise both are empty.

    >>> revsplit(b'svn:a2147622-4a9f-4db4-a8d3-13562ff547b2'
    ...          b'/proj%20B/mytrunk/mytrunk@1')
    ('a2147622-4a9f-4db4-a8d3-13562ff547b2', '/proj%20B/mytrunk/mytrunk', 1)
    >>> revsplit(b'svn:8af66a51-67f5-4354-b62c-98d67cc7be1d@1')
    ('', '', 1)
    >>> revsplit(b'@7')
    ('', '', 7)
    >>> revsplit(b'7')
    ('', '', 0)
    >>> revsplit(b'bad')
    ('', '', 0)
    """
    location, at, number = rev.rpartition(b'@')
    if at:
        revnum = int(number)
    else:
        # No '@' at all: the whole string is the location part.
        location = rev
        revnum = 0

    prefix, slash, rest = location.partition(b'/')
    if slash and prefix.startswith(b'svn:'):
        return prefix[4:], b'/' + rest, revnum
    return b'', b'', revnum
95 95
96 96
def quote(s):
    """Percent-encode ``s`` the way the svn URL encoding function does."""
    # As of svn 1.7, many svn calls expect "canonical" paths. In
    # theory, we should call svn.core.*canonicalize() on all paths
    # before passing them to the API. Instead, we assume the base url
    # is canonical and copy the behaviour of svn URL encoding function
    # so we can extend it safely with new components. The "safe"
    # characters were taken from the "svn_uri__char_validity" table in
    # libsvn_subr/path.c.
    safechars = b"!$&'()*+,-./:=@_~"
    return urlreq.quote(s, safechars)
106 106
107 107
def geturl(path):
    """Return a canonical Subversion URL for ``path``.

    ``svn.client.url_from_path()`` is tried first. When it raises
    ``SubversionException`` and ``path`` is an existing directory, a
    ``file://`` URL is built from its absolute path instead.
    """
    try:
        canonical = svn.core.svn_path_canonicalize(path)
        return svn.client.url_from_path(canonical)
    except svn.core.SubversionException:
        # svn.client.url_from_path() fails with local repositories
        pass

    if os.path.isdir(path):
        localpath = os.path.normpath(os.path.abspath(path))
        if pycompat.iswindows:
            localpath = b'/' + util.normpath(localpath)
        # Module URL is later compared with the repository URL returned
        # by svn API, which is UTF-8.
        localpath = encoding.tolocal(localpath)
        path = b'file://%s' % quote(localpath)
    return svn.core.svn_path_canonicalize(path)
123 123
124 124
def optrev(number):
    """Build an ``svn_opt_revision_t`` that designates revision ``number``."""
    revision = svn.core.svn_opt_revision_t()
    revision.kind = svn.core.svn_opt_revision_number
    revision.value.number = number
    return revision
130 130
131 131
class changedpath(object):
    """Copy of the ``copyfrom_path``, ``copyfrom_rev`` and ``action``
    attributes of a changed-path entry ``p``."""

    def __init__(self, p):
        self.action = p.action
        self.copyfrom_rev = p.copyfrom_rev
        self.copyfrom_path = p.copyfrom_path
137 137
138 138
def get_log_child(
    fp,
    url,
    paths,
    start,
    end,
    limit=0,
    discover_changed_paths=True,
    strict_node_history=False,
):
    """Stream the svn log for ``paths`` to ``fp`` as pickles, then exit.

    Each log entry is dumped as a ``(paths, revnum, author, date, message)``
    tuple. The stream is terminated by ``None`` on success or on IOError,
    or by a bytes error message for any other exception. The process is
    then ended with ``os._exit(0)``, so this function never returns.
    """
    protocol = -1

    def receiver(orig_paths, revnum, author, date, message, pool):
        changed = {}
        if orig_paths is not None:
            for name, entry in pycompat.iteritems(orig_paths):
                changed[name] = changedpath(entry)
        record = (changed, revnum, author, date, message)
        pickle.dump(record, fp, protocol)

    try:
        # Use an ra of our own so that our parent can consume
        # our results without confusing the server.
        ownra = transport.SvnRaTransport(url=url)
        svn.ra.get_log(
            ownra.ra,
            paths,
            start,
            end,
            limit,
            discover_changed_paths,
            strict_node_history,
            receiver,
        )
    except IOError:
        # Caller may interrupt the iteration
        pickle.dump(None, fp, protocol)
    except Exception as inst:
        pickle.dump(stringutil.forcebytestr(inst), fp, protocol)
    else:
        pickle.dump(None, fp, protocol)
    fp.flush()
    # With large history, cleanup process goes crazy and suddenly
    # consumes *huge* amount of memory. The output file being closed,
    # there is no need for clean termination.
    os._exit(0)
184 184
185 185
def debugsvnlog(ui, **opts):
    """Fetch SVN log in a subprocess and channel them back to parent to
    avoid memory collection issues.
    """
    if svn is None:
        message = _(b'debugsvnlog could not load Subversion python bindings')
        raise error.Abort(message)

    # The parent sends the encoded get_log_child() arguments on stdin.
    childargs = decodeargs(ui.fin.read())
    get_log_child(ui.fout, *childargs)
197 197
198 198
class logstream(object):
    """Interruptible revision log iterator.

    Reads pickled records from ``stdout``. Each record is either a
    ``(paths, revnum, author, date, message)`` entry, ``None`` marking the
    end of the stream, or anything else, which is reported as an error.
    """

    def __init__(self, stdout):
        self._stdout = stdout

    def __iter__(self):
        """Yield log entries until the ``None`` end marker is read.

        Raises error.Abort when the stream ends before the end marker or
        when a record is not a 5-item entry.
        """
        while True:
            try:
                entry = pickle.load(self._stdout)
            except EOFError:
                raise error.Abort(
                    _(
                        b'Mercurial failed to run itself, check'
                        b' hg executable is in PATH'
                    )
                )
            if entry is None:
                # End-of-stream marker.
                break
            # Check the record shape explicitly instead of trying to unpack
            # it: a 5-byte error message would unpack without error and be
            # yielded as if it were a log entry.
            if not isinstance(entry, (tuple, list)) or len(entry) != 5:
                raise error.Abort(_(b"log stream exception '%s'") % entry)
            yield entry

    def close(self):
        """Close the underlying stream; safe to call more than once."""
        if self._stdout:
            self._stdout.close()
            self._stdout = None
228 228
229 229
class directlogstream(list):
    """Direct revision log iterator.
    This can be used for debugging and development but it will probably leak
    memory and is not suitable for real conversions."""

    def __init__(
        self,
        url,
        paths,
        start,
        end,
        limit=0,
        discover_changed_paths=True,
        strict_node_history=False,
    ):
        def receiver(orig_paths, revnum, author, date, message, pool):
            # Append every log entry to this list as it is received.
            changed = {}
            if orig_paths is not None:
                for name, entry in pycompat.iteritems(orig_paths):
                    changed[name] = changedpath(entry)
            self.append((changed, revnum, author, date, message))

        # Use an ra of our own so that our parent can consume
        # our results without confusing the server.
        ownra = transport.SvnRaTransport(url=url)
        svn.ra.get_log(
            ownra.ra,
            paths,
            start,
            end,
            limit,
            discover_changed_paths,
            strict_node_history,
            receiver,
        )

    def close(self):
        """Nothing to release; present for parity with logstream."""
        pass
268 268
269 269
270 270 # Check to see if the given path is a local Subversion repo. Verify this by
271 271 # looking for several svn-specific files and directories in the given
272 272 # directory.
def filecheck(ui, path, proto):
    """Return True when ``path`` contains all of the entries 'locks',
    'hooks', 'format' and 'db'. ``ui`` and ``proto`` are unused."""
    markers = (b'locks', b'hooks', b'format', b'db')
    return all(os.path.exists(os.path.join(path, name)) for name in markers)
278 278
279 279
280 280 # Check to see if a given path is the root of an svn repo over http. We verify
281 281 # this by requesting a version-controlled URL we know can't exist and looking
282 282 # for the svn-specific "not found" XML.
def httpcheck(ui, path, proto):
    """Probe ``proto://path`` for a Subversion repository over HTTP(S).

    Requests a version-controlled URL that cannot exist and returns True
    when the response body carries the svn "not found" XML marker. An HTTP
    error other than 404 is inconclusive: a warning is printed and True is
    returned. Any other failure returns False.
    """
    try:
        opener = urlreq.buildopener()
        # The opener only accepts native-string URLs on Python 3. Passing
        # the bytes URL made open() fail, and the generic handler below
        # turned that into "not a repository" for every HTTP(S) URL.
        probeurl = pycompat.strurl(b'%s://%s/!svn/ver/0/.svn' % (proto, path))
        # NOTE(review): the second positional argument of open() is the
        # request body, so b'rb' is sent as data. Kept as-is; confirm
        # whether that is intended.
        rsp = opener.open(probeurl, b'rb')
        data = rsp.read()
    except urlerr.httperror as inst:
        if inst.code != 404:
            # Except for 404 we cannot know for sure this is not an svn repo
            ui.warn(
                _(
                    b'svn: cannot probe remote repository, assume it could '
                    b'be a subversion repository. Use --source-type if you '
                    b'know better.\n'
                )
            )
            return True
        data = inst.fp.read()
    except Exception:
        # Could be urlerr.urlerror if the URL is invalid or anything else.
        return False
    return b'<m:human-readable errcode="160013">' in data
304 304
305 305
# Maps a URL scheme to the function that probes it for an svn repository.
protomap = {
    b'file': filecheck,
    b'http': httpcheck,
    b'https': httpcheck,
}
311 311
312 312
def issvnurl(ui, url):
    """Return True if ``url`` or one of its parent paths is an svn repo.

    ``url`` is split into scheme and path; a value without ``://`` is
    treated as a local path. The probe registered for the scheme in
    ``protomap`` is called on the path and then on each parent path in turn
    until one succeeds or no ``/`` is left.
    """
    try:
        proto, path = url.split(b'://', 1)
        if proto == b'file':
            if (
                pycompat.iswindows
                and path[:1] == b'/'
                and path[1:2].isalpha()
                and path[2:6].lower() == b'%3a/'
            ):
                path = path[:2] + b':/' + path[6:]
            # pycompat.fsdecode() / pycompat.fsencode() are used so that bytes
            # in the URL roundtrip correctly on Unix. urlreq.url2pathname() on
            # py3 will decode percent-encoded bytes using the utf-8 encoding
            # and the "replace" error handler. This means that it will not
            # preserve non-UTF-8 bytes (https://bugs.python.org/issue40983).
            # url.open() uses the reverse function (urlreq.pathname2url()) and
            # has a similar problem
            # (https://bz.mercurial-scm.org/show_bug.cgi?id=6357). It makes
            # sense to solve both problems together and handle all file URLs
            # consistently. For now, we warn.
            unicodepath = urlreq.url2pathname(pycompat.fsdecode(path))
            if pycompat.ispy3 and u'\N{REPLACEMENT CHARACTER}' in unicodepath:
                ui.warn(
                    _(
                        b'on Python 3, we currently do not support non-UTF-8 '
                        b'percent-encoded bytes in file URLs for Subversion '
                        b'repositories\n'
                    )
                )
            path = pycompat.fsencode(unicodepath)
    except ValueError:
        # No scheme separator: treat the argument as a local path.
        proto = b'file'
        path = os.path.abspath(url)
    if proto == b'file':
        path = util.pconvert(path)
    # Unknown schemes get a probe that always answers False.
    check = protomap.get(proto, lambda *args: False)
    while b'/' in path:
        if check(ui, path, proto):
            return True
        # Drop the last path component and try the parent.
        path = path.rsplit(b'/', 1)[0]
    return False
336 355
337 356
338 357 # SVN conversion code stolen from bzr-svn and tailor
339 358 #
340 359 # Subversion looks like a versioned filesystem, branches structures
341 360 # are defined by conventions and not enforced by the tool. First,
342 361 # we define the potential branches (modules) as "trunk" and "branches"
343 362 # children directories. Revisions are then identified by their
344 363 # module and revision number (and a repository identifier).
345 364 #
346 365 # The revision graph is really a tree (or a forest). By default, a
347 366 # revision parent is the previous revision in the same module. If the
348 367 # module directory is copied/moved from another module then the
349 368 # revision is the module root and its parent the source revision in
350 369 # the parent module. A revision has at most one parent.
351 370 #
352 371 class svn_source(converter_source):
def __init__(self, ui, repotype, url, revs=None):
    """Open ``url`` as a Subversion conversion source.

    Raises NoRepo when ``url`` does not look like a Subversion repository,
    MissingTool when the Python bindings are missing or older than 1.4,
    and error.Abort for bad revision arguments or when no revision is
    found in the module.
    """
    super(svn_source, self).__init__(ui, repotype, url, revs=revs)

    looks_like_svn = (
        url.startswith((b'svn://', b'svn+ssh://'))
        or (os.path.exists(url) and os.path.exists(os.path.join(url, b'.svn')))
        or issvnurl(ui, url)
    )
    if not looks_like_svn:
        raise NoRepo(
            _(b"%s does not look like a Subversion repository") % url
        )
    if svn is None:
        raise MissingTool(_(b'could not load Subversion python bindings'))

    try:
        version = svn.core.SVN_VER_MAJOR, svn.core.SVN_VER_MINOR
        if version < (1, 4):
            raise MissingTool(
                _(
                    b'Subversion python bindings %d.%d found, '
                    b'1.4 or later required'
                )
                % version
            )
    except AttributeError:
        raise MissingTool(
            _(
                b'Subversion python bindings are too old, 1.4 '
                b'or later required'
            )
        )

    self.lastrevs = {}

    pegrev = None
    try:
        # Support file://path@rev syntax. Useful e.g. to convert
        # deleted branches.
        atpos = url.rfind(b'@')
        if atpos >= 0:
            pegrev = int(url[atpos + 1 :])
            url = url[:atpos]
    except ValueError:
        # The text after '@' is not a number: keep the URL untouched.
        pass
    self.url = geturl(url)
    self.encoding = b'UTF-8'  # Subversion is always nominal UTF-8
    try:
        self.transport = transport.SvnRaTransport(url=self.url)
        self.ra = self.transport.ra
        self.ctx = self.transport.client
        self.baseurl = svn.ra.get_repos_root(self.ra)
        # Module is either empty or a repository path starting with
        # a slash and not ending with a slash.
        self.module = urlreq.unquote(self.url[len(self.baseurl) :])
        self.prevmodule = None
        self.rootmodule = self.module
        self.commits = {}
        self.paths = {}
        self.uuid = svn.ra.get_uuid(self.ra)
    except svn.core.SubversionException:
        ui.traceback()
        svnversion = b'%d.%d.%d' % (
            svn.core.SVN_VER_MAJOR,
            svn.core.SVN_VER_MINOR,
            svn.core.SVN_VER_MICRO,
        )
        raise NoRepo(
            _(
                b"%s does not look like a Subversion repository "
                b"to libsvn version %s"
            )
            % (self.url, svnversion)
        )

    if revs:
        if len(revs) > 1:
            raise error.Abort(
                _(
                    b'subversion source does not support '
                    b'specifying multiple revisions'
                )
            )
        try:
            pegrev = int(revs[0])
        except ValueError:
            raise error.Abort(
                _(b'svn: revision %s is not an integer') % revs[0]
            )

    trunkcfg = self.ui.config(b'convert', b'svn.trunk')
    if trunkcfg is None:
        trunkcfg = b'trunk'
    self.trunkname = trunkcfg.strip(b'/')

    self.startrev = self.ui.config(b'convert', b'svn.startrev')
    try:
        # Negative start revisions are clamped to 0.
        self.startrev = max(int(self.startrev), 0)
    except ValueError:
        raise error.Abort(
            _(b'svn: start revision %s is not an integer') % self.startrev
        )

    try:
        self.head = self.latest(self.module, pegrev)
    except SvnPathNotFound:
        self.head = None
    if not self.head:
        raise error.Abort(
            _(b'no revision found in module %s') % self.module
        )
    self.last_changed = self.revnum(self.head)

    self._changescache = (None, None)

    # Remember the working copy only when the source is one.
    has_entries = os.path.exists(os.path.join(url, b'.svn/entries'))
    self.wc = url if has_entries else None
    self.convertfp = None
477 496
def setrevmap(self, revmap):
    """Record in ``self.lastrevs`` the highest revision number seen for
    each module among the revision ids of ``revmap``."""
    newest = {}
    for revid in revmap:
        module, revnum = revsplit(revid)[1:]
        if module not in newest or revnum > newest[module]:
            newest[module] = revnum
    self.lastrevs = newest
486 505
def exists(self, path, optrev):
    """Return True if ``path`` (relative to ``self.url``) can be listed at
    revision ``optrev``, False if svn raises SubversionException."""
    target = self.url.rstrip(b'/') + b'/' + quote(path)
    try:
        svn.client.ls(target, optrev, False, self.ctx)
    except svn.core.SubversionException:
        return False
    return True
498 517
def getheads(self):
    """Compute and return the list of head revision ids.

    Looks for the trunk, tags and branches directories (names overridable
    through the ``convert.svn.*`` settings), updates ``self.module``,
    ``self.head`` and ``self.tags`` accordingly, and stores the result in
    ``self.heads`` with the module head first.
    """

    def isdir(path, revnum):
        return self._checkpath(path, revnum) == svn.core.svn_node_dir

    def getcfgpath(name, rev):
        cfgpath = self.ui.config(b'convert', b'svn.' + name)
        if cfgpath is not None and cfgpath.strip() == b'':
            # An explicitly blank setting disables this directory.
            return None
        path = (cfgpath or name).strip(b'/')
        if self.exists(path, rev):
            self.ui.note(_(b'found %s at %r\n') % (name, path))
            return path
        # A missing trunk is fine when we are converting from inside this
        # directory; otherwise a configured path must exist.
        inside_trunk = self.module.endswith(path) and name == b'trunk'
        if cfgpath and not inside_trunk:
            raise error.Abort(
                _(b'expected %s to be at %r, but not found') % (name, path)
            )
        return None

    rev = optrev(self.last_changed)
    oldmodule = b''
    trunk = getcfgpath(b'trunk', rev)
    self.tags = getcfgpath(b'tags', rev)
    branches = getcfgpath(b'branches', rev)

    # If the project has a trunk or branches, we will extract heads
    # from them. We keep the project root otherwise.
    if trunk:
        oldmodule = self.module or b''
        self.module += b'/' + trunk
        self.head = self.latest(self.module, self.last_changed)
        if not self.head:
            raise error.Abort(
                _(b'no revision found in module %s') % self.module
            )

    # First head in the list is the module's head
    self.heads = [self.head]
    if self.tags is not None:
        self.tags = b'%s/%s' % (oldmodule, (self.tags or b'tags'))

    # Check if branches bring a few more heads to the list
    if branches:
        branchesurl = self.url.strip(b'/') + b'/' + quote(branches)
        branchnames = svn.client.ls(branchesurl, rev, False, self.ctx)
        for branch in sorted(branchnames):
            module = b'%s/%s/%s' % (oldmodule, branches, branch)
            if not isdir(module, self.last_changed):
                continue
            brevid = self.latest(module, self.last_changed)
            if brevid:
                self.ui.note(
                    _(b'found branch %s at %d\n')
                    % (branch, self.revnum(brevid))
                )
                self.heads.append(brevid)
            else:
                self.ui.note(_(b'ignoring empty branch %s\n') % branch)

    if self.startrev and self.heads:
        if len(self.heads) > 1:
            raise error.Abort(
                _(
                    b'svn: start revision is not supported '
                    b'with more than one branch'
                )
            )
        headrev = self.revnum(self.heads[0])
        if headrev < self.startrev:
            raise error.Abort(
                _(b'svn: no revision found after start revision %d')
                % self.startrev
            )

    return self.heads
580 599
def _getchanges(self, rev, full):
    """Return ``(files, copies)`` for ``rev``.

    ``files`` is a sorted list of ``(path, rev)`` pairs. When the revision
    has no parents, or ``full`` is set, every file listed for the module at
    that revision is reported and ``self.removed`` is reset.
    """
    paths, parents = self.paths[rev]
    copies = {}
    if parents:
        files, self.removed, copies = self.expandpaths(rev, paths, parents)
    if full or not parents:
        # Perform a full checkout on roots
        module, revnum = revsplit(rev)[1:]
        listing = svn.client.ls(
            self.baseurl + quote(module), optrev(revnum), True, self.ctx
        )
        files = []
        for name, entry in pycompat.iteritems(listing):
            if entry.kind == svn.core.svn_node_file:
                files.append(name)
        self.removed = set()

    files.sort()
    return ([(name, rev) for name in files], copies)
602 621
def getchanges(self, rev, full):
    """Return ``(files, copies, set())`` for ``rev``, reusing the result
    cached by getchangedfiles() when it is for the same revision and
    ``full`` is not requested."""
    cachedrev, cached = self._changescache
    if cachedrev == rev and not full:
        # reuse cache from getchangedfiles
        files, copies = cached
    else:
        files, copies = self._getchanges(rev, full)
        # caller caches the result, so free it here to release memory
        del self.paths[rev]
    return (files, copies, set())
612 631
def getchangedfiles(self, rev, i):
    """Return the file names changed in ``rev``. ``i`` is unused."""
    # called from filemap - cache computed values for reuse in getchanges
    files, copies = self._getchanges(rev, False)
    self._changescache = (rev, (files, copies))
    return [entry[0] for entry in files]
618 637
def getcommit(self, rev):
    """Return the commit object for ``rev`` and drop it from the cache.

    When ``rev`` is not cached yet, the transport is reparented to the
    revision's module and the needed revisions are fetched first. Raises
    error.Abort if the revision is still unknown afterwards.
    """
    if rev not in self.commits:
        uuid, module, revnum = revsplit(rev)
        self.module = module
        self.reparent(module)
        # We assume that:
        # - requests for revisions after "stop" come from the
        # revision graph backward traversal. Cache all of them
        # down to stop, they will be used eventually.
        # - requests for revisions before "stop" come to get
        # isolated branches parents. Just fetch what is needed.
        stop = self.lastrevs.get(module, 0)
        if revnum < stop:
            stop = revnum + 1
        self._fetch_revisions(revnum, stop)
        if rev not in self.commits:
            # revnum is an int and bytes %s formatting rejects ints on
            # Python 3, so render it to bytes first. The message id is
            # kept unchanged for translations.
            raise error.Abort(
                _(b'svn: revision %s not found') % (b'%d' % revnum)
            )
    # caller caches the result, so free it here to release memory
    return self.commits.pop(rev)
640 659
def checkrevformat(self, revstr, mapname=b'splicemap'):
    """Abort unless ``revstr`` is of the form ``svn:<uuid>[path]@<number>``.

    ``mapname`` only names the map file in the error message. Raises
    error.Abort for a malformed identifier and returns None otherwise.
    """
    # revstr is bytes, so the pattern must be bytes too: matching a str
    # pattern against bytes raises TypeError on Python 3.
    if not re.match(
        br'svn:[0-9a-f]{8,8}-[0-9a-f]{4,4}-'
        br'[0-9a-f]{4,4}-[0-9a-f]{4,4}-[0-9a-f]'
        br'{12,12}(.*)@[0-9]+$',
        revstr,
    ):
        raise error.Abort(
            _(b'%s entry %s is not a valid revision identifier')
            % (mapname, revstr)
        )
653 672
def numcommits(self):
    """Return the head revision number minus the start revision."""
    headrev = int(self.head.rsplit(b'@', 1)[1])
    return headrev - self.startrev
656 675
def gettags(self):
    """Return a dict mapping tag names to the revision ids they point to.

    Returns an empty dict when no tags directory is known
    (``self.tags is None``).
    """
    tags = {}
    if self.tags is None:
        return tags

    # svn tags are just a convention, project branches left in a
    # 'tags' directory. There is no other relationship than
    # ancestry, which is expensive to discover and makes them hard
    # to update incrementally. Worse, past revisions may be
    # referenced by tags far away in the future, requiring a deep
    # history traversal on every calculation. Current code
    # performs a single backward traversal, tracking moves within
    # the tags directory (tag renaming) and recording a new tag
    # every time a project is copied from outside the tags
    # directory. It also lists deleted tags, this behaviour may
    # change in the future.
    pendings = []
    tagspath = self.tags
    start = svn.ra.get_latest_revnum(self.ra)
    stream = self._getlog([self.tags], start, self.startrev)
    try:
        for entry in stream:
            origpaths, revnum, author, date, message = entry
            if not origpaths:
                # Must stay a mapping: it is iterated with
                # pycompat.iteritems() below, which a list does not
                # support.
                origpaths = {}
            copies = [
                (e.copyfrom_path, e.copyfrom_rev, p)
                for p, e in pycompat.iteritems(origpaths)
                if e.copyfrom_path
            ]
            # Apply moves/copies from more specific to general
            copies.sort(reverse=True)

            srctagspath = tagspath
            if copies and copies[-1][2] == tagspath:
                # Track tags directory moves
                srctagspath = copies.pop()[0]

            for source, sourcerev, dest in copies:
                if not dest.startswith(tagspath + b'/'):
                    continue
                for tag in pendings:
                    if tag[0].startswith(dest):
                        tagpath = source + tag[0][len(dest) :]
                        tag[:2] = [tagpath, sourcerev]
                        break
                else:
                    pendings.append([source, sourcerev, dest])

            # Filter out tags with children coming from different
            # parts of the repository like:
            # /tags/tag.1 (from /trunk:10)
            # /tags/tag.1/foo (from /branches/foo:12)
            # Here /tags/tag.1 is discarded as well as its children.
            # It happens with tools like cvs2svn. Such tags cannot
            # be represented in mercurial.
            addeds = {
                p: e.copyfrom_path
                for p, e in pycompat.iteritems(origpaths)
                if e.action == b'A' and e.copyfrom_path
            }
            badroots = set()
            for destroot in addeds:
                for source, sourcerev, dest in pendings:
                    if not dest.startswith(
                        destroot + b'/'
                    ) or source.startswith(addeds[destroot] + b'/'):
                        continue
                    badroots.add(destroot)
                    break

            for badroot in badroots:
                pendings = [
                    p
                    for p in pendings
                    if p[2] != badroot
                    and not p[2].startswith(badroot + b'/')
                ]

            # Tell tag renamings from tag creations
            renamings = []
            for source, sourcerev, dest in pendings:
                tagname = dest.split(b'/')[-1]
                if source.startswith(srctagspath):
                    renamings.append([source, sourcerev, tagname])
                    continue
                if tagname in tags:
                    # Keep the latest tag value
                    continue
                # From revision may be fake, get one with changes
                try:
                    tagid = self.latest(source, sourcerev)
                    if tagid and tagname not in tags:
                        tags[tagname] = tagid
                except SvnPathNotFound:
                    # It happens when we are following directories
                    # we assumed were copied with their parents
                    # but were really created in the tag
                    # directory.
                    pass
            pendings = renamings
            tagspath = srctagspath
    finally:
        stream.close()
    return tags
762 781
def converted(self, rev, destrev):
    """Append a ``destrev revnum`` line to ``.svn/hg-shamap`` in the
    working copy. Does nothing when the source is not a working copy."""
    if not self.wc:
        return
    if self.convertfp is None:
        # Opened lazily on first use and kept open for later calls.
        mappath = os.path.join(self.wc, b'.svn', b'hg-shamap')
        self.convertfp = open(mappath, b'ab')
    line = b'%s %d\n' % (destrev, self.revnum(rev))
    self.convertfp.write(util.tonativeeol(line))
    self.convertfp.flush()
774 793
def revid(self, revnum, module=None):
    """Build the ``svn:<uuid><module>@<revnum>`` identifier, using
    ``self.module`` when ``module`` is empty or None."""
    target = module or self.module
    return b'svn:%s%s@%d' % (self.uuid, target, revnum)
777 796
def revnum(self, rev):
    """Return the integer that follows the last ``@`` in ``rev``."""
    number = rev.rsplit(b'@', 1)[-1]
    return int(number)
780 799
def latest(self, path, stop=None):
    """Find the latest revid affecting path, up to stop revision
    number. If stop is None, default to repository latest
    revision. It may return a revision in a different module,
    since a branch may be moved without a change being
    reported. Return None if computed module does not belong to
    rootmodule subtree.

    Raises SvnPathNotFound when path does not exist at the stop revision.
    """

    def findchanges(path, start, stop=None):
        # Walk the log backwards from ``start`` and follow renames of
        # ``path``. Returns (revnum, path); revnum is None when the last
        # entry read has no changed paths.
        stream = self._getlog([path], start, stop or 1)
        try:
            for entry in stream:
                paths, revnum, author, date, message = entry
                if stop is None and paths:
                    # We do not know the latest changed revision,
                    # keep the first one with changed paths.
                    break
                # Guard the comparison: with stop unset and an entry
                # without changed paths, comparing an int to None raises
                # TypeError on Python 3.
                if stop is not None and revnum <= stop:
                    break

                for p in paths:
                    if not path.startswith(p) or not paths[p].copyfrom_path:
                        continue
                    newpath = paths[p].copyfrom_path + path[len(p) :]
                    self.ui.debug(
                        b"branch renamed from %s to %s at %d\n"
                        % (path, newpath, revnum)
                    )
                    path = newpath
                    break
            if not paths:
                revnum = None
            return revnum, path
        finally:
            stream.close()

    if not path.startswith(self.rootmodule):
        # Requests on foreign branches may be forbidden at server level
        self.ui.debug(b'ignoring foreign branch %r\n' % path)
        return None

    if stop is None:
        stop = svn.ra.get_latest_revnum(self.ra)
    try:
        # stat() is issued relative to the repository root, then the
        # transport is moved back to where it was.
        prevmodule = self.reparent(b'')
        dirent = svn.ra.stat(self.ra, path.strip(b'/'), stop)
        self.reparent(prevmodule)
    except svn.core.SubversionException:
        dirent = None
    if not dirent:
        raise SvnPathNotFound(
            _(b'%s not found up to revision %d') % (path, stop)
        )

    # stat() gives us the previous revision on this line of
    # development, but it might be in *another module*. Fetch the
    # log and detect renames down to the latest revision.
    revnum, realpath = findchanges(path, stop, dirent.created_rev)
    if revnum is None:
        # Tools like svnsync can create empty revision, when
        # synchronizing only a subtree for instance. These empty
        # revisions created_rev still have their original values
        # despite all changes having disappeared and can be
        # returned by ra.stat(), at least when stating the root
        # module. In that case, do not trust created_rev and scan
        # the whole history.
        revnum, realpath = findchanges(path, stop)
        if revnum is None:
            self.ui.debug(b'ignoring empty branch %r\n' % realpath)
            return None

    if not realpath.startswith(self.rootmodule):
        self.ui.debug(b'ignoring foreign branch %r\n' % realpath)
        return None
    return self.revid(revnum, realpath)
857 876
def reparent(self, module):
    """Reparent the svn transport and return the previous parent."""
    if module == self.prevmodule:
        # Already there: nothing to do.
        return module
    svnurl = self.baseurl + quote(module)
    # Before the first reparent the previous module is reported as b''.
    previous = b'' if self.prevmodule is None else self.prevmodule
    self.ui.debug(b"reparent to %s\n" % svnurl)
    svn.ra.reparent(self.ra, svnurl)
    self.prevmodule = module
    return previous
870 889
871 890 def expandpaths(self, rev, paths, parents):
872 891 changed, removed = set(), set()
873 892 copies = {}
874 893
875 894 new_module, revnum = revsplit(rev)[1:]
876 895 if new_module != self.module:
877 896 self.module = new_module
878 897 self.reparent(self.module)
879 898
880 899 progress = self.ui.makeprogress(
881 900 _(b'scanning paths'), unit=_(b'paths'), total=len(paths)
882 901 )
883 902 for i, (path, ent) in enumerate(paths):
884 903 progress.update(i, item=path)
885 904 entrypath = self.getrelpath(path)
886 905
887 906 kind = self._checkpath(entrypath, revnum)
888 907 if kind == svn.core.svn_node_file:
889 908 changed.add(self.recode(entrypath))
890 909 if not ent.copyfrom_path or not parents:
891 910 continue
892 911 # Copy sources not in parent revisions cannot be
893 912 # represented, ignore their origin for now
894 913 pmodule, prevnum = revsplit(parents[0])[1:]
895 914 if ent.copyfrom_rev < prevnum:
896 915 continue
897 916 copyfrom_path = self.getrelpath(ent.copyfrom_path, pmodule)
898 917 if not copyfrom_path:
899 918 continue
900 919 self.ui.debug(
901 920 b"copied to %s from %s@%s\n"
902 921 % (entrypath, copyfrom_path, ent.copyfrom_rev)
903 922 )
904 923 copies[self.recode(entrypath)] = self.recode(copyfrom_path)
905 924 elif kind == 0: # gone, but had better be a deleted *file*
906 925 self.ui.debug(b"gone from %s\n" % ent.copyfrom_rev)
907 926 pmodule, prevnum = revsplit(parents[0])[1:]
908 927 parentpath = pmodule + b"/" + entrypath
909 928 fromkind = self._checkpath(entrypath, prevnum, pmodule)
910 929
911 930 if fromkind == svn.core.svn_node_file:
912 931 removed.add(self.recode(entrypath))
913 932 elif fromkind == svn.core.svn_node_dir:
914 933 oroot = parentpath.strip(b'/')
915 934 nroot = path.strip(b'/')
916 935 children = self._iterfiles(oroot, prevnum)
917 936 for childpath in children:
918 937 childpath = childpath.replace(oroot, nroot)
919 938 childpath = self.getrelpath(b"/" + childpath, pmodule)
920 939 if childpath:
921 940 removed.add(self.recode(childpath))
922 941 else:
923 942 self.ui.debug(
924 943 b'unknown path in revision %d: %s\n' % (revnum, path)
925 944 )
926 945 elif kind == svn.core.svn_node_dir:
927 946 if ent.action == b'M':
928 947 # If the directory just had a prop change,
929 948 # then we shouldn't need to look for its children.
930 949 continue
931 950 if ent.action == b'R' and parents:
932 951 # If a directory is replacing a file, mark the previous
933 952 # file as deleted
934 953 pmodule, prevnum = revsplit(parents[0])[1:]
935 954 pkind = self._checkpath(entrypath, prevnum, pmodule)
936 955 if pkind == svn.core.svn_node_file:
937 956 removed.add(self.recode(entrypath))
938 957 elif pkind == svn.core.svn_node_dir:
939 958 # We do not know what files were kept or removed,
940 959 # mark them all as changed.
941 960 for childpath in self._iterfiles(pmodule, prevnum):
942 961 childpath = self.getrelpath(b"/" + childpath)
943 962 if childpath:
944 963 changed.add(self.recode(childpath))
945 964
946 965 for childpath in self._iterfiles(path, revnum):
947 966 childpath = self.getrelpath(b"/" + childpath)
948 967 if childpath:
949 968 changed.add(self.recode(childpath))
950 969
951 970 # Handle directory copies
952 971 if not ent.copyfrom_path or not parents:
953 972 continue
954 973 # Copy sources not in parent revisions cannot be
955 974 # represented, ignore their origin for now
956 975 pmodule, prevnum = revsplit(parents[0])[1:]
957 976 if ent.copyfrom_rev < prevnum:
958 977 continue
959 978 copyfrompath = self.getrelpath(ent.copyfrom_path, pmodule)
960 979 if not copyfrompath:
961 980 continue
962 981 self.ui.debug(
963 982 b"mark %s came from %s:%d\n"
964 983 % (path, copyfrompath, ent.copyfrom_rev)
965 984 )
966 985 children = self._iterfiles(ent.copyfrom_path, ent.copyfrom_rev)
967 986 for childpath in children:
968 987 childpath = self.getrelpath(b"/" + childpath, pmodule)
969 988 if not childpath:
970 989 continue
971 990 copytopath = path + childpath[len(copyfrompath) :]
972 991 copytopath = self.getrelpath(copytopath)
973 992 copies[self.recode(copytopath)] = self.recode(childpath)
974 993
975 994 progress.complete()
976 995 changed.update(removed)
977 996 return (list(changed), removed, copies)
978 997
def _fetch_revisions(self, from_revnum, to_revnum):
    """Read the log of ``self.module`` and record the commits it contains.

    The two revision numbers may be given in either order; they are
    swapped so that the log is requested from the higher one to the
    lower one.  For every log entry that yields a commit, the commit
    object is stored in ``self.commits`` and its (paths, parents) pair in
    ``self.paths``, both keyed by revision id.

    Raises error.Abort when Subversion reports that the requested
    revision does not exist; any other SubversionException propagates.
    """
    if from_revnum < to_revnum:
        from_revnum, to_revnum = to_revnum, from_revnum

    # Most recently parsed commit; its parent is filled in when the next
    # entry of the log stream is parsed.
    self.child_cset = None

    def parselogentry(orig_paths, revnum, author, date, message):
        """Return the parsed commit object or None, and True if
        the revision is a branch root.
        """
        self.ui.debug(
            b"parsing revision %d (%d changes)\n"
            % (revnum, len(orig_paths))
        )

        branched = False
        rev = self.revid(revnum)
        # branch log might return entries for a parent we already have

        if rev in self.commits or revnum < to_revnum:
            return None, branched

        parents = []
        # check whether this revision is the start of a branch or part
        # of a branch renaming
        orig_paths = sorted(pycompat.iteritems(orig_paths))
        root_paths = [
            (p, e) for p, e in orig_paths if self.module.startswith(p)
        ]
        if root_paths:
            # The entries are sorted, so the last match is the longest
            # changed path that is a prefix of the module.
            path, ent = root_paths[-1]
            if ent.copyfrom_path:
                branched = True
                newpath = ent.copyfrom_path + self.module[len(path) :]
                # ent.copyfrom_rev may not be the actual last revision
                previd = self.latest(newpath, ent.copyfrom_rev)
                if previd is not None:
                    prevmodule, prevnum = revsplit(previd)[1:]
                    if prevnum >= self.startrev:
                        parents = [previd]
                        self.ui.note(
                            _(b'found parent of branch %s at %d: %s\n')
                            % (self.module, prevnum, prevmodule)
                        )
            else:
                self.ui.debug(b"no copyfrom path, don't know what to do.\n")

        # filter out unrelated paths
        paths = [
            (path, ent)
            for path, ent in orig_paths
            if self.getrelpath(path) is not None
        ]

        # Example SVN datetime. Includes microseconds.
        # ISO-8601 conformant
        # '2007-01-04T17:35:00.902377Z'
        date = dateutil.parsedate(
            date[:19] + b" UTC", [b"%Y-%m-%dT%H:%M:%S"]
        )
        if self.ui.configbool(b'convert', b'localtimezone'):
            date = makedatetimestamp(date[0])

        log = self.recode(message) if message else b''
        author = self.recode(author) if author else b''

        # split() always yields at least one element, so indexing the
        # last component cannot fail.
        branch = self.module.split(b"/")[-1]
        if branch == self.trunkname:
            branch = None

        cset = commit(
            author=author,
            date=dateutil.datestr(date, b'%Y-%m-%d %H:%M:%S %1%2'),
            desc=log,
            parents=parents,
            branch=branch,
            rev=rev,
        )

        self.commits[rev] = cset
        # The parents list is *shared* among self.paths and the
        # commit object.  Both will be updated below.
        self.paths[rev] = (paths, cset.parents)
        if self.child_cset and not self.child_cset.parents:
            self.child_cset.parents[:] = [rev]
        self.child_cset = cset
        return cset, branched

    self.ui.note(
        _(b'fetching revision log for "%s" from %d to %d\n')
        % (self.module, from_revnum, to_revnum)
    )

    try:
        firstcset = None
        lastonbranch = False
        stream = self._getlog([self.module], from_revnum, to_revnum)
        try:
            for entry in stream:
                paths, revnum, author, date, message = entry
                if revnum < self.startrev:
                    lastonbranch = True
                    break
                if not paths:
                    self.ui.debug(b'revision %d has no entries\n' % revnum)
                    # If we ever leave the loop on an empty
                    # revision, do not try to get a parent branch
                    lastonbranch = lastonbranch or revnum == 0
                    continue
                cset, lastonbranch = parselogentry(
                    paths, revnum, author, date, message
                )
                if cset:
                    firstcset = cset
                if lastonbranch:
                    break
        finally:
            stream.close()

        if not lastonbranch and firstcset and not firstcset.parents:
            # The first revision of the sequence (the last fetched one)
            # has invalid parents if not a branch root. Find the parent
            # revision now, if any.
            try:
                firstrevnum = self.revnum(firstcset.rev)
                if firstrevnum > 1:
                    latest = self.latest(self.module, firstrevnum - 1)
                    if latest:
                        firstcset.parents.append(latest)
            except SvnPathNotFound:
                pass
    except svn.core.SubversionException as e:
        _inst, num = e.args
        if num == svn.core.SVN_ERR_FS_NO_SUCH_REVISION:
            # to_revnum is an integer (it is formatted with %d above);
            # bytes %-formatting rejects an int for %s on Python 3.
            raise error.Abort(
                _(b'svn: branch has no revision %d') % to_revnum
            )
        raise
1127 1146
def getfile(self, file, rev):
    """Return ``(data, mode)`` for ``file`` at revision id ``rev``.

    ``(None, None)`` is returned when the file is listed in
    ``self.removed`` or when Subversion reports it as not found.  ``mode``
    is b'l' if the file has the ``svn:special`` property, else b'x' if it
    has ``svn:executable``, else b''.  For b'l' entries a leading
    b"link " is stripped from the data.

    Subversion errors other than "not found" propagate to the caller.
    """
    # TODO: ra.get_file transmits the whole file instead of diffs.
    if file in self.removed:
        return None, None
    try:
        new_module, revnum = revsplit(rev)[1:]
        if self.module != new_module:
            self.module = new_module
            self.reparent(self.module)
        io = stringio()
        try:
            info = svn.ra.get_file(self.ra, file, revnum, io)
            data = io.getvalue()
        finally:
            # ra.get_file() seems to keep a reference on the input buffer
            # preventing collection. Release it explicitly, also when the
            # fetch itself fails.
            io.close()
        if isinstance(info, list):
            info = info[-1]
        mode = b'x' if b"svn:executable" in info else b''
        if b"svn:special" in info:
            mode = b'l'
    except svn.core.SubversionException as e:
        notfound = (
            svn.core.SVN_ERR_FS_NOT_FOUND,
            svn.core.SVN_ERR_RA_DAV_PATH_NOT_FOUND,
        )
        if e.apr_err in notfound:  # File not found
            return None, None
        raise
    if mode == b'l':
        link_prefix = b"link "
        if data.startswith(link_prefix):
            data = data[len(link_prefix) :]
    return data, mode
1160 1179
def _iterfiles(self, path, revnum):
    """Enumerate all files in path at revnum, recursively."""
    root = path.strip(b'/')
    pool = svn.core.Pool()
    url = b'/'.join([self.baseurl, quote(root)]).strip(b'/')
    listing = svn.client.ls(url, optrev(revnum), True, self.ctx, pool)
    # Listed names are joined onto the requested path; an empty root
    # gets no separator.
    prefix = root + b'/' if root else root
    return (
        prefix + name
        for name, entry in pycompat.iteritems(listing)
        if entry.kind == svn.core.svn_node_file
    )
1174 1193
def getrelpath(self, path, module=None):
    """Return ``path`` relative to ``module`` (default: ``self.module``).

    Returns b'' when ``path`` is the module itself and None (after a
    debug message) when ``path`` lies outside the module.
    """
    base = self.module if module is None else module
    # Given the repository url of this wc, say
    #   "http://server/plone/CMFPlone/branches/Plone-2_0-branch"
    # extract the "entry" portion (a relative path) from what
    # svn log --xml says, i.e.
    #   "/CMFPlone/branches/Plone-2_0-branch/tests/PloneTestCase.py"
    # that is to say "tests/PloneTestCase.py"
    if path.startswith(base):
        tail = path.rstrip(b'/')[len(base) :]
        if not tail:
            return tail
        if tail.startswith(b'/'):
            return tail[1:]
        # Otherwise the module name is only a prefix of a sibling name
        # (e.g. "/trunkfoo" for "/trunk"): fall through.

    # The path is outside our tracked tree...
    self.ui.debug(b'%r is not under %r, ignoring\n' % (path, base))
    return None
1194 1213
def _checkpath(self, path, revnum, module=None):
    """Return what ``svn.ra.check_path`` reports for ``path`` at ``revnum``.

    When ``module`` is given, the session is reparented to b'' for the
    duration of the check, ``path`` is looked up under ``module``, and the
    previous parent is restored afterwards.
    """
    reparented = module is not None
    if reparented:
        prevmodule = self.reparent(b'')
        path = module + b'/' + path
    try:
        # ra.check_path does not like leading slashes very much, it leads
        # to PROPFIND subversion errors
        target = path.strip(b'/')
        return svn.ra.check_path(self.ra, target, revnum)
    finally:
        if reparented:
            self.reparent(prevmodule)
1206 1225
def _getlog(
    self,
    paths,
    start,
    end,
    limit=0,
    discover_changed_paths=True,
    strict_node_history=False,
):
    """Return a log stream for ``paths`` between ``start`` and ``end``.

    By default the log is read in-process through ``directlogstream``.
    With the developer option ``convert.svn.debugsvnlog`` set, the
    encoded arguments are piped to a ``hg debugsvnlog`` child process and
    its output is wrapped in ``logstream``.
    """
    # Normalize path names, svn >= 1.5 only wants paths relative to
    # supplied URL
    relpaths = [
        (p if p.startswith(b'/') else self.module + b'/' + p).strip(b'/')
        for p in paths
    ]
    args = [
        self.baseurl,
        relpaths,
        start,
        end,
        limit,
        discover_changed_paths,
        strict_node_history,
    ]
    # developer config: convert.svn.debugsvnlog
    if self.ui.configbool(b'convert', b'svn.debugsvnlog'):
        payload = encodeargs(args)
        command = b'%s debugsvnlog' % procutil.shellquote(
            procutil.hgexecutable()
        )
        stdin, stdout = procutil.popen2(procutil.quotecommand(command))
        stdin.write(payload)
        try:
            stdin.close()
        except IOError:
            raise error.Abort(
                _(
                    b'Mercurial failed to run itself, check'
                    b' hg executable is in PATH'
                )
            )
        return logstream(stdout)
    return directlogstream(*args)
1250 1269
1251 1270
# Shell script written to ``hooks/pre-revprop-change`` of a repository that
# svn_sink has just created.  It exits 0 for a modification ("M") of svn:log
# and for the addition ("A") of hg:convert-branch or hg:convert-rev; any
# other revision property change is rejected with exit status 1.
pre_revprop_change = b'''#!/bin/sh

REPOS="$1"
REV="$2"
USER="$3"
PROPNAME="$4"
ACTION="$5"

if [ "$ACTION" = "M" -a "$PROPNAME" = "svn:log" ]; then exit 0; fi
if [ "$ACTION" = "A" -a "$PROPNAME" = "hg:convert-branch" ]; then exit 0; fi
if [ "$ACTION" = "A" -a "$PROPNAME" = "hg:convert-rev" ]; then exit 0; fi

echo "Changing prohibited revision property" >&2
exit 1
'''
1267 1286
1268 1287
class svn_sink(converter_sink, commandline):
    """Conversion sink that commits into a Subversion working copy.

    All Subversion operations are performed by running the ``svn`` (and,
    for repository creation, ``svnadmin``) command line tools.
    """

    # Extracts the new revision number from the output of "svn commit".
    commit_re = re.compile(br'Committed revision (\d+).', re.M)
    # Extracts the repository UUID from the output of "svn info".
    uuid_re = re.compile(br'Repository UUID:\s*(\S+)', re.M)

    def prerun(self):
        """Change directory into the working copy, once one is known."""
        # NOTE(review): presumably called by ``commandline`` right before
        # each command is run -- confirm in common.py.
        if self.wc:
            os.chdir(self.wc)

    def postrun(self):
        """Change directory back to the directory the sink started in."""
        if self.wc:
            os.chdir(self.cwd)

    def join(self, name):
        """Return the path of ``name`` inside the working copy's .svn dir."""
        return os.path.join(self.wc, b'.svn', name)

    def revmapfile(self):
        """Return the path of the revision map file (``hg-shamap``)."""
        return self.join(b'hg-shamap')

    def authorfile(self):
        """Return the path of the author map file (``hg-authormap``)."""
        return self.join(b'hg-authormap')

    def __init__(self, ui, repotype, path):
        """Prepare a working copy for ``path``.

        If ``path`` already is a working copy (it has ``.svn/entries``) it
        is updated and used directly.  Otherwise ``path`` is treated as a
        repository: a local path is turned into a ``file://`` URL (the
        repository being created first if it has no ``db/fs-type``), and
        it is checked out into ``<basename>-wc`` under the current
        directory.
        """
        converter_sink.__init__(self, ui, repotype, path)
        commandline.__init__(self, ui, b'svn')
        # Pending operations, flushed by putcommit().
        self.delete = []
        self.setexec = []
        self.delexec = []
        self.copies = []
        self.wc = None
        self.cwd = encoding.getcwd()

        # Path of the repository if this call created it, else False.
        created = False
        if os.path.isfile(os.path.join(path, b'.svn', b'entries')):
            self.wc = os.path.realpath(path)
            self.run0(b'update')
        else:
            if not re.search(br'^(file|http|https|svn|svn\+ssh)://', path):
                path = os.path.realpath(path)
                if os.path.isdir(os.path.dirname(path)):
                    if not os.path.exists(
                        os.path.join(path, b'db', b'fs-type')
                    ):
                        ui.status(
                            _(b"initializing svn repository '%s'\n")
                            % os.path.basename(path)
                        )
                        commandline(ui, b'svnadmin').run0(b'create', path)
                        created = path
                    path = util.normpath(path)
                    if not path.startswith(b'/'):
                        path = b'/' + path
                    path = b'file://' + path

            wcpath = os.path.join(
                encoding.getcwd(), os.path.basename(path) + b'-wc'
            )
            ui.status(
                _(b"initializing svn working copy '%s'\n")
                % os.path.basename(wcpath)
            )
            self.run0(b'checkout', path, wcpath)

            self.wc = wcpath
        self.opener = vfsmod.vfs(self.wc)
        self.wopener = vfsmod.vfs(self.wc)
        self.childmap = mapfile(ui, self.join(b'hg-childmap'))
        if util.checkexec(self.wc):
            self.is_exec = util.isexec
        else:
            self.is_exec = None

        if created:
            # Install the hook that lets putcommit() set the
            # hg:convert-* revision properties on the new repository.
            hook = os.path.join(created, b'hooks', b'pre-revprop-change')
            with open(hook, b'wb') as fp:
                fp.write(pre_revprop_change)
            util.setflags(hook, False, True)

        output = self.run0(b'info')
        self.uuid = self.uuid_re.search(output).group(1).strip()

    def wjoin(self, *names):
        """Join ``names`` onto the working copy path."""
        return os.path.join(self.wc, *names)

    @propertycache
    def manifest(self):
        """Set of entry names reported by a recursive ``svn ls``."""
        # As of svn 1.7, the "add" command fails when receiving
        # already tracked entries, so we have to track and filter them
        # ourselves.
        m = set()
        output = self.run0(b'ls', recursive=True, xml=True)
        doc = xml.dom.minidom.parseString(output)
        for e in doc.getElementsByTagName('entry'):
            for n in e.childNodes:
                if n.nodeType != n.ELEMENT_NODE or n.tagName != 'name':
                    continue
                name = ''.join(
                    c.data for c in n.childNodes if c.nodeType == c.TEXT_NODE
                )
                # Entries are compared with names coming from
                # mercurial, so bytes with undefined encoding. Our
                # best bet is to assume they are in local
                # encoding. They will be passed to command line calls
                # later anyway, so they better be.
                m.add(encoding.unitolocal(name))
                break
        return m

    def putfile(self, filename, flags, data):
        """Write ``data`` to ``filename`` in the working copy.

        With b'l' in ``flags`` a symlink pointing at ``data`` is created.
        Otherwise the file is written and, when the filesystem supports
        the exec bit, the needed svn:executable change is queued in
        ``self.setexec`` / ``self.delexec`` for the next commit.
        """
        if b'l' in flags:
            self.wopener.symlink(data, filename)
        else:
            wfile = self.wjoin(filename)
            try:
                # Remove the symlink inside the working copy; a bare
                # ``filename`` would be resolved against the process
                # working directory instead.
                if os.path.islink(wfile):
                    os.unlink(wfile)
            except OSError:
                pass

            if self.is_exec:
                # We need to check executability of the file before the change,
                # because `vfs.write` is able to reset exec bit.
                wasexec = False
                if os.path.exists(wfile):
                    wasexec = self.is_exec(wfile)

            self.wopener.write(filename, data)

            if self.is_exec:
                if wasexec:
                    if b'x' not in flags:
                        self.delexec.append(filename)
                else:
                    if b'x' in flags:
                        self.setexec.append(filename)
                util.setflags(wfile, False, b'x' in flags)

    def _copyfile(self, source, dest):
        """Record a copy from ``source`` to ``dest`` with ``svn copy``."""
        # SVN's copy command pukes if the destination file exists, but
        # our copyfile method expects to record a copy that has
        # already occurred.  Cross the semantic gap.
        wdest = self.wjoin(dest)
        exists = os.path.lexists(wdest)
        if exists:
            # Reserve a unique name next to the destination and move the
            # already written file out of the way.
            fd, tempname = pycompat.mkstemp(
                prefix=b'hg-copy-', dir=os.path.dirname(wdest)
            )
            os.close(fd)
            os.unlink(tempname)
            os.rename(wdest, tempname)
        try:
            self.run0(b'copy', source, dest)
        finally:
            self.manifest.add(dest)
            if exists:
                # Put the converted content back over whatever
                # "svn copy" created.
                try:
                    os.unlink(wdest)
                except OSError:
                    pass
                os.rename(tempname, wdest)

    def dirs_of(self, files):
        """Return the set of directories involved in ``files``.

        This is every entry of ``files`` that is itself a directory in the
        working copy, plus every ancestor directory of every entry.
        """
        dirs = set()
        for f in files:
            if os.path.isdir(self.wjoin(f)):
                dirs.add(f)
            i = len(f)
            # Each call searches for the last b'/' left of the previous
            # hit; iteration stops when rfind() returns the sentinel -1.
            for i in iter(lambda: f.rfind(b'/', 0, i), -1):
                dirs.add(f[:i])
        return dirs

    def add_dirs(self, files):
        """``svn add`` the untracked directories of ``files``; return them."""
        add_dirs = [
            d for d in sorted(self.dirs_of(files)) if d not in self.manifest
        ]
        if add_dirs:
            self.manifest.update(add_dirs)
            self.xargs(add_dirs, b'add', non_recursive=True, quiet=True)
        return add_dirs

    def add_files(self, files):
        """``svn add`` the untracked entries of ``files``; return them."""
        files = [f for f in files if f not in self.manifest]
        if files:
            self.manifest.update(files)
            self.xargs(files, b'add', quiet=True)
        return files

    def addchild(self, parent, child):
        """Remember ``child`` as the revision committed on top of ``parent``."""
        self.childmap[parent] = child

    def revid(self, rev):
        """Return the revision id ``svn:<uuid>@<rev>`` for ``rev``."""
        return b"svn:%s@%s" % (self.uuid, rev)

    def putcommit(
        self, files, copies, parents, commit, source, revmap, full, cleanp2
    ):
        """Apply one changeset to the working copy and commit it.

        ``files`` is an iterable of (name, version) pairs fetched through
        ``source.getfile``.  If any of ``parents`` already has a child in
        ``self.childmap``, that child's revision id is returned and
        nothing is committed.  Otherwise returns the id of the new
        revision, or -- when svn reports no committed revision and there
        were no files -- the first parent (b'None' without parents).

        Raises error.Abort if the output of ``svn commit`` cannot be
        parsed although files were committed.
        """
        for parent in parents:
            try:
                return self.revid(self.childmap[parent])
            except KeyError:
                pass

        # Apply changes to working copy
        for f, v in files:
            data, mode = source.getfile(f, v)
            if data is None:
                self.delete.append(f)
            else:
                self.putfile(f, mode, data)
                if f in copies:
                    self.copies.append([copies[f], f])
        if full:
            # NOTE(review): ``files`` still holds (name, version) pairs
            # here while the manifest holds names, so this difference
            # looks like it always yields the whole manifest -- confirm
            # whether that is intended.
            self.delete.extend(sorted(self.manifest.difference(files)))
        files = [f[0] for f in files]

        entries = set(self.delete)
        files = frozenset(files)
        entries.update(self.add_dirs(files.difference(entries)))
        if self.copies:
            for s, d in self.copies:
                self._copyfile(s, d)
            self.copies = []
        if self.delete:
            self.xargs(self.delete, b'delete')
            for f in self.delete:
                self.manifest.remove(f)
            self.delete = []
        entries.update(self.add_files(files.difference(entries)))
        if self.delexec:
            self.xargs(self.delexec, b'propdel', b'svn:executable')
            self.delexec = []
        if self.setexec:
            self.xargs(self.setexec, b'propset', b'svn:executable', b'*')
            self.setexec = []

        fd, messagefile = pycompat.mkstemp(prefix=b'hg-convert-')
        try:
            # Written inside the try block so that the temporary file is
            # removed even if writing the message fails.
            with os.fdopen(fd, 'wb') as fp:
                fp.write(util.tonativeeol(commit.desc))
            output = self.run0(
                b'commit',
                username=stringutil.shortuser(commit.author),
                file=messagefile,
                encoding=b'utf-8',
            )
            try:
                rev = self.commit_re.search(output).group(1)
            except AttributeError:
                # No "Committed revision" line in the output.
                if not files:
                    return parents[0] if parents else b'None'
                self.ui.warn(_(b'unexpected svn output:\n'))
                self.ui.warn(output)
                raise error.Abort(_(b'unable to cope with svn output'))
            if commit.rev:
                self.run(
                    b'propset',
                    b'hg:convert-rev',
                    commit.rev,
                    revprop=True,
                    revision=rev,
                )
            if commit.branch and commit.branch != b'default':
                self.run(
                    b'propset',
                    b'hg:convert-branch',
                    commit.branch,
                    revprop=True,
                    revision=rev,
                )
            for parent in parents:
                self.addchild(parent, rev)
            return self.revid(rev)
        finally:
            os.unlink(messagefile)

    def puttags(self, tags):
        """Warn that tags cannot be written; return ``(None, None)``."""
        self.ui.warn(_(b'writing Subversion tags is not yet implemented\n'))
        return None, None

    def hascommitfrommap(self, rev):
        """Always return True (see the comment below)."""
        # We trust that revisions referenced in a map are still present
        # TODO: implement something better if necessary and feasible
        return True

    def hascommitforsplicemap(self, rev):
        """Return True if ``rev`` is in the child map, else abort."""
        # This is not correct as one can convert to an existing subversion
        # repository and childmap would not list all revisions. Too bad.
        if rev in self.childmap:
            return True
        raise error.Abort(
            _(
                b'splice map revision %s not found in subversion '
                b'child map (revision lookups are not implemented)'
            )
            % rev
        )
@@ -1,154 +1,174 b''
1 1 #require svn svn-bindings
2 2
3 3 $ cat >> $HGRCPATH <<EOF
4 4 > [extensions]
5 5 > convert =
6 6 > EOF
7 7
8 8 $ svnadmin create svn-repo
9 9 $ svnadmin load -q svn-repo < "$TESTDIR/svn/encoding.svndump"
10 10
11 11 Convert while testing all possible outputs
12 12
13 13 $ hg --debug convert svn-repo A-hg --config progress.debug=1
14 14 initializing destination A-hg repository
15 15 reparent to file:/*/$TESTTMP/svn-repo (glob)
16 16 run hg sink pre-conversion action
17 17 scanning source...
18 18 found trunk at 'trunk'
19 19 found tags at 'tags'
20 20 found branches at 'branches'
21 21 found branch branch\xc3\xa9 at 5 (esc)
22 22 found branch branch\xc3\xa9e at 6 (esc)
23 23 scanning: 1/4 revisions (25.00%)
24 24 reparent to file:/*/$TESTTMP/svn-repo/trunk (glob)
25 25 fetching revision log for "/trunk" from 4 to 0
26 26 parsing revision 4 (2 changes)
27 27 parsing revision 3 (4 changes)
28 28 parsing revision 2 (3 changes)
29 29 parsing revision 1 (3 changes)
30 30 no copyfrom path, don't know what to do.
31 31 '/branches' is not under '/trunk', ignoring
32 32 '/tags' is not under '/trunk', ignoring
33 33 scanning: 2/4 revisions (50.00%)
34 34 reparent to file:/*/$TESTTMP/svn-repo/branches/branch%C3%A9 (glob)
35 35 fetching revision log for "/branches/branch\xc3\xa9" from 5 to 0 (esc)
36 36 parsing revision 5 (1 changes)
37 37 reparent to file:/*/$TESTTMP/svn-repo (glob)
38 38 reparent to file:/*/$TESTTMP/svn-repo/branches/branch%C3%A9 (glob)
39 39 found parent of branch /branches/branch\xc3\xa9 at 4: /trunk (esc)
40 40 scanning: 3/4 revisions (75.00%)
41 41 reparent to file:/*/$TESTTMP/svn-repo/branches/branch%C3%A9e (glob)
42 42 fetching revision log for "/branches/branch\xc3\xa9e" from 6 to 0 (esc)
43 43 parsing revision 6 (1 changes)
44 44 reparent to file:/*/$TESTTMP/svn-repo (glob)
45 45 reparent to file:/*/$TESTTMP/svn-repo/branches/branch%C3%A9e (glob)
46 46 found parent of branch /branches/branch\xc3\xa9e at 5: /branches/branch\xc3\xa9 (esc)
47 47 scanning: 4/4 revisions (100.00%)
48 48 scanning: 5/4 revisions (125.00%)
49 49 scanning: 6/4 revisions (150.00%)
50 50 sorting...
51 51 converting...
52 52 5 init projA
53 53 source: svn:afeb9c47-92ff-4c0c-9f72-e1f6eb8ac9af/trunk@1
54 54 converting: 0/6 revisions (0.00%)
55 55 reusing manifest from p1 (no file change)
56 56 committing changelog
57 57 updating the branch cache
58 58 4 hello
59 59 source: svn:afeb9c47-92ff-4c0c-9f72-e1f6eb8ac9af/trunk@2
60 60 converting: 1/6 revisions (16.67%)
61 61 reparent to file:/*/$TESTTMP/svn-repo/trunk (glob)
62 62 scanning paths: /trunk/\xc3\xa0 0/3 paths (0.00%) (esc)
63 63 scanning paths: /trunk/\xc3\xa0/e\xcc\x81 1/3 paths (33.33%) (esc)
64 64 scanning paths: /trunk/\xc3\xa9 2/3 paths (66.67%) (esc)
65 65 committing files:
66 66 \xc3\xa0/e\xcc\x81 (esc)
67 67 getting files: \xc3\xa0/e\xcc\x81 1/2 files (50.00%) (esc)
68 68 \xc3\xa9 (esc)
69 69 getting files: \xc3\xa9 2/2 files (100.00%) (esc)
70 70 committing manifest
71 71 committing changelog
72 72 updating the branch cache
73 73 3 copy files
74 74 source: svn:afeb9c47-92ff-4c0c-9f72-e1f6eb8ac9af/trunk@3
75 75 converting: 2/6 revisions (33.33%)
76 76 scanning paths: /trunk/\xc3\xa0 0/4 paths (0.00%) (esc)
77 77 gone from -1
78 78 reparent to file:/*/$TESTTMP/svn-repo (glob)
79 79 reparent to file:/*/$TESTTMP/svn-repo/trunk (glob)
80 80 scanning paths: /trunk/\xc3\xa8 1/4 paths (25.00%) (esc)
81 81 copied to \xc3\xa8 from \xc3\xa9@2 (esc)
82 82 scanning paths: /trunk/\xc3\xa9 2/4 paths (50.00%) (esc)
83 83 gone from -1
84 84 reparent to file:/*/$TESTTMP/svn-repo (glob)
85 85 reparent to file:/*/$TESTTMP/svn-repo/trunk (glob)
86 86 scanning paths: /trunk/\xc3\xb9 3/4 paths (75.00%) (esc)
87 87 mark /trunk/\xc3\xb9 came from \xc3\xa0:2 (esc)
88 88 getting files: \xc3\xa0/e\xcc\x81 1/4 files (25.00%) (esc)
89 89 getting files: \xc3\xa9 2/4 files (50.00%) (esc)
90 90 committing files:
91 91 \xc3\xa8 (esc)
92 92 getting files: \xc3\xa8 3/4 files (75.00%) (esc)
93 93 \xc3\xa8: copy \xc3\xa9:6b67ccefd5ce6de77e7ead4f5292843a0255329f (esc)
94 94 \xc3\xb9/e\xcc\x81 (esc)
95 95 getting files: \xc3\xb9/e\xcc\x81 4/4 files (100.00%) (esc)
96 96 \xc3\xb9/e\xcc\x81: copy \xc3\xa0/e\xcc\x81:a9092a3d84a37b9993b5c73576f6de29b7ea50f6 (esc)
97 97 committing manifest
98 98 committing changelog
99 99 updating the branch cache
100 100 2 remove files
101 101 source: svn:afeb9c47-92ff-4c0c-9f72-e1f6eb8ac9af/trunk@4
102 102 converting: 3/6 revisions (50.00%)
103 103 scanning paths: /trunk/\xc3\xa8 0/2 paths (0.00%) (esc)
104 104 gone from -1
105 105 reparent to file:/*/$TESTTMP/svn-repo (glob)
106 106 reparent to file:/*/$TESTTMP/svn-repo/trunk (glob)
107 107 scanning paths: /trunk/\xc3\xb9 1/2 paths (50.00%) (esc)
108 108 gone from -1
109 109 reparent to file:/*/$TESTTMP/svn-repo (glob)
110 110 reparent to file:/*/$TESTTMP/svn-repo/trunk (glob)
111 111 getting files: \xc3\xa8 1/2 files (50.00%) (esc)
112 112 getting files: \xc3\xb9/e\xcc\x81 2/2 files (100.00%) (esc)
113 113 committing files:
114 114 committing manifest
115 115 committing changelog
116 116 updating the branch cache
117 117 1 branch to branch?
118 118 source: svn:afeb9c47-92ff-4c0c-9f72-e1f6eb8ac9af/branches/branch?@5
119 119 converting: 4/6 revisions (66.67%)
120 120 reparent to file:/*/$TESTTMP/svn-repo/branches/branch%C3%A9 (glob)
121 121 scanning paths: /branches/branch\xc3\xa9 0/1 paths (0.00%) (esc)
122 122 reusing manifest from p1 (no file change)
123 123 committing changelog
124 124 updating the branch cache
125 125 0 branch to branch?e
126 126 source: svn:afeb9c47-92ff-4c0c-9f72-e1f6eb8ac9af/branches/branch?e@6
127 127 converting: 5/6 revisions (83.33%)
128 128 reparent to file:/*/$TESTTMP/svn-repo/branches/branch%C3%A9e (glob)
129 129 scanning paths: /branches/branch\xc3\xa9e 0/1 paths (0.00%) (esc)
130 130 reusing manifest from p1 (no file change)
131 131 committing changelog
132 132 updating the branch cache
133 133 reparent to file:/*/$TESTTMP/svn-repo (glob)
134 134 reparent to file:/*/$TESTTMP/svn-repo/branches/branch%C3%A9e (glob)
135 135 reparent to file:/*/$TESTTMP/svn-repo (glob)
136 136 reparent to file:/*/$TESTTMP/svn-repo/branches/branch%C3%A9e (glob)
137 137 updating tags
138 138 committing files:
139 139 .hgtags
140 140 committing manifest
141 141 committing changelog
142 142 updating the branch cache
143 143 run hg sink post-conversion action
144 144 $ cd A-hg
145 145 $ hg up
146 146 1 files updated, 0 files merged, 0 files removed, 0 files unresolved
147 147
148 148 Check tags are in UTF-8
149 149
150 150 $ cat .hgtags
151 151 e94e4422020e715add80525e8f0f46c9968689f1 branch\xc3\xa9e (esc)
152 152 f7e66f98380ed1e53a797c5c7a7a2616a7ab377d branch\xc3\xa9 (esc)
153 153
154 154 $ cd ..
155
156 #if py3
157 For now, on Python 3, we abort when encountering non-UTF-8 percent-encoded
158 bytes in a filename.
159
160 $ hg convert file:///%ff test
161 initializing destination test repository
162 on Python 3, we currently do not support non-UTF-8 percent-encoded bytes in file URLs for Subversion repositories
163 file:///%ff does not look like a CVS checkout
164 $TESTTMP/file:/%ff does not look like a Git repository
165 file:///%ff does not look like a Subversion repository
166 file:///%ff is not a local Mercurial repository
167 file:///%ff does not look like a darcs repository
168 file:///%ff does not look like a monotone repository
169 file:///%ff does not look like a GNU Arch repository
170 file:///%ff does not look like a Bazaar repository
171 file:///%ff does not look like a P4 repository
172 abort: file:///%ff: missing or unsupported repository
173 [255]
174 #endif
General Comments 0
You need to be logged in to leave comments. Login now