##// END OF EJS Templates
py3: pass regex as bytes
Manuel Jacob -
r45498:423e20c7 stable
parent child Browse files
Show More
@@ -1,1589 +1,1589 b''
1 1 # Subversion 1.4/1.5 Python API backend
2 2 #
3 3 # Copyright(C) 2007 Daniel Holth et al
4 4 from __future__ import absolute_import
5 5
6 6 import os
7 7 import re
8 8 import xml.dom.minidom
9 9
10 10 from mercurial.i18n import _
11 11 from mercurial.pycompat import open
12 12 from mercurial import (
13 13 encoding,
14 14 error,
15 15 pycompat,
16 16 util,
17 17 vfs as vfsmod,
18 18 )
19 19 from mercurial.utils import (
20 20 dateutil,
21 21 procutil,
22 22 stringutil,
23 23 )
24 24
25 25 from . import common
26 26
27 27 pickle = util.pickle
28 28 stringio = util.stringio
29 29 propertycache = util.propertycache
30 30 urlerr = util.urlerr
31 31 urlreq = util.urlreq
32 32
33 33 commandline = common.commandline
34 34 commit = common.commit
35 35 converter_sink = common.converter_sink
36 36 converter_source = common.converter_source
37 37 decodeargs = common.decodeargs
38 38 encodeargs = common.encodeargs
39 39 makedatetimestamp = common.makedatetimestamp
40 40 mapfile = common.mapfile
41 41 MissingTool = common.MissingTool
42 42 NoRepo = common.NoRepo
43 43
44 44 # Subversion stuff. Works best with very recent Python SVN bindings
45 45 # e.g. SVN 1.5 or backports. Thanks to the bzr folks for enhancing
46 46 # these bindings.
47 47
48 48 try:
49 49 import svn
50 50 import svn.client
51 51 import svn.core
52 52 import svn.ra
53 53 import svn.delta
54 54 from . import transport
55 55 import warnings
56 56
57 57 warnings.filterwarnings(
58 58 'ignore', module='svn.core', category=DeprecationWarning
59 59 )
60 60 svn.core.SubversionException # trigger import to catch error
61 61
62 62 except ImportError:
63 63 svn = None
64 64
65 65
class SvnPathNotFound(Exception):
    """Signals that a path could not be found in the repository."""
68 68
69 69
def revsplit(rev):
    """Parse a revision string and return (uuid, path, revnum).
    >>> revsplit(b'svn:a2147622-4a9f-4db4-a8d3-13562ff547b2'
    ...          b'/proj%20B/mytrunk/mytrunk@1')
    ('a2147622-4a9f-4db4-a8d3-13562ff547b2', '/proj%20B/mytrunk/mytrunk', 1)
    >>> revsplit(b'svn:8af66a51-67f5-4354-b62c-98d67cc7be1d@1')
    ('', '', 1)
    >>> revsplit(b'@7')
    ('', '', 7)
    >>> revsplit(b'7')
    ('', '', 0)
    >>> revsplit(b'bad')
    ('', '', 0)
    """
    # Everything after the last '@' is the revision number; without an
    # '@' the whole string is the location and the revision is 0.
    location, at, tail = rev.rpartition(b'@')
    if at:
        revnum = int(tail)
    else:
        location, revnum = rev, 0

    # The location is only meaningful as "svn:<uuid>/<path>".
    prefix, slash, rest = location.partition(b'/')
    if slash and prefix.startswith(b'svn:'):
        return prefix[4:], b'/' + rest, revnum
    return b'', b'', revnum
95 95
96 96
def quote(s):
    """URL-quote ``s`` while leaving Subversion's "safe" characters alone."""
    # As of svn 1.7, many svn calls expect "canonical" paths. In
    # theory, we should call svn.core.*canonicalize() on all paths
    # before passing them to the API. Instead, we assume the base url
    # is canonical and copy the behaviour of svn URL encoding function
    # so we can extend it safely with new components. The "safe"
    # characters were taken from the "svn_uri__char_validity" table in
    # libsvn_subr/path.c.
    safechars = b"!$&'()*+,-./:=@_~"
    return urlreq.quote(s, safechars)
106 106
107 107
def geturl(path):
    """Return a canonical Subversion URL for ``path``.

    The svn client is asked first; when it raises a SubversionException
    an existing local directory is turned into a ``file://`` URL, and
    anything else is canonicalized as given.
    """
    try:
        return svn.client.url_from_path(svn.core.svn_path_canonicalize(path))
    except svn.core.SubversionException:
        # svn.client.url_from_path() fails with local repositories
        pass
    if not os.path.isdir(path):
        return svn.core.svn_path_canonicalize(path)

    localpath = os.path.normpath(os.path.abspath(path))
    if pycompat.iswindows:
        localpath = b'/' + util.normpath(localpath)
    # Module URL is later compared with the repository URL returned
    # by svn API, which is UTF-8.
    localpath = encoding.tolocal(localpath)
    fileurl = b'file://%s' % quote(localpath)
    return svn.core.svn_path_canonicalize(fileurl)
123 123
124 124
def optrev(number):
    """Build an ``svn_opt_revision_t`` that designates revision ``number``."""
    revision = svn.core.svn_opt_revision_t()
    revision.kind = svn.core.svn_opt_revision_number
    revision.value.number = number
    return revision
130 130
131 131
class changedpath(object):
    """Plain holder for the copy source and action of a changed path."""

    def __init__(self, p):
        # Copy only the three fields of ``p`` that the converter reads.
        self.copyfrom_path, self.copyfrom_rev, self.action = (
            p.copyfrom_path,
            p.copyfrom_rev,
            p.action,
        )
137 137
138 138
def get_log_child(
    fp,
    url,
    paths,
    start,
    end,
    limit=0,
    discover_changed_paths=True,
    strict_node_history=False,
):
    """Stream the svn log for ``paths`` to ``fp`` as pickles, then exit.

    Each log entry is pickled as a
    ``(paths, revnum, author, date, message)`` tuple. The stream is
    terminated by a pickled ``None`` on success or on IOError, or by a
    pickled bytes description of any other exception. The process is
    then ended with ``os._exit(0)``.
    """
    protocol = -1

    def emit(obj):
        pickle.dump(obj, fp, protocol)

    def receiver(orig_paths, revnum, author, date, message, pool):
        changed = {}
        if orig_paths is not None:
            changed = {
                k: changedpath(v) for k, v in pycompat.iteritems(orig_paths)
            }
        emit((changed, revnum, author, date, message))

    try:
        # Use an ra of our own so that our parent can consume
        # our results without confusing the server.
        t = transport.SvnRaTransport(url=url)
        svn.ra.get_log(
            t.ra,
            paths,
            start,
            end,
            limit,
            discover_changed_paths,
            strict_node_history,
            receiver,
        )
    except IOError:
        # Caller may interrupt the iteration
        emit(None)
    except Exception as inst:
        emit(stringutil.forcebytestr(inst))
    else:
        emit(None)
    fp.flush()
    # With large history, cleanup process goes crazy and suddenly
    # consumes *huge* amount of memory. The output file being closed,
    # there is no need for clean termination.
    os._exit(0)
184 184
185 185
def debugsvnlog(ui, **opts):
    """Fetch SVN log in a subprocess and channel them back to parent to
    avoid memory collection issues.
    """
    if svn is None:
        raise error.Abort(
            _(b'debugsvnlog could not load Subversion python bindings')
        )

    # The parent sends the encoded get_log_child() arguments on stdin
    # and reads the pickled log entries back from stdout.
    encoded = ui.fin.read()
    get_log_child(ui.fout, *decodeargs(encoded))
197 197
198 198
class logstream(object):
    """Interruptible revision log iterator.

    Reads pickled objects from ``stdout`` and yields every 5-element
    entry until a ``None`` terminator is read.
    """

    def __init__(self, stdout):
        self._stdout = stdout

    def __iter__(self):
        while True:
            try:
                entry = pickle.load(self._stdout)
            except EOFError:
                raise error.Abort(
                    _(
                        b'Mercurial failed to run itself, check'
                        b' hg executable is in PATH'
                    )
                )
            if entry is None:
                # Normal end-of-stream marker.
                break
            try:
                # Unpacking is only a shape check; the entry itself is
                # what gets handed to the consumer.
                orig_paths, revnum, author, date, message = entry
            except (TypeError, ValueError):
                raise error.Abort(_(b"log stream exception '%s'") % entry)
            yield entry

    def close(self):
        """Close the underlying stream once; later calls are no-ops."""
        if not self._stdout:
            return
        self._stdout.close()
        self._stdout = None
228 228
229 229
class directlogstream(list):
    """In-process revision log: a list filled with log entries.

    This can be used for debugging and development but it will probably
    leak memory and is not suitable for real conversions."""

    def __init__(
        self,
        url,
        paths,
        start,
        end,
        limit=0,
        discover_changed_paths=True,
        strict_node_history=False,
    ):
        def receiver(orig_paths, revnum, author, date, message, pool):
            changed = {}
            if orig_paths is not None:
                changed = {
                    k: changedpath(v)
                    for k, v in pycompat.iteritems(orig_paths)
                }
            self.append((changed, revnum, author, date, message))

        # Use an ra of our own so that our parent can consume
        # our results without confusing the server.
        t = transport.SvnRaTransport(url=url)
        svn.ra.get_log(
            t.ra,
            paths,
            start,
            end,
            limit,
            discover_changed_paths,
            strict_node_history,
            receiver,
        )

    def close(self):
        """Nothing to release; present to mirror logstream.close()."""
        pass
268 268
269 269
270 270 # Check to see if the given path is a local Subversion repo. Verify this by
271 271 # looking for several svn-specific files and directories in the given
272 272 # directory.
def filecheck(ui, path, proto):
    """Return True when ``path`` contains all svn repository marker entries.

    ``ui`` and ``proto`` are accepted for signature parity with the other
    probe functions and are not used.
    """
    markers = (b'locks', b'hooks', b'format', b'db')
    return all(os.path.exists(os.path.join(path, m)) for m in markers)
278 278
279 279
280 280 # Check to see if a given path is the root of an svn repo over http. We verify
281 281 # this by requesting a version-controlled URL we know can't exist and looking
282 282 # for the svn-specific "not found" XML.
def httpcheck(ui, path, proto):
    """Probe ``proto://path`` over HTTP for a Subversion repository root.

    A version-controlled URL that cannot exist is requested and the
    response is searched for the svn-specific "not found" XML.

    Returns True when that XML is found, or when the server answers with
    an HTTP error other than 404 (a warning is emitted and the URL is
    assumed to possibly be a repository). Returns False for any other
    failure.
    """
    try:
        opener = urlreq.buildopener()
        # On Python 3 the urllib opener only accepts a native str URL
        # (or a Request object). Handing it bytes raises inside open(),
        # which the generic handler below would turn into a silent
        # "not a repository" answer.
        probeurl = pycompat.strurl(
            b'%s://%s/!svn/ver/0/.svn' % (proto, path)
        )
        rsp = opener.open(probeurl, b'rb')
        data = rsp.read()
    except urlerr.httperror as inst:
        if inst.code != 404:
            # Except for 404 we cannot know for sure this is not an svn repo
            ui.warn(
                _(
                    b'svn: cannot probe remote repository, assume it could '
                    b'be a subversion repository. Use --source-type if you '
                    b'know better.\n'
                )
            )
            return True
        data = inst.fp.read()
    except Exception:
        # Could be urlerr.urlerror if the URL is invalid or anything else.
        return False
    return b'<m:human-readable errcode="160013">' in data
304 304
305 305
# Probe function to use for each URL scheme; every probe is called as
# probe(ui, path, proto).
protomap = {
    b'file': filecheck,
    b'http': httpcheck,
    b'https': httpcheck,
}
311 311
312 312
def issvnurl(ui, url):
    """Return True if ``url`` or one of its parent paths probes as svn.

    ``url`` is split into scheme and path; a value without ``://`` is
    treated as a local file path. The probe registered for the scheme in
    ``protomap`` is tried on the path and then on each parent obtained by
    dropping the last ``/`` component.
    """
    try:
        proto, path = url.split(b'://', 1)
        if proto == b'file':
            # Windows drive written as "/C%3a/..." -> "/C:/..."
            hasquoteddrive = (
                pycompat.iswindows
                and path[:1] == b'/'
                and path[1:2].isalpha()
                and path[2:6].lower() == b'%3a/'
            )
            if hasquoteddrive:
                path = path[:2] + b':/' + path[6:]
            # pycompat.fsdecode() / pycompat.fsencode() are used so that bytes
            # in the URL roundtrip correctly on Unix. urlreq.url2pathname() on
            # py3 will decode percent-encoded bytes using the utf-8 encoding
            # and the "replace" error handler. This means that it will not
            # preserve non-UTF-8 bytes (https://bugs.python.org/issue40983).
            # url.open() uses the reverse function (urlreq.pathname2url()) and
            # has a similar problem
            # (https://bz.mercurial-scm.org/show_bug.cgi?id=6357). It makes
            # sense to solve both problems together and handle all file URLs
            # consistently. For now, we warn.
            unicodepath = urlreq.url2pathname(pycompat.fsdecode(path))
            if pycompat.ispy3 and u'\N{REPLACEMENT CHARACTER}' in unicodepath:
                ui.warn(
                    _(
                        b'on Python 3, we currently do not support non-UTF-8 '
                        b'percent-encoded bytes in file URLs for Subversion '
                        b'repositories\n'
                    )
                )
            path = pycompat.fsencode(unicodepath)
    except ValueError:
        # No "://" separator: the value is a plain filesystem path.
        proto = b'file'
        path = os.path.abspath(url)
    if proto == b'file':
        path = util.pconvert(path)

    probe = protomap.get(proto, lambda *args: False)
    while b'/' in path:
        if probe(ui, path, proto):
            return True
        # Retry on the parent path.
        path = path.rsplit(b'/', 1)[0]
    return False
355 355
356 356
357 357 # SVN conversion code stolen from bzr-svn and tailor
358 358 #
359 359 # Subversion looks like a versioned filesystem, branches structures
360 360 # are defined by conventions and not enforced by the tool. First,
361 361 # we define the potential branches (modules) as "trunk" and "branches"
362 362 # children directories. Revisions are then identified by their
363 363 # module and revision number (and a repository identifier).
364 364 #
365 365 # The revision graph is really a tree (or a forest). By default, a
366 366 # revision parent is the previous revision in the same module. If the
367 367 # module directory is copied/moved from another module then the
368 368 # revision is the module root and its parent the source revision in
369 369 # the parent module. A revision has at most one parent.
370 370 #
371 371 class svn_source(converter_source):
def __init__(self, ui, repotype, url, revs=None):
    """Open the Subversion repository at ``url`` as a conversion source.

    Raises NoRepo when ``url`` does not look like a Subversion
    repository, MissingTool when the Python bindings are missing or
    older than 1.4, and error.Abort for unusable revision settings or
    when no revision is found in the module.
    """
    super(svn_source, self).__init__(ui, repotype, url, revs=revs)

    looks_like_svn = (
        url.startswith((b'svn://', b'svn+ssh://'))
        or (
            os.path.exists(url)
            and os.path.exists(os.path.join(url, b'.svn'))
        )
        or issvnurl(ui, url)
    )
    if not looks_like_svn:
        raise NoRepo(
            _(b"%s does not look like a Subversion repository") % url
        )
    if svn is None:
        raise MissingTool(_(b'could not load Subversion python bindings'))

    try:
        version = svn.core.SVN_VER_MAJOR, svn.core.SVN_VER_MINOR
        if version < (1, 4):
            raise MissingTool(
                _(
                    b'Subversion python bindings %d.%d found, '
                    b'1.4 or later required'
                )
                % version
            )
    except AttributeError:
        raise MissingTool(
            _(
                b'Subversion python bindings are too old, 1.4 '
                b'or later required'
            )
        )

    self.lastrevs = {}

    latest = None
    try:
        # Support file://path@rev syntax. Useful e.g. to convert
        # deleted branches.
        at = url.rfind(b'@')
        if at >= 0:
            latest = int(url[at + 1 :])
            url = url[:at]
    except ValueError:
        # The text after '@' is not a revision number: keep url whole.
        pass
    self.url = geturl(url)
    self.encoding = b'UTF-8'  # Subversion is always nominal UTF-8
    try:
        self.transport = transport.SvnRaTransport(url=self.url)
        self.ra = self.transport.ra
        self.ctx = self.transport.client
        self.baseurl = svn.ra.get_repos_root(self.ra)
        # Module is either empty or a repository path starting with
        # a slash and not ending with a slash.
        self.module = urlreq.unquote(self.url[len(self.baseurl) :])
        self.prevmodule = None
        self.rootmodule = self.module
        self.commits = {}
        self.paths = {}
        self.uuid = svn.ra.get_uuid(self.ra)
    except svn.core.SubversionException:
        ui.traceback()
        svnversion = b'%d.%d.%d' % (
            svn.core.SVN_VER_MAJOR,
            svn.core.SVN_VER_MINOR,
            svn.core.SVN_VER_MICRO,
        )
        raise NoRepo(
            _(
                b"%s does not look like a Subversion repository "
                b"to libsvn version %s"
            )
            % (self.url, svnversion)
        )

    if revs:
        if len(revs) > 1:
            raise error.Abort(
                _(
                    b'subversion source does not support '
                    b'specifying multiple revisions'
                )
            )
        try:
            latest = int(revs[0])
        except ValueError:
            raise error.Abort(
                _(b'svn: revision %s is not an integer') % revs[0]
            )

    trunkcfg = self.ui.config(b'convert', b'svn.trunk')
    if trunkcfg is None:
        trunkcfg = b'trunk'
    self.trunkname = trunkcfg.strip(b'/')
    self.startrev = self.ui.config(b'convert', b'svn.startrev')
    try:
        # Negative start revisions are clamped to 0.
        self.startrev = max(int(self.startrev), 0)
    except ValueError:
        raise error.Abort(
            _(b'svn: start revision %s is not an integer') % self.startrev
        )

    try:
        self.head = self.latest(self.module, latest)
    except SvnPathNotFound:
        self.head = None
    if not self.head:
        raise error.Abort(
            _(b'no revision found in module %s') % self.module
        )
    self.last_changed = self.revnum(self.head)

    self._changescache = (None, None)

    # Remember the working copy (if url is one) so converted() can
    # record the mapping there.
    hasentries = os.path.exists(os.path.join(url, b'.svn/entries'))
    self.wc = url if hasentries else None
    self.convertfp = None
496 496
def setrevmap(self, revmap):
    """Record, per module, the highest revision number found in ``revmap``."""
    newest = {}
    for revid in revmap:
        module, revnum = revsplit(revid)[1:]
        newest[module] = max(newest.get(module, revnum), revnum)
    self.lastrevs = newest
505 505
def exists(self, path, optrev):
    """Return True if ``path`` under the source URL can be listed at ``optrev``."""
    target = self.url.rstrip(b'/') + b'/' + quote(path)
    try:
        svn.client.ls(target, optrev, False, self.ctx)
    except svn.core.SubversionException:
        return False
    return True
517 517
def getheads(self):
    """Return the list of head revids to convert.

    The trunk, tags and branches locations are looked up from the
    ``convert.svn.*`` configuration (falling back to the conventional
    names). The first head is the module's own head; one more head is
    added per non-empty directory found under the branches location.

    Raises error.Abort when an explicitly configured location is
    missing, when no revision is found in the trunk module, or when
    ``svn.startrev`` cannot be honoured.
    """

    def isdir(path, revnum):
        kind = self._checkpath(path, revnum)
        return kind == svn.core.svn_node_dir

    def getcfgpath(name, rev):
        cfgpath = self.ui.config(b'convert', b'svn.' + name)
        # An explicitly blank setting disables this location.
        if cfgpath is not None and cfgpath.strip() == b'':
            return None
        path = (cfgpath or name).strip(b'/')
        if not self.exists(path, rev):
            if self.module.endswith(path) and name == b'trunk':
                # we are converting from inside this directory
                return None
            if cfgpath:
                # bytestr keeps the %r rendering free of a b'' prefix on
                # Python 3, like the "found ..." note below.
                raise error.Abort(
                    _(b'expected %s to be at %r, but not found')
                    % (name, pycompat.bytestr(path))
                )
            return None
        self.ui.note(
            _(b'found %s at %r\n') % (name, pycompat.bytestr(path))
        )
        return path

    rev = optrev(self.last_changed)
    oldmodule = b''
    trunk = getcfgpath(b'trunk', rev)
    self.tags = getcfgpath(b'tags', rev)
    branches = getcfgpath(b'branches', rev)

    # If the project has a trunk or branches, we will extract heads
    # from them. We keep the project root otherwise.
    if trunk:
        oldmodule = self.module or b''
        self.module += b'/' + trunk
        self.head = self.latest(self.module, self.last_changed)
        if not self.head:
            raise error.Abort(
                _(b'no revision found in module %s') % self.module
            )

    # First head in the list is the module's head
    self.heads = [self.head]
    if self.tags is not None:
        self.tags = b'%s/%s' % (oldmodule, (self.tags or b'tags'))

    # Check if branches bring a few more heads to the list
    if branches:
        rpath = self.url.strip(b'/')
        branchnames = svn.client.ls(
            rpath + b'/' + quote(branches), rev, False, self.ctx
        )
        for branch in sorted(branchnames):
            module = b'%s/%s/%s' % (oldmodule, branches, branch)
            if not isdir(module, self.last_changed):
                continue
            brevid = self.latest(module, self.last_changed)
            if not brevid:
                self.ui.note(_(b'ignoring empty branch %s\n') % branch)
                continue
            self.ui.note(
                _(b'found branch %s at %d\n')
                % (branch, self.revnum(brevid))
            )
            self.heads.append(brevid)

    if self.startrev and self.heads:
        if len(self.heads) > 1:
            raise error.Abort(
                _(
                    b'svn: start revision is not supported '
                    b'with more than one branch'
                )
            )
        revnum = self.revnum(self.heads[0])
        if revnum < self.startrev:
            raise error.Abort(
                _(b'svn: no revision found after start revision %d')
                % self.startrev
            )

    return self.heads
601 601
def _getchanges(self, rev, full):
    """Compute ``(files, copies)`` for ``rev``.

    ``files`` is a sorted list of ``(path, rev)`` pairs. A full listing
    of the module is used when ``full`` is set or the revision has no
    parents; otherwise the changed paths are expanded.
    """
    changedpaths, parents = self.paths[rev]
    copies = {}
    if parents:
        files, self.removed, copies = self.expandpaths(
            rev, changedpaths, parents
        )
    if full or not parents:
        # Perform a full checkout on roots
        module, revnum = revsplit(rev)[1:]
        listing = svn.client.ls(
            self.baseurl + quote(module), optrev(revnum), True, self.ctx
        )
        files = [
            name
            for name, entry in pycompat.iteritems(listing)
            if entry.kind == svn.core.svn_node_file
        ]
        self.removed = set()

    files.sort()
    return (pycompat.ziplist(files, [rev] * len(files)), copies)
623 623
def getchanges(self, rev, full):
    """Return ``(files, copies, set())`` for ``rev``.

    The result cached by getchangedfiles() is reused when it is for the
    same revision and ``full`` is not requested.
    """
    cachedrev, cached = self._changescache
    if cachedrev == rev and not full:
        # reuse cache from getchangedfiles
        files, copies = cached
        return (files, copies, set())

    files, copies = self._getchanges(rev, full)
    # caller caches the result, so free it here to release memory
    del self.paths[rev]
    return (files, copies, set())
633 633
def getchangedfiles(self, rev, i):
    """Return the names of the files changed in ``rev``.

    Called from filemap; the computed changes are cached so that a
    following getchanges() call for the same revision can reuse them.
    ``i`` is not used.
    """
    changes = self._getchanges(rev, False)
    self._changescache = (rev, changes)
    files = changes[0]
    return [entry[0] for entry in files]
639 639
def getcommit(self, rev):
    """Return the commit object for ``rev``, fetching it if needed.

    The returned commit is removed from the internal cache. Raises
    error.Abort when the revision cannot be found after fetching.
    """
    if rev not in self.commits:
        uuid, module, revnum = revsplit(rev)
        self.module = module
        self.reparent(module)
        # We assume that:
        # - requests for revisions after "stop" come from the
        # revision graph backward traversal. Cache all of them
        # down to stop, they will be used eventually.
        # - requests for revisions before "stop" come to get
        # isolated branches parents. Just fetch what is needed.
        stop = self.lastrevs.get(module, 0)
        if revnum < stop:
            stop = revnum + 1
        self._fetch_revisions(revnum, stop)
        if rev not in self.commits:
            # revnum is an int: bytes %-formatting needs %d here, %s
            # would raise TypeError on Python 3.
            raise error.Abort(_(b'svn: revision %d not found') % revnum)
    # caller caches the result, so free it here to release memory
    return self.commits.pop(rev)
661 661
def checkrevformat(self, revstr, mapname=b'splicemap'):
    """Abort unless ``revstr`` is a well-formed svn revision identifier.

    The expected form is ``svn:<uuid><path>@<revnum>``. ``mapname`` is
    only used in the error message. Returns None on success and raises
    error.Abort otherwise.
    """
    # revstr is bytes, so the pattern has to be bytes as well.
    if not re.match(
        br'svn:[0-9a-f]{8,8}-[0-9a-f]{4,4}-'
        br'[0-9a-f]{4,4}-[0-9a-f]{4,4}-[0-9a-f]'
        br'{12,12}(.*)@[0-9]+$',
        revstr,
    ):
        raise error.Abort(
            _(b'%s entry %s is not a valid revision identifier')
            % (mapname, revstr)
        )
674 674
def numcommits(self):
    """Return the head revision number minus the start revision."""
    headnum = self.head.rsplit(b'@', 1)[1]
    return int(headnum) - self.startrev
677 677
def gettags(self):
    """Return a dict mapping tag names to the revids they point to.

    An empty dict is returned when no tags location is set.
    """
    tags = {}
    if self.tags is None:
        return tags

    # svn tags are just a convention, project branches left in a
    # 'tags' directory. There is no other relationship than
    # ancestry, which is expensive to discover and makes them hard
    # to update incrementally. Worse, past revisions may be
    # referenced by tags far away in the future, requiring a deep
    # history traversal on every calculation. Current code
    # performs a single backward traversal, tracking moves within
    # the tags directory (tag renaming) and recording a new tag
    # everytime a project is copied from outside the tags
    # directory. It also lists deleted tags, this behaviour may
    # change in the future.
    pendings = []
    tagspath = self.tags
    start = svn.ra.get_latest_revnum(self.ra)
    stream = self._getlog([self.tags], start, self.startrev)
    try:
        for entry in stream:
            origpaths, revnum, author, date, message = entry
            if not origpaths:
                # Must stay a mapping: it is iterated with
                # pycompat.iteritems() below.
                origpaths = {}
            copies = [
                (e.copyfrom_path, e.copyfrom_rev, p)
                for p, e in pycompat.iteritems(origpaths)
                if e.copyfrom_path
            ]
            # Apply moves/copies from more specific to general
            copies.sort(reverse=True)

            srctagspath = tagspath
            if copies and copies[-1][2] == tagspath:
                # Track tags directory moves
                srctagspath = copies.pop()[0]

            for source, sourcerev, dest in copies:
                if not dest.startswith(tagspath + b'/'):
                    continue
                for tag in pendings:
                    if tag[0].startswith(dest):
                        tagpath = source + tag[0][len(dest) :]
                        tag[:2] = [tagpath, sourcerev]
                        break
                else:
                    pendings.append([source, sourcerev, dest])

            # Filter out tags with children coming from different
            # parts of the repository like:
            # /tags/tag.1 (from /trunk:10)
            # /tags/tag.1/foo (from /branches/foo:12)
            # Here /tags/tag.1 is discarded as well as its children.
            # It happens with tools like cvs2svn. Such tags cannot
            # be represented in mercurial.
            addeds = {
                p: e.copyfrom_path
                for p, e in pycompat.iteritems(origpaths)
                if e.action == b'A' and e.copyfrom_path
            }
            badroots = set()
            for destroot in addeds:
                for source, sourcerev, dest in pendings:
                    if not dest.startswith(
                        destroot + b'/'
                    ) or source.startswith(addeds[destroot] + b'/'):
                        continue
                    badroots.add(destroot)
                    break

            for badroot in badroots:
                pendings = [
                    p
                    for p in pendings
                    if p[2] != badroot
                    and not p[2].startswith(badroot + b'/')
                ]

            # Tell tag renamings from tag creations
            renamings = []
            for source, sourcerev, dest in pendings:
                tagname = dest.split(b'/')[-1]
                if source.startswith(srctagspath):
                    renamings.append([source, sourcerev, tagname])
                    continue
                if tagname in tags:
                    # Keep the latest tag value
                    continue
                # From revision may be fake, get one with changes
                try:
                    tagid = self.latest(source, sourcerev)
                    if tagid and tagname not in tags:
                        tags[tagname] = tagid
                except SvnPathNotFound:
                    # It happens when we are following directories
                    # we assumed were copied with their parents
                    # but were really created in the tag
                    # directory.
                    pass
            pendings = renamings
            tagspath = srctagspath
    finally:
        stream.close()
    return tags
783 783
def converted(self, rev, destrev):
    """Append "destrev revnum" to the working copy's ``.svn/hg-shamap``.

    Does nothing when the source is not a working copy. The map file is
    opened for appending on first use and flushed after every record.
    """
    if not self.wc:
        return
    if self.convertfp is None:
        mappath = os.path.join(self.wc, b'.svn', b'hg-shamap')
        self.convertfp = open(mappath, b'ab')
    record = b'%s %d\n' % (destrev, self.revnum(rev))
    self.convertfp.write(util.tonativeeol(record))
    self.convertfp.flush()
795 795
def revid(self, revnum, module=None):
    """Build the ``svn:<uuid><module>@<revnum>`` identifier.

    The current module is used when ``module`` is empty or None.
    """
    target = module or self.module
    return b'svn:%s%s@%d' % (self.uuid, target, revnum)
798 798
def revnum(self, rev):
    """Return the integer after the last ``@`` in ``rev``.

    When there is no ``@`` the whole string is converted.
    """
    tail = rev.rpartition(b'@')[2]
    return int(tail)
801 801
def latest(self, path, stop=None):
    """Find the latest revid affecting path, up to stop revision
    number. If stop is None, default to repository latest
    revision. It may return a revision in a different module,
    since a branch may be moved without a change being
    reported. Return None if computed module does not belong to
    rootmodule subtree.

    Raises SvnPathNotFound when ``path`` cannot be stat'ed at ``stop``.
    """

    def findchanges(path, start, stop=None):
        # Walk the log backwards from start, following renames of path.
        # Returns (revnum, path), with revnum None when the last entry
        # read carries no changed paths.
        stream = self._getlog([path], start, stop or 1)
        try:
            for entry in stream:
                paths, revnum, author, date, message = entry
                if stop is None and paths:
                    # We do not know the latest changed revision,
                    # keep the first one with changed paths.
                    break
                # Without a stop revision there is no lower bound to
                # test; comparing an int with None raises TypeError on
                # Python 3.
                if stop is not None and revnum <= stop:
                    break

                for p in paths:
                    if not path.startswith(p) or not paths[p].copyfrom_path:
                        continue
                    newpath = paths[p].copyfrom_path + path[len(p) :]
                    self.ui.debug(
                        b"branch renamed from %s to %s at %d\n"
                        % (path, newpath, revnum)
                    )
                    path = newpath
                    break
            if not paths:
                revnum = None
            return revnum, path
        finally:
            stream.close()

    if not path.startswith(self.rootmodule):
        # Requests on foreign branches may be forbidden at server level
        self.ui.debug(b'ignoring foreign branch %r\n' % path)
        return None

    if stop is None:
        stop = svn.ra.get_latest_revnum(self.ra)
    try:
        # stat() is done relative to the repository root; the previous
        # parent is restored afterwards.
        prevmodule = self.reparent(b'')
        dirent = svn.ra.stat(self.ra, path.strip(b'/'), stop)
        self.reparent(prevmodule)
    except svn.core.SubversionException:
        dirent = None
    if not dirent:
        raise SvnPathNotFound(
            _(b'%s not found up to revision %d') % (path, stop)
        )

    # stat() gives us the previous revision on this line of
    # development, but it might be in *another module*. Fetch the
    # log and detect renames down to the latest revision.
    revnum, realpath = findchanges(path, stop, dirent.created_rev)
    if revnum is None:
        # Tools like svnsync can create empty revision, when
        # synchronizing only a subtree for instance. These empty
        # revisions created_rev still have their original values
        # despite all changes having disappeared and can be
        # returned by ra.stat(), at least when stating the root
        # module. In that case, do not trust created_rev and scan
        # the whole history.
        revnum, realpath = findchanges(path, stop)
        if revnum is None:
            self.ui.debug(b'ignoring empty branch %r\n' % realpath)
            return None

    if not realpath.startswith(self.rootmodule):
        self.ui.debug(b'ignoring foreign branch %r\n' % realpath)
        return None
    return self.revid(revnum, realpath)
878 878
def reparent(self, module):
    """Reparent the svn transport and return the previous parent."""
    previous = self.prevmodule
    if previous == module:
        # Already parented there: nothing to do.
        return module
    if previous is None:
        previous = b''
    svnurl = self.baseurl + quote(module)
    self.ui.debug(b"reparent to %s\n" % svnurl)
    svn.ra.reparent(self.ra, svnurl)
    self.prevmodule = module
    return previous
891 891
def expandpaths(self, rev, paths, parents):
    """Expand the changed-path entries of one revision into file changes.

    ``rev`` is a revision id understood by ``revsplit``; its module and
    revision number select what is inspected (the session is reparented
    when the module differs from ``self.module``).  ``paths`` is a sized
    sequence of ``(path, ent)`` pairs and ``parents`` a sequence of
    parent revision ids, of which only the first is consulted.

    Returns a ``(changed, removed, copies)`` tuple: ``changed`` is a list
    of recoded file paths that also includes every removed file,
    ``removed`` is the set of removed files, and ``copies`` maps a copy
    destination to its source.
    """
    changed, removed = set(), set()
    copies = {}

    new_module, revnum = revsplit(rev)[1:]
    if new_module != self.module:
        self.module = new_module
        self.reparent(self.module)

    progress = self.ui.makeprogress(
        _(b'scanning paths'), unit=_(b'paths'), total=len(paths)
    )
    for i, (path, ent) in enumerate(paths):
        progress.update(i, item=path)
        entrypath = self.getrelpath(path)

        # What the entry is *in this revision* decides how it is expanded.
        kind = self._checkpath(entrypath, revnum)
        if kind == svn.core.svn_node_file:
            changed.add(self.recode(entrypath))
            if not ent.copyfrom_path or not parents:
                continue
            # Copy sources not in parent revisions cannot be
            # represented, ignore their origin for now
            pmodule, prevnum = revsplit(parents[0])[1:]
            if ent.copyfrom_rev < prevnum:
                continue
            copyfrom_path = self.getrelpath(ent.copyfrom_path, pmodule)
            if not copyfrom_path:
                continue
            self.ui.debug(
                b"copied to %s from %s@%d\n"
                % (entrypath, copyfrom_path, ent.copyfrom_rev)
            )
            copies[self.recode(entrypath)] = self.recode(copyfrom_path)
        elif kind == 0:  # gone, but had better be a deleted *file*
            self.ui.debug(b"gone from %d\n" % ent.copyfrom_rev)
            # NOTE(review): parents[0] is read without the ``parents``
            # check used by the other branches -- confirm callers never
            # reach this branch with an empty parents list.
            pmodule, prevnum = revsplit(parents[0])[1:]
            parentpath = pmodule + b"/" + entrypath
            # Look the path up in the parent revision to learn whether a
            # file or a whole directory disappeared.
            fromkind = self._checkpath(entrypath, prevnum, pmodule)

            if fromkind == svn.core.svn_node_file:
                removed.add(self.recode(entrypath))
            elif fromkind == svn.core.svn_node_dir:
                # A directory vanished: every file it held in the parent
                # revision is reported as removed.
                oroot = parentpath.strip(b'/')
                nroot = path.strip(b'/')
                children = self._iterfiles(oroot, prevnum)
                for childpath in children:
                    childpath = childpath.replace(oroot, nroot)
                    childpath = self.getrelpath(b"/" + childpath, pmodule)
                    if childpath:
                        removed.add(self.recode(childpath))
            else:
                self.ui.debug(
                    b'unknown path in revision %d: %s\n' % (revnum, path)
                )
        elif kind == svn.core.svn_node_dir:
            if ent.action == b'M':
                # If the directory just had a prop change,
                # then we shouldn't need to look for its children.
                continue
            if ent.action == b'R' and parents:
                # If a directory is replacing a file, mark the previous
                # file as deleted
                pmodule, prevnum = revsplit(parents[0])[1:]
                pkind = self._checkpath(entrypath, prevnum, pmodule)
                if pkind == svn.core.svn_node_file:
                    removed.add(self.recode(entrypath))
                elif pkind == svn.core.svn_node_dir:
                    # We do not know what files were kept or removed,
                    # mark them all as changed.
                    for childpath in self._iterfiles(pmodule, prevnum):
                        childpath = self.getrelpath(b"/" + childpath)
                        if childpath:
                            changed.add(self.recode(childpath))

            # Every file currently below the directory counts as changed.
            for childpath in self._iterfiles(path, revnum):
                childpath = self.getrelpath(b"/" + childpath)
                if childpath:
                    changed.add(self.recode(childpath))

            # Handle directory copies
            if not ent.copyfrom_path or not parents:
                continue
            # Copy sources not in parent revisions cannot be
            # represented, ignore their origin for now
            pmodule, prevnum = revsplit(parents[0])[1:]
            if ent.copyfrom_rev < prevnum:
                continue
            copyfrompath = self.getrelpath(ent.copyfrom_path, pmodule)
            if not copyfrompath:
                continue
            self.ui.debug(
                b"mark %s came from %s:%d\n"
                % (path, copyfrompath, ent.copyfrom_rev)
            )
            # Record one copy per file found below the copy source.
            children = self._iterfiles(ent.copyfrom_path, ent.copyfrom_rev)
            for childpath in children:
                childpath = self.getrelpath(b"/" + childpath, pmodule)
                if not childpath:
                    continue
                copytopath = path + childpath[len(copyfrompath) :]
                copytopath = self.getrelpath(copytopath)
                copies[self.recode(copytopath)] = self.recode(childpath)

    progress.complete()
    # Removed files are reported as changed too.
    changed.update(removed)
    return (list(changed), removed, copies)
999 999
def _fetch_revisions(self, from_revnum, to_revnum):
    """Read the log of ``self.module`` and record its revisions.

    The two bounds are swapped if needed so the log is requested from the
    higher revision number to the lower one.  Every accepted entry is
    stored in ``self.commits`` and ``self.paths``, and the commit
    previously parsed gets the current one as parent when it has none.

    Raises ``error.Abort`` when Subversion reports that the requested
    revision does not exist; other Subversion errors propagate.
    """
    if from_revnum < to_revnum:
        from_revnum, to_revnum = to_revnum, from_revnum

    self.child_cset = None

    def parselogentry(orig_paths, revnum, author, date, message):
        """Return the parsed commit object or None, and True if
        the revision is a branch root.
        """
        self.ui.debug(
            b"parsing revision %d (%d changes)\n"
            % (revnum, len(orig_paths))
        )

        branched = False
        rev = self.revid(revnum)
        # branch log might return entries for a parent we already have

        if rev in self.commits or revnum < to_revnum:
            return None, branched

        parents = []
        # check whether this revision is the start of a branch or part
        # of a branch renaming
        orig_paths = sorted(pycompat.iteritems(orig_paths))
        root_paths = [
            (p, e) for p, e in orig_paths if self.module.startswith(p)
        ]
        if root_paths:
            # The list is sorted, so the last match is the greatest path
            # that is a prefix of the module.
            path, ent = root_paths[-1]
            if ent.copyfrom_path:
                branched = True
                newpath = ent.copyfrom_path + self.module[len(path) :]
                # ent.copyfrom_rev may not be the actual last revision
                previd = self.latest(newpath, ent.copyfrom_rev)
                if previd is not None:
                    prevmodule, prevnum = revsplit(previd)[1:]
                    if prevnum >= self.startrev:
                        parents = [previd]
                        self.ui.note(
                            _(b'found parent of branch %s at %d: %s\n')
                            % (self.module, prevnum, prevmodule)
                        )
            else:
                self.ui.debug(b"no copyfrom path, don't know what to do.\n")

        # filter out unrelated paths
        paths = [
            (path, ent)
            for path, ent in orig_paths
            if self.getrelpath(path) is not None
        ]

        # Example SVN datetime. Includes microseconds.
        # ISO-8601 conformant
        # '2007-01-04T17:35:00.902377Z'
        date = dateutil.parsedate(
            date[:19] + b" UTC", [b"%Y-%m-%dT%H:%M:%S"]
        )
        if self.ui.configbool(b'convert', b'localtimezone'):
            date = makedatetimestamp(date[0])

        log = self.recode(message) if message else b''
        author = self.recode(author) if author else b''

        # split() always yields at least one element, so the last
        # component can be taken without guarding against IndexError.
        branch = self.module.split(b"/")[-1]
        if branch == self.trunkname:
            branch = None

        cset = commit(
            author=author,
            date=dateutil.datestr(date, b'%Y-%m-%d %H:%M:%S %1%2'),
            desc=log,
            parents=parents,
            branch=branch,
            rev=rev,
        )

        self.commits[rev] = cset
        # The parents list is *shared* among self.paths and the
        # commit object. Both will be updated below.
        self.paths[rev] = (paths, cset.parents)
        if self.child_cset and not self.child_cset.parents:
            self.child_cset.parents[:] = [rev]
        self.child_cset = cset
        return cset, branched

    self.ui.note(
        _(b'fetching revision log for "%s" from %d to %d\n')
        % (self.module, from_revnum, to_revnum)
    )

    try:
        firstcset = None
        lastonbranch = False
        stream = self._getlog([self.module], from_revnum, to_revnum)
        try:
            for entry in stream:
                paths, revnum, author, date, message = entry
                if revnum < self.startrev:
                    lastonbranch = True
                    break
                if not paths:
                    self.ui.debug(b'revision %d has no entries\n' % revnum)
                    # If we ever leave the loop on an empty
                    # revision, do not try to get a parent branch
                    lastonbranch = lastonbranch or revnum == 0
                    continue
                cset, lastonbranch = parselogentry(
                    paths, revnum, author, date, message
                )
                if cset:
                    firstcset = cset
                if lastonbranch:
                    break
        finally:
            stream.close()

        if not lastonbranch and firstcset and not firstcset.parents:
            # The first revision of the sequence (the last fetched one)
            # has invalid parents if not a branch root. Find the parent
            # revision now, if any.
            try:
                firstrevnum = self.revnum(firstcset.rev)
                if firstrevnum > 1:
                    latest = self.latest(self.module, firstrevnum - 1)
                    if latest:
                        firstcset.parents.append(latest)
            except SvnPathNotFound:
                pass
    except svn.core.SubversionException as inst:
        (_errmsg, num) = inst.args
        if num == svn.core.SVN_ERR_FS_NO_SUCH_REVISION:
            # to_revnum is an integer; bytes %s formatting only accepts
            # bytes-like values on Python 3, so convert it explicitly
            # and leave the translatable message untouched.
            raise error.Abort(
                _(b'svn: branch has no revision %s')
                % pycompat.bytestr(to_revnum)
            )
        raise
1148 1148
def getfile(self, file, rev):
    """Return ``(data, mode)`` for ``file`` at revision ``rev``.

    ``mode`` is ``b'l'`` when the ``svn:special`` property is set,
    otherwise ``b'x'`` when ``svn:executable`` is set, otherwise ``b''``.
    For ``b'l'`` entries a leading ``b"link "`` marker is stripped from
    the data.  ``(None, None)`` is returned when the file is listed in
    ``self.removed`` or when Subversion reports it as not found; any
    other Subversion error propagates.
    """
    # TODO: ra.get_file transmits the whole file instead of diffs.
    if file in self.removed:
        return None, None
    try:
        new_module, revnum = revsplit(rev)[1:]
        if self.module != new_module:
            self.module = new_module
            self.reparent(self.module)
        io = stringio()
        try:
            info = svn.ra.get_file(self.ra, file, revnum, io)
            data = io.getvalue()
        finally:
            # ra.get_file() seems to keep a reference on the input buffer
            # preventing collection. Release it explicitly, including
            # when the fetch itself failed.
            io.close()
        if isinstance(info, list):
            info = info[-1]
        mode = b'x' if b"svn:executable" in info else b''
        if b"svn:special" in info:
            mode = b'l'
    except svn.core.SubversionException as e:
        notfound = (
            svn.core.SVN_ERR_FS_NOT_FOUND,
            svn.core.SVN_ERR_RA_DAV_PATH_NOT_FOUND,
        )
        if e.apr_err in notfound:  # File not found
            return None, None
        raise
    if mode == b'l':
        link_prefix = b"link "
        if data.startswith(link_prefix):
            data = data[len(link_prefix) :]
    return data, mode
1181 1181
def _iterfiles(self, path, revnum):
    """Enumerate all files in path at revnum, recursively.

    Returns a generator of paths, each prefixed with the
    slash-stripped ``path``, for the listed entries whose kind is a file.
    """
    base = path.strip(b'/')
    pool = svn.core.Pool()
    url = b'/'.join([self.baseurl, quote(base)]).strip(b'/')
    listing = svn.client.ls(url, optrev(revnum), True, self.ctx, pool)
    # An empty base means the listing is rooted at the base URL itself,
    # so no separator must be prepended in that case.
    prefix = base + b'/' if base else base
    return (
        prefix + name
        for name, entry in pycompat.iteritems(listing)
        if entry.kind == svn.core.svn_node_file
    )
1195 1195
def getrelpath(self, path, module=None):
    """Return ``path`` relative to ``module``, or None if outside it.

    ``module`` defaults to ``self.module``.  The module itself maps to
    ``b''``; a path that merely shares a name prefix with the module is
    treated as unrelated, reported through ``ui.debug`` and yields None.
    """
    base = self.module if module is None else module
    # Given the repository url of this wc, say
    #   "http://server/plone/CMFPlone/branches/Plone-2_0-branch"
    # extract the "entry" portion (a relative path) from what
    # svn log --xml says, i.e.
    #   "/CMFPlone/branches/Plone-2_0-branch/tests/PloneTestCase.py"
    # that is to say "tests/PloneTestCase.py"
    if path.startswith(base):
        tail = path.rstrip(b'/')[len(base) :]
        if tail[:1] == b'/':
            return tail[1:]
        if not tail:
            return tail

    # The path is outside our tracked tree...
    self.ui.debug(
        b'%r is not under %r, ignoring\n'
        % (pycompat.bytestr(path), pycompat.bytestr(base))
    )
    return None
1218 1218
def _checkpath(self, path, revnum, module=None):
    """Return what ``svn.ra.check_path`` reports for ``path`` at ``revnum``.

    When ``module`` is given, the session is reparented to ``b''`` for
    the duration of the check, ``path`` is looked up below ``module``,
    and the previous parent is restored afterwards.
    """
    if module is None:
        # ra.check_path does not like leading slashes very much, it leads
        # to PROPFIND subversion errors
        return svn.ra.check_path(self.ra, path.strip(b'/'), revnum)

    previous = self.reparent(b'')
    target = module + b'/' + path
    try:
        # Same leading-slash restriction as above.
        return svn.ra.check_path(self.ra, target.strip(b'/'), revnum)
    finally:
        self.reparent(previous)
1230 1230
def _getlog(
    self,
    paths,
    start,
    end,
    limit=0,
    discover_changed_paths=True,
    strict_node_history=False,
):
    """Return a log stream for ``paths`` between ``start`` and ``end``.

    The log is read in-process through ``directlogstream`` unless the
    developer option ``convert.svn.debugsvnlog`` is set, in which case an
    ``hg debugsvnlog`` child process is started, fed the encoded
    arguments on stdin, and its stdout is wrapped in ``logstream``.

    Raises ``error.Abort`` if closing the child's stdin fails.
    """
    # Normalize path names, svn >= 1.5 only wants paths relative to
    # supplied URL
    relpaths = [
        (p if p.startswith(b'/') else self.module + b'/' + p).strip(b'/')
        for p in paths
    ]
    args = [
        self.baseurl,
        relpaths,
        start,
        end,
        limit,
        discover_changed_paths,
        strict_node_history,
    ]
    # developer config: convert.svn.debugsvnlog
    if self.ui.configbool(b'convert', b'svn.debugsvnlog'):
        payload = encodeargs(args)
        executable = procutil.hgexecutable()
        command = b'%s debugsvnlog' % procutil.shellquote(executable)
        stdin, stdout = procutil.popen2(procutil.quotecommand(command))
        stdin.write(payload)
        try:
            stdin.close()
        except IOError:
            raise error.Abort(
                _(
                    b'Mercurial failed to run itself, check'
                    b' hg executable is in PATH'
                )
            )
        return logstream(stdout)
    return directlogstream(*args)
1274 1274
1275 1275
# Shell script that svn_sink.__init__ writes to hooks/pre-revprop-change
# of a repository it has just created.  It accepts modification of the
# svn:log revision property and addition of the hg:convert-branch and
# hg:convert-rev revision properties; any other revision property change
# is rejected with exit status 1.
pre_revprop_change = b'''#!/bin/sh

REPOS="$1"
REV="$2"
USER="$3"
PROPNAME="$4"
ACTION="$5"

if [ "$ACTION" = "M" -a "$PROPNAME" = "svn:log" ]; then exit 0; fi
if [ "$ACTION" = "A" -a "$PROPNAME" = "hg:convert-branch" ]; then exit 0; fi
if [ "$ACTION" = "A" -a "$PROPNAME" = "hg:convert-rev" ]; then exit 0; fi

echo "Changing prohibited revision property" >&2
exit 1
'''
1291 1291
1292 1292
class svn_sink(converter_sink, commandline):
    """Conversion sink that writes into a Subversion working copy.

    Changes are applied to the working copy on disk and then recorded by
    running the ``svn`` command-line client.  Bookkeeping files live in
    the working copy's ``.svn`` directory.
    """

    # Extracts the new revision number from "svn commit" output.
    commit_re = re.compile(br'Committed revision (\d+).', re.M)
    # Extracts the repository UUID from "svn info" output.
    uuid_re = re.compile(br'Repository UUID:\s*(\S+)', re.M)

    def prerun(self):
        """Change into the working copy, if one has been set up."""
        if self.wc:
            os.chdir(self.wc)

    def postrun(self):
        """Change back to the directory recorded at construction time."""
        if self.wc:
            os.chdir(self.cwd)

    def join(self, name):
        """Return the path of ``name`` inside the working copy's .svn."""
        return os.path.join(self.wc, b'.svn', name)

    def revmapfile(self):
        """Return the path of the revision map file."""
        return self.join(b'hg-shamap')

    def authorfile(self):
        """Return the path of the author map file."""
        return self.join(b'hg-authormap')

    def __init__(self, ui, repotype, path):
        """Open or create the working copy that receives the conversion.

        If ``path`` already is a working copy (it has ``.svn/entries``)
        it is updated and used directly.  Otherwise ``path`` names a
        repository: a local path is turned into a ``file://`` URL, the
        repository being created first when it does not exist yet, and a
        fresh ``<basename>-wc`` working copy is checked out in the
        current directory.
        """
        converter_sink.__init__(self, ui, repotype, path)
        commandline.__init__(self, ui, b'svn')
        self.delete = []
        self.setexec = []
        self.delexec = []
        self.copies = []
        self.wc = None
        self.cwd = encoding.getcwd()

        # Path of the repository created here, or False if none was.
        created = False
        if os.path.isfile(os.path.join(path, b'.svn', b'entries')):
            self.wc = os.path.realpath(path)
            self.run0(b'update')
        else:
            if not re.search(br'^(file|http|https|svn|svn\+ssh)://', path):
                path = os.path.realpath(path)
                if os.path.isdir(os.path.dirname(path)):
                    if not os.path.exists(
                        os.path.join(path, b'db', b'fs-type')
                    ):
                        ui.status(
                            _(b"initializing svn repository '%s'\n")
                            % os.path.basename(path)
                        )
                        commandline(ui, b'svnadmin').run0(b'create', path)
                        created = path
                    path = util.normpath(path)
                    if not path.startswith(b'/'):
                        path = b'/' + path
                    path = b'file://' + path

            wcpath = os.path.join(
                encoding.getcwd(), os.path.basename(path) + b'-wc'
            )
            ui.status(
                _(b"initializing svn working copy '%s'\n")
                % os.path.basename(wcpath)
            )
            self.run0(b'checkout', path, wcpath)

            self.wc = wcpath
        self.opener = vfsmod.vfs(self.wc)
        self.wopener = vfsmod.vfs(self.wc)
        self.childmap = mapfile(ui, self.join(b'hg-childmap'))
        if util.checkexec(self.wc):
            self.is_exec = util.isexec
        else:
            self.is_exec = None

        if created:
            # Install the hook that permits the revision property
            # changes putcommit() performs on a repository created here.
            hook = os.path.join(created, b'hooks', b'pre-revprop-change')
            # "with" guarantees the handle is closed even if the write
            # fails part-way.
            with open(hook, b'wb') as fp:
                fp.write(pre_revprop_change)
            util.setflags(hook, False, True)

        output = self.run0(b'info')
        self.uuid = self.uuid_re.search(output).group(1).strip()

    def wjoin(self, *names):
        """Return the path of ``names`` joined below the working copy."""
        return os.path.join(self.wc, *names)

    @propertycache
    def manifest(self):
        """Set of entry names reported by a recursive ``svn ls``."""
        # As of svn 1.7, the "add" command fails when receiving
        # already tracked entries, so we have to track and filter them
        # ourselves.
        m = set()
        output = self.run0(b'ls', recursive=True, xml=True)
        doc = xml.dom.minidom.parseString(output)
        for e in doc.getElementsByTagName('entry'):
            for n in e.childNodes:
                if n.nodeType != n.ELEMENT_NODE or n.tagName != 'name':
                    continue
                name = ''.join(
                    c.data for c in n.childNodes if c.nodeType == c.TEXT_NODE
                )
                # Entries are compared with names coming from
                # mercurial, so bytes with undefined encoding. Our
                # best bet is to assume they are in local
                # encoding. They will be passed to command line calls
                # later anyway, so they better be.
                m.add(encoding.unitolocal(name))
                # Only the first <name> child of an entry is used.
                break
        return m

    def putfile(self, filename, flags, data):
        """Write ``data`` to ``filename`` in the working copy.

        A ``b'l'`` flag creates a symlink whose target is ``data``.
        Otherwise a regular file is written and, when the working copy
        supports the exec bit, a change of executability is queued in
        ``self.setexec`` / ``self.delexec`` for the next commit.
        """
        if b'l' in flags:
            self.wopener.symlink(data, filename)
        else:
            wpath = self.wjoin(filename)
            try:
                if os.path.islink(wpath):
                    # Remove the link that was just tested.  The bare
                    # filename would resolve against the process's
                    # current directory instead of the working copy.
                    os.unlink(wpath)
            except OSError:
                pass

            if self.is_exec:
                # We need to check executability of the file before the change,
                # because `vfs.write` is able to reset exec bit.
                wasexec = False
                if os.path.exists(wpath):
                    wasexec = self.is_exec(wpath)

            self.wopener.write(filename, data)

            if self.is_exec:
                if wasexec:
                    if b'x' not in flags:
                        self.delexec.append(filename)
                else:
                    if b'x' in flags:
                        self.setexec.append(filename)
            util.setflags(wpath, False, b'x' in flags)

    def _copyfile(self, source, dest):
        """Record a copy from ``source`` to ``dest`` with ``svn copy``."""
        # SVN's copy command pukes if the destination file exists, but
        # our copyfile method expects to record a copy that has
        # already occurred.  Cross the semantic gap.
        wdest = self.wjoin(dest)
        exists = os.path.lexists(wdest)
        if exists:
            # Reserve a unique name next to the destination and move the
            # existing file out of the way under that name.
            fd, tempname = pycompat.mkstemp(
                prefix=b'hg-copy-', dir=os.path.dirname(wdest)
            )
            os.close(fd)
            os.unlink(tempname)
            os.rename(wdest, tempname)
        try:
            self.run0(b'copy', source, dest)
        finally:
            self.manifest.add(dest)
            if exists:
                # Put the already-written content back in place of
                # whatever "svn copy" produced.
                try:
                    os.unlink(wdest)
                except OSError:
                    pass
                os.rename(tempname, wdest)

    def dirs_of(self, files):
        """Return the directories among ``files`` plus all their ancestors."""
        dirs = set()
        for f in files:
            if os.path.isdir(self.wjoin(f)):
                dirs.add(f)
            i = len(f)
            # Walk the b'/' separators from right to left; rfind()
            # returning -1 is the sentinel that ends the iteration.
            for i in iter(lambda: f.rfind(b'/', 0, i), -1):
                dirs.add(f[:i])
        return dirs

    def add_dirs(self, files):
        """``svn add`` the not yet tracked directories needed by ``files``.

        Returns the list of directories that were added.
        """
        add_dirs = [
            d for d in sorted(self.dirs_of(files)) if d not in self.manifest
        ]
        if add_dirs:
            self.manifest.update(add_dirs)
            self.xargs(add_dirs, b'add', non_recursive=True, quiet=True)
        return add_dirs

    def add_files(self, files):
        """``svn add`` the entries of ``files`` that are not tracked yet.

        Returns the list of files that were added.
        """
        files = [f for f in files if f not in self.manifest]
        if files:
            self.manifest.update(files)
            self.xargs(files, b'add', quiet=True)
        return files

    def addchild(self, parent, child):
        """Remember ``child`` as the revision committed for ``parent``."""
        self.childmap[parent] = child

    def revid(self, rev):
        """Return the ``svn:<uuid>@<rev>`` identifier for ``rev``."""
        return b"svn:%s@%s" % (self.uuid, rev)

    def putcommit(
        self, files, copies, parents, commit, source, revmap, full, cleanp2
    ):
        """Apply one changeset to the working copy and commit it.

        Returns the revision id of the new commit.  If a parent already
        has a child recorded in the child map, that child's id is
        returned and nothing is committed.  If ``svn commit`` reports no
        new revision and there were no files, the first parent (or
        ``b'None'``) is returned.

        Raises ``error.Abort`` when the commit output cannot be parsed
        although files were part of the changeset.
        """
        for parent in parents:
            try:
                return self.revid(self.childmap[parent])
            except KeyError:
                pass

        # Apply changes to working copy
        for f, v in files:
            data, mode = source.getfile(f, v)
            if data is None:
                self.delete.append(f)
            else:
                self.putfile(f, mode, data)
                if f in copies:
                    self.copies.append([copies[f], f])
        if full:
            # NOTE(review): ``files`` still holds (name, version) pairs
            # here while the manifest holds names, so this difference
            # looks like it never excludes anything -- confirm intent.
            self.delete.extend(sorted(self.manifest.difference(files)))
        files = [f[0] for f in files]

        entries = set(self.delete)
        files = frozenset(files)
        entries.update(self.add_dirs(files.difference(entries)))
        if self.copies:
            for s, d in self.copies:
                self._copyfile(s, d)
            self.copies = []
        if self.delete:
            self.xargs(self.delete, b'delete')
            for f in self.delete:
                self.manifest.remove(f)
            self.delete = []
        entries.update(self.add_files(files.difference(entries)))
        if self.delexec:
            self.xargs(self.delexec, b'propdel', b'svn:executable')
            self.delexec = []
        if self.setexec:
            self.xargs(self.setexec, b'propset', b'svn:executable', b'*')
            self.setexec = []

        fd, messagefile = pycompat.mkstemp(prefix=b'hg-convert-')
        try:
            # Writing happens inside the try block so the temporary file
            # is removed even when the write itself fails.
            with os.fdopen(fd, 'wb') as fp:
                fp.write(util.tonativeeol(commit.desc))
            output = self.run0(
                b'commit',
                username=stringutil.shortuser(commit.author),
                file=messagefile,
                encoding=b'utf-8',
            )
            try:
                rev = self.commit_re.search(output).group(1)
            except AttributeError:
                # No "Committed revision" line was found in the output.
                if not files:
                    return parents[0] if parents else b'None'
                self.ui.warn(_(b'unexpected svn output:\n'))
                self.ui.warn(output)
                raise error.Abort(_(b'unable to cope with svn output'))
            if commit.rev:
                self.run(
                    b'propset',
                    b'hg:convert-rev',
                    commit.rev,
                    revprop=True,
                    revision=rev,
                )
            if commit.branch and commit.branch != b'default':
                self.run(
                    b'propset',
                    b'hg:convert-branch',
                    commit.branch,
                    revprop=True,
                    revision=rev,
                )
            for parent in parents:
                self.addchild(parent, rev)
            return self.revid(rev)
        finally:
            os.unlink(messagefile)

    def puttags(self, tags):
        """Warn that tags cannot be written; return ``(None, None)``."""
        self.ui.warn(_(b'writing Subversion tags is not yet implemented\n'))
        return None, None

    def hascommitfrommap(self, rev):
        """Always report ``rev`` as present."""
        # We trust that revisions referenced in a map still is present
        # TODO: implement something better if necessary and feasible
        return True

    def hascommitforsplicemap(self, rev):
        """Return True if ``rev`` is in the child map, else abort.

        Raises ``error.Abort`` for revisions missing from the child map.
        """
        # This is not correct as one can convert to an existing subversion
        # repository and childmap would not list all revisions. Too bad.
        if rev in self.childmap:
            return True
        raise error.Abort(
            _(
                b'splice map revision %s not found in subversion '
                b'child map (revision lookups are not implemented)'
            )
            % rev
        )
General Comments 0
You need to be logged in to leave comments. Login now