##// END OF EJS Templates
cleanup: drop redundant character escapes outside of `[]`...
Matt Harbison -
r44474:c1ccefb5 default
parent child Browse files
Show More
@@ -1,1565 +1,1565 b''
1 1 # Subversion 1.4/1.5 Python API backend
2 2 #
3 3 # Copyright(C) 2007 Daniel Holth et al
4 4 from __future__ import absolute_import
5 5
6 6 import os
7 7 import re
8 8 import xml.dom.minidom
9 9
10 10 from mercurial.i18n import _
11 11 from mercurial.pycompat import open
12 12 from mercurial import (
13 13 encoding,
14 14 error,
15 15 pycompat,
16 16 util,
17 17 vfs as vfsmod,
18 18 )
19 19 from mercurial.utils import (
20 20 dateutil,
21 21 procutil,
22 22 stringutil,
23 23 )
24 24
25 25 from . import common
26 26
# Convenience aliases so the rest of this module can use short names for
# helpers living in mercurial.util and in the sibling `common` module.
pickle = util.pickle
stringio = util.stringio
propertycache = util.propertycache
urlerr = util.urlerr
urlreq = util.urlreq

# Re-export the conversion framework primitives used throughout this file.
commandline = common.commandline
commit = common.commit
converter_sink = common.converter_sink
converter_source = common.converter_source
decodeargs = common.decodeargs
encodeargs = common.encodeargs
makedatetimestamp = common.makedatetimestamp
mapfile = common.mapfile
MissingTool = common.MissingTool
NoRepo = common.NoRepo
43 43
44 44 # Subversion stuff. Works best with very recent Python SVN bindings
45 45 # e.g. SVN 1.5 or backports. Thanks to the bzr folks for enhancing
46 46 # these bindings.
47 47
try:
    # The Subversion Python bindings are optional: import everything we
    # need here so a broken or missing install degrades gracefully.
    import svn
    import svn.client
    import svn.core
    import svn.ra
    import svn.delta
    from . import transport
    import warnings

    # svn.core emits a DeprecationWarning on modern Pythons; silence it.
    warnings.filterwarnings(
        b'ignore', module=b'svn.core', category=DeprecationWarning
    )
    svn.core.SubversionException  # trigger import to catch error

except ImportError:
    # Bindings unavailable; svn_source will raise MissingTool when used.
    svn = None
64 64
65 65
class SvnPathNotFound(Exception):
    """Raised when a path cannot be found at (or up to) a given revision."""

    pass
68 68
69 69
def revsplit(rev):
    """Split a revision identifier into its (uuid, path, revnum) parts.

    >>> revsplit(b'svn:a2147622-4a9f-4db4-a8d3-13562ff547b2'
    ...          b'/proj%20B/mytrunk/mytrunk@1')
    ('a2147622-4a9f-4db4-a8d3-13562ff547b2', '/proj%20B/mytrunk/mytrunk', 1)
    >>> revsplit(b'svn:8af66a51-67f5-4354-b62c-98d67cc7be1d@1')
    ('', '', 1)
    >>> revsplit(b'@7')
    ('', '', 7)
    >>> revsplit(b'7')
    ('', '', 0)
    >>> revsplit(b'bad')
    ('', '', 0)
    """
    # Peel the trailing '@<revnum>' off first, if present.
    prefix = rev
    revnum = 0
    pieces = rev.rsplit(b'@', 1)
    if len(pieces) == 2:
        prefix = pieces[0]
        revnum = int(pieces[1])
    # What remains must look like 'svn:<uuid>/<module>' to carry
    # uuid/module information; anything else yields empty fields.
    uuid = b''
    mod = b''
    segments = prefix.split(b'/', 1)
    if len(segments) == 2 and segments[0].startswith(b'svn:'):
        uuid = segments[0][4:]
        mod = b'/' + segments[1]
    return uuid, mod, revnum
95 95
96 96
def quote(s):
    """URL-quote *s* the way Subversion itself encodes URL components.

    As of svn 1.7, many svn calls expect "canonical" paths. In theory we
    should call svn.core.*canonicalize() on all paths before passing them
    to the API. Instead, we assume the base url is canonical and copy the
    behaviour of svn's URL encoding function so we can extend it safely
    with new components.
    """
    # The "safe" characters were taken from the "svn_uri__char_validity"
    # table in libsvn_subr/path.c.
    safe = b"!$&'()*+,-./:=@_~"
    return urlreq.quote(s, safe)
106 106
107 107
def geturl(path):
    """Return a canonical Subversion URL for *path*.

    *path* may already be a URL, a working copy, or a local repository
    directory; local directories are turned into a file:// URL.
    """
    try:
        return svn.client.url_from_path(svn.core.svn_path_canonicalize(path))
    except svn.core.SubversionException:
        # svn.client.url_from_path() fails with local repositories
        pass
    if os.path.isdir(path):
        path = os.path.normpath(os.path.abspath(path))
        if pycompat.iswindows:
            # file:///C:/... URLs need a leading slash before the drive
            path = b'/' + util.normpath(path)
        # Module URL is later compared with the repository URL returned
        # by svn API, which is UTF-8.
        path = encoding.tolocal(path)
        path = b'file://%s' % quote(path)
    return svn.core.svn_path_canonicalize(path)
123 123
124 124
def optrev(number):
    """Wrap a plain revision number into an svn_opt_revision_t object,
    the form most svn.client APIs expect for their revision argument."""
    optrev = svn.core.svn_opt_revision_t()
    optrev.kind = svn.core.svn_opt_revision_number
    optrev.value.number = number
    return optrev
130 130
131 131
class changedpath(object):
    """Picklable snapshot of the svn changed-path fields we care about.

    The native svn object cannot cross the process boundary used by
    get_log_child, so copy out just the attributes this module reads.
    """

    def __init__(self, p):
        for attr in ('copyfrom_path', 'copyfrom_rev', 'action'):
            setattr(self, attr, getattr(p, attr))
137 137
138 138
def get_log_child(
    fp,
    url,
    paths,
    start,
    end,
    limit=0,
    discover_changed_paths=True,
    strict_node_history=False,
):
    """Stream svn log entries for *paths* to *fp* as pickled tuples.

    Runs in a child process (see debugsvnlog). Each entry is written as
    (paths, revnum, author, date, message); a trailing None marks a clean
    end of stream, and a bytes value carries an error message instead.
    """
    protocol = -1  # pickle protocol -1 selects the highest available

    def receiver(orig_paths, revnum, author, date, message, pool):
        # Convert native svn changed-path objects into picklable ones.
        paths = {}
        if orig_paths is not None:
            for k, v in pycompat.iteritems(orig_paths):
                paths[k] = changedpath(v)
        pickle.dump((paths, revnum, author, date, message), fp, protocol)

    try:
        # Use an ra of our own so that our parent can consume
        # our results without confusing the server.
        t = transport.SvnRaTransport(url=url)
        svn.ra.get_log(
            t.ra,
            paths,
            start,
            end,
            limit,
            discover_changed_paths,
            strict_node_history,
            receiver,
        )
    except IOError:
        # Caller may interrupt the iteration
        pickle.dump(None, fp, protocol)
    except Exception as inst:
        # Report any other failure to the parent as a message string.
        pickle.dump(stringutil.forcebytestr(inst), fp, protocol)
    else:
        pickle.dump(None, fp, protocol)
    fp.flush()
    # With large history, cleanup process goes crazy and suddenly
    # consumes *huge* amount of memory. The output file being closed,
    # there is no need for clean termination.
    os._exit(0)
184 184
185 185
def debugsvnlog(ui, **opts):
    """Fetch SVN log in a subprocess and channel them back to parent to
    avoid memory collection issues.

    This is the child-process entry point: arguments arrive encoded on
    stdin and pickled results are streamed to stdout via get_log_child.
    """
    if svn is None:
        raise error.Abort(
            _(b'debugsvnlog could not load Subversion python bindings')
        )

    args = decodeargs(ui.fin.read())
    get_log_child(ui.fout, *args)
197 197
198 198
class logstream(object):
    """Interruptible iterator over pickled log entries from a child.

    Iteration ends cleanly when the child sends a final None; any other
    non-tuple entry is treated as an error message from the child.
    """

    def __init__(self, stdout):
        self._stdout = stdout

    def __iter__(self):
        while True:
            try:
                item = pickle.load(self._stdout)
            except EOFError:
                raise error.Abort(
                    _(
                        b'Mercurial failed to run itself, check'
                        b' hg executable is in PATH'
                    )
                )
            try:
                # Well-formed entries unpack into exactly five fields.
                orig_paths, revnum, author, date, message = item
            except (TypeError, ValueError):
                if item is None:
                    # Clean end-of-stream marker.
                    break
                raise error.Abort(_(b"log stream exception '%s'") % item)
            yield item

    def close(self):
        stream = self._stdout
        if stream:
            stream.close()
            self._stdout = None
228 228
229 229
class directlogstream(list):
    """Direct revision log iterator.
    This can be used for debugging and development but it will probably leak
    memory and is not suitable for real conversions."""

    def __init__(
        self,
        url,
        paths,
        start,
        end,
        limit=0,
        discover_changed_paths=True,
        strict_node_history=False,
    ):
        # Eagerly collect every entry into this list (hence the memory
        # warning above); each item is (paths, revnum, author, date, message).
        def receiver(orig_paths, revnum, author, date, message, pool):
            paths = {}
            if orig_paths is not None:
                for k, v in pycompat.iteritems(orig_paths):
                    paths[k] = changedpath(v)
            self.append((paths, revnum, author, date, message))

        # Use an ra of our own so that our parent can consume
        # our results without confusing the server.
        t = transport.SvnRaTransport(url=url)
        svn.ra.get_log(
            t.ra,
            paths,
            start,
            end,
            limit,
            discover_changed_paths,
            strict_node_history,
            receiver,
        )

    def close(self):
        # Nothing to release; present for interface parity with logstream.
        pass
268 268
269 269
# Check to see if the given path is a local Subversion repo. Verify this by
# looking for several svn-specific files and directories in the given
# directory.
def filecheck(ui, path, proto):
    required = (b'locks', b'hooks', b'format', b'db')
    return all(
        os.path.exists(os.path.join(path, name)) for name in required
    )
278 278
279 279
# Check to see if a given path is the root of an svn repo over http. We verify
# this by requesting a version-controlled URL we know can't exist and looking
# for the svn-specific "not found" XML.
def httpcheck(ui, path, proto):
    try:
        opener = urlreq.buildopener()
        rsp = opener.open(b'%s://%s/!svn/ver/0/.svn' % (proto, path), b'rb')
        data = rsp.read()
    except urlerr.httperror as inst:
        if inst.code != 404:
            # Except for 404 we cannot know for sure this is not an svn repo
            ui.warn(
                _(
                    b'svn: cannot probe remote repository, assume it could '
                    b'be a subversion repository. Use --source-type if you '
                    b'know better.\n'
                )
            )
            return True
        # A 404 response still carries a body; check it for the marker.
        data = inst.fp.read()
    except Exception:
        # Could be urlerr.urlerror if the URL is invalid or anything else.
        return False
    # errcode 160013 is the svn "not found" marker described above.
    return b'<m:human-readable errcode="160013">' in data
304 304
305 305
# Map URL schemes to the probe function able to verify a Subversion
# repository served over that scheme (see issvnurl below).
protomap = {
    b'http': httpcheck,
    b'https': httpcheck,
    b'file': filecheck,
}
311 311
312 312
def issvnurl(ui, url):
    """Best-effort check that *url* points inside a Subversion repository.

    Walks up the path one component at a time, probing each prefix with
    the scheme-specific checker from protomap.
    """
    try:
        proto, path = url.split(b'://', 1)
        if proto == b'file':
            if (
                pycompat.iswindows
                and path[:1] == b'/'
                and path[1:2].isalpha()
                and path[2:6].lower() == b'%3a/'
            ):
                # Turn file:///C%3A/... into a usable C:/ drive path.
                path = path[:2] + b':/' + path[6:]
            path = urlreq.url2pathname(path)
    except ValueError:
        # No '://' separator: treat the whole url as a local path.
        proto = b'file'
        path = os.path.abspath(url)
    if proto == b'file':
        path = util.pconvert(path)
    check = protomap.get(proto, lambda *args: False)
    while b'/' in path:
        if check(ui, path, proto):
            return True
        # Not a repository root yet; retry with the parent directory.
        path = path.rsplit(b'/', 1)[0]
    return False
336 336
337 337
338 338 # SVN conversion code stolen from bzr-svn and tailor
339 339 #
340 340 # Subversion looks like a versioned filesystem, branches structures
341 341 # are defined by conventions and not enforced by the tool. First,
342 342 # we define the potential branches (modules) as "trunk" and "branches"
343 343 # children directories. Revisions are then identified by their
344 344 # module and revision number (and a repository identifier).
345 345 #
346 346 # The revision graph is really a tree (or a forest). By default, a
347 347 # revision parent is the previous revision in the same module. If the
348 348 # module directory is copied/moved from another module then the
349 349 # revision is the module root and its parent the source revision in
350 350 # the parent module. A revision has at most one parent.
351 351 #
352 352 class svn_source(converter_source):
    def __init__(self, ui, repotype, url, revs=None):
        """Open *url* as a Subversion conversion source.

        Raises NoRepo when *url* does not look like an svn repository and
        MissingTool when the Subversion bindings are missing or too old.
        """
        super(svn_source, self).__init__(ui, repotype, url, revs=revs)

        # Cheap sanity check before touching the bindings: accept svn://
        # URLs, working copies, and anything the protomap probes recognize.
        if not (
            url.startswith(b'svn://')
            or url.startswith(b'svn+ssh://')
            or (
                os.path.exists(url)
                and os.path.exists(os.path.join(url, b'.svn'))
            )
            or issvnurl(ui, url)
        ):
            raise NoRepo(
                _(b"%s does not look like a Subversion repository") % url
            )
        if svn is None:
            raise MissingTool(_(b'could not load Subversion python bindings'))

        try:
            version = svn.core.SVN_VER_MAJOR, svn.core.SVN_VER_MINOR
            if version < (1, 4):
                raise MissingTool(
                    _(
                        b'Subversion python bindings %d.%d found, '
                        b'1.4 or later required'
                    )
                    % version
                )
        except AttributeError:
            # Very old bindings do not even expose the version constants.
            raise MissingTool(
                _(
                    b'Subversion python bindings are too old, 1.4 '
                    b'or later required'
                )
            )

        self.lastrevs = {}

        latest = None
        try:
            # Support file://path@rev syntax. Useful e.g. to convert
            # deleted branches.
            at = url.rfind(b'@')
            if at >= 0:
                latest = int(url[at + 1 :])
                url = url[:at]
        except ValueError:
            pass
        self.url = geturl(url)
        self.encoding = b'UTF-8'  # Subversion is always nominal UTF-8
        try:
            self.transport = transport.SvnRaTransport(url=self.url)
            self.ra = self.transport.ra
            self.ctx = self.transport.client
            self.baseurl = svn.ra.get_repos_root(self.ra)
            # Module is either empty or a repository path starting with
            # a slash and not ending with a slash.
            self.module = urlreq.unquote(self.url[len(self.baseurl) :])
            self.prevmodule = None
            self.rootmodule = self.module
            self.commits = {}
            self.paths = {}
            self.uuid = svn.ra.get_uuid(self.ra)
        except svn.core.SubversionException:
            ui.traceback()
            svnversion = b'%d.%d.%d' % (
                svn.core.SVN_VER_MAJOR,
                svn.core.SVN_VER_MINOR,
                svn.core.SVN_VER_MICRO,
            )
            raise NoRepo(
                _(
                    b"%s does not look like a Subversion repository "
                    b"to libsvn version %s"
                )
                % (self.url, svnversion)
            )

        if revs:
            # Only a single --rev is supported for svn sources.
            if len(revs) > 1:
                raise error.Abort(
                    _(
                        b'subversion source does not support '
                        b'specifying multiple revisions'
                    )
                )
            try:
                latest = int(revs[0])
            except ValueError:
                raise error.Abort(
                    _(b'svn: revision %s is not an integer') % revs[0]
                )

        trunkcfg = self.ui.config(b'convert', b'svn.trunk')
        if trunkcfg is None:
            trunkcfg = b'trunk'
        self.trunkname = trunkcfg.strip(b'/')
        self.startrev = self.ui.config(b'convert', b'svn.startrev')
        try:
            self.startrev = int(self.startrev)
            if self.startrev < 0:
                self.startrev = 0
        except ValueError:
            raise error.Abort(
                _(b'svn: start revision %s is not an integer') % self.startrev
            )

        try:
            self.head = self.latest(self.module, latest)
        except SvnPathNotFound:
            self.head = None
        if not self.head:
            raise error.Abort(
                _(b'no revision found in module %s') % self.module
            )
        self.last_changed = self.revnum(self.head)

        # (rev, (files, copies)) cache shared between getchangedfiles
        # and getchanges.
        self._changescache = (None, None)

        if os.path.exists(os.path.join(url, b'.svn/entries')):
            self.wc = url
        else:
            self.wc = None
        self.convertfp = None
477 477
478 478 def setrevmap(self, revmap):
479 479 lastrevs = {}
480 480 for revid in revmap:
481 481 uuid, module, revnum = revsplit(revid)
482 482 lastrevnum = lastrevs.setdefault(module, revnum)
483 483 if revnum > lastrevnum:
484 484 lastrevs[module] = revnum
485 485 self.lastrevs = lastrevs
486 486
    def exists(self, path, optrev):
        """Return True if *path* exists in the repository at *optrev*."""
        try:
            # ls raises if the path does not exist at that revision.
            svn.client.ls(
                self.url.rstrip(b'/') + b'/' + quote(path),
                optrev,
                False,
                self.ctx,
            )
            return True
        except svn.core.SubversionException:
            return False
498 498
    def getheads(self):
        """Return the list of head revision ids to convert.

        The first head is the trunk/module head; further heads come from
        the configured branches directory.  Also resolves and stores
        self.tags for later use by gettags().
        """

        def isdir(path, revnum):
            # True when *path* is a directory at *revnum*.
            kind = self._checkpath(path, revnum)
            return kind == svn.core.svn_node_dir

        def getcfgpath(name, rev):
            # Resolve the convert.svn.<name> layout path, verifying that
            # it exists at *rev*; return None when unset or absent.
            cfgpath = self.ui.config(b'convert', b'svn.' + name)
            if cfgpath is not None and cfgpath.strip() == b'':
                return None
            path = (cfgpath or name).strip(b'/')
            if not self.exists(path, rev):
                if self.module.endswith(path) and name == b'trunk':
                    # we are converting from inside this directory
                    return None
                if cfgpath:
                    raise error.Abort(
                        _(b'expected %s to be at %r, but not found')
                        % (name, path)
                    )
                return None
            self.ui.note(_(b'found %s at %r\n') % (name, path))
            return path

        rev = optrev(self.last_changed)
        oldmodule = b''
        trunk = getcfgpath(b'trunk', rev)
        self.tags = getcfgpath(b'tags', rev)
        branches = getcfgpath(b'branches', rev)

        # If the project has a trunk or branches, we will extract heads
        # from them. We keep the project root otherwise.
        if trunk:
            oldmodule = self.module or b''
            self.module += b'/' + trunk
            self.head = self.latest(self.module, self.last_changed)
            if not self.head:
                raise error.Abort(
                    _(b'no revision found in module %s') % self.module
                )

        # First head in the list is the module's head
        self.heads = [self.head]
        if self.tags is not None:
            self.tags = b'%s/%s' % (oldmodule, (self.tags or b'tags'))

        # Check if branches bring a few more heads to the list
        if branches:
            rpath = self.url.strip(b'/')
            branchnames = svn.client.ls(
                rpath + b'/' + quote(branches), rev, False, self.ctx
            )
            for branch in sorted(branchnames):
                module = b'%s/%s/%s' % (oldmodule, branches, branch)
                if not isdir(module, self.last_changed):
                    continue
                brevid = self.latest(module, self.last_changed)
                if not brevid:
                    self.ui.note(_(b'ignoring empty branch %s\n') % branch)
                    continue
                self.ui.note(
                    _(b'found branch %s at %d\n')
                    % (branch, self.revnum(brevid))
                )
                self.heads.append(brevid)

        if self.startrev and self.heads:
            # svn.startrev only makes sense on a single linear branch.
            if len(self.heads) > 1:
                raise error.Abort(
                    _(
                        b'svn: start revision is not supported '
                        b'with more than one branch'
                    )
                )
            revnum = self.revnum(self.heads[0])
            if revnum < self.startrev:
                raise error.Abort(
                    _(b'svn: no revision found after start revision %d')
                    % self.startrev
                )

        return self.heads
580 580
    def _getchanges(self, rev, full):
        """Compute (files, copies) for revision *rev*.

        files is an iterable of (path, rev) pairs; copies maps copy
        destinations to their sources.  Also updates self.removed.
        """
        (paths, parents) = self.paths[rev]
        copies = {}
        if parents:
            files, self.removed, copies = self.expandpaths(rev, paths, parents)
        if full or not parents:
            # Perform a full checkout on roots
            uuid, module, revnum = revsplit(rev)
            entries = svn.client.ls(
                self.baseurl + quote(module), optrev(revnum), True, self.ctx
            )
            files = [
                n
                for n, e in pycompat.iteritems(entries)
                if e.kind == svn.core.svn_node_file
            ]
            self.removed = set()

        files.sort()
        files = zip(files, [rev] * len(files))
        return (files, copies)
602 602
603 603 def getchanges(self, rev, full):
604 604 # reuse cache from getchangedfiles
605 605 if self._changescache[0] == rev and not full:
606 606 (files, copies) = self._changescache[1]
607 607 else:
608 608 (files, copies) = self._getchanges(rev, full)
609 609 # caller caches the result, so free it here to release memory
610 610 del self.paths[rev]
611 611 return (files, copies, set())
612 612
613 613 def getchangedfiles(self, rev, i):
614 614 # called from filemap - cache computed values for reuse in getchanges
615 615 (files, copies) = self._getchanges(rev, False)
616 616 self._changescache = (rev, (files, copies))
617 617 return [f[0] for f in files]
618 618
    def getcommit(self, rev):
        """Return (and forget) the commit object for *rev*, fetching
        revisions from the server lazily on first access."""
        if rev not in self.commits:
            uuid, module, revnum = revsplit(rev)
            self.module = module
            self.reparent(module)
            # We assume that:
            # - requests for revisions after "stop" come from the
            # revision graph backward traversal. Cache all of them
            # down to stop, they will be used eventually.
            # - requests for revisions before "stop" come to get
            # isolated branches parents. Just fetch what is needed.
            stop = self.lastrevs.get(module, 0)
            if revnum < stop:
                stop = revnum + 1
            self._fetch_revisions(revnum, stop)
            if rev not in self.commits:
                raise error.Abort(_(b'svn: revision %s not found') % revnum)
        revcommit = self.commits[rev]
        # caller caches the result, so free it here to release memory
        del self.commits[rev]
        return revcommit
640 640
641 641 def checkrevformat(self, revstr, mapname=b'splicemap'):
642 642 """ fails if revision format does not match the correct format"""
643 643 if not re.match(
644 644 r'svn:[0-9a-f]{8,8}-[0-9a-f]{4,4}-'
645 645 r'[0-9a-f]{4,4}-[0-9a-f]{4,4}-[0-9a-f]'
646 r'{12,12}(.*)\@[0-9]+$',
646 r'{12,12}(.*)@[0-9]+$',
647 647 revstr,
648 648 ):
649 649 raise error.Abort(
650 650 _(b'%s entry %s is not a valid revision identifier')
651 651 % (mapname, revstr)
652 652 )
653 653
654 654 def numcommits(self):
655 655 return int(self.head.rsplit(b'@', 1)[1]) - self.startrev
656 656
    def gettags(self):
        """Map tag names to revision ids by a single backward traversal
        of the tags directory history."""
        tags = {}
        if self.tags is None:
            return tags

        # svn tags are just a convention, project branches left in a
        # 'tags' directory. There is no other relationship than
        # ancestry, which is expensive to discover and makes them hard
        # to update incrementally. Worse, past revisions may be
        # referenced by tags far away in the future, requiring a deep
        # history traversal on every calculation. Current code
        # performs a single backward traversal, tracking moves within
        # the tags directory (tag renaming) and recording a new tag
        # everytime a project is copied from outside the tags
        # directory. It also lists deleted tags, this behaviour may
        # change in the future.
        pendings = []
        tagspath = self.tags
        start = svn.ra.get_latest_revnum(self.ra)
        stream = self._getlog([self.tags], start, self.startrev)
        try:
            for entry in stream:
                origpaths, revnum, author, date, message = entry
                if not origpaths:
                    origpaths = []
                # (source, source rev, destination) triples of copies
                # performed in this revision.
                copies = [
                    (e.copyfrom_path, e.copyfrom_rev, p)
                    for p, e in pycompat.iteritems(origpaths)
                    if e.copyfrom_path
                ]
                # Apply moves/copies from more specific to general
                copies.sort(reverse=True)

                srctagspath = tagspath
                if copies and copies[-1][2] == tagspath:
                    # Track tags directory moves
                    srctagspath = copies.pop()[0]

                for source, sourcerev, dest in copies:
                    if not dest.startswith(tagspath + b'/'):
                        continue
                    for tag in pendings:
                        if tag[0].startswith(dest):
                            tagpath = source + tag[0][len(dest) :]
                            tag[:2] = [tagpath, sourcerev]
                            break
                    else:
                        pendings.append([source, sourcerev, dest])

                # Filter out tags with children coming from different
                # parts of the repository like:
                # /tags/tag.1 (from /trunk:10)
                # /tags/tag.1/foo (from /branches/foo:12)
                # Here/tags/tag.1 discarded as well as its children.
                # It happens with tools like cvs2svn. Such tags cannot
                # be represented in mercurial.
                addeds = dict(
                    (p, e.copyfrom_path)
                    for p, e in pycompat.iteritems(origpaths)
                    if e.action == b'A' and e.copyfrom_path
                )
                badroots = set()
                for destroot in addeds:
                    for source, sourcerev, dest in pendings:
                        if not dest.startswith(
                            destroot + b'/'
                        ) or source.startswith(addeds[destroot] + b'/'):
                            continue
                        badroots.add(destroot)
                        break

                for badroot in badroots:
                    pendings = [
                        p
                        for p in pendings
                        if p[2] != badroot
                        and not p[2].startswith(badroot + b'/')
                    ]

                # Tell tag renamings from tag creations
                renamings = []
                for source, sourcerev, dest in pendings:
                    tagname = dest.split(b'/')[-1]
                    if source.startswith(srctagspath):
                        renamings.append([source, sourcerev, tagname])
                        continue
                    if tagname in tags:
                        # Keep the latest tag value
                        continue
                    # From revision may be fake, get one with changes
                    try:
                        tagid = self.latest(source, sourcerev)
                        if tagid and tagname not in tags:
                            tags[tagname] = tagid
                    except SvnPathNotFound:
                        # It happens when we are following directories
                        # we assumed were copied with their parents
                        # but were really created in the tag
                        # directory.
                        pass
                pendings = renamings
                tagspath = srctagspath
        finally:
            stream.close()
        return tags
762 762
    def converted(self, rev, destrev):
        """Record that svn revision *rev* was converted to *destrev*.

        Only meaningful when converting from a working copy: the mapping
        is appended to the working copy's .svn/hg-shamap file.
        """
        if not self.wc:
            return
        if self.convertfp is None:
            # Lazily open the shamap file in append mode on first use.
            self.convertfp = open(
                os.path.join(self.wc, b'.svn', b'hg-shamap'), b'ab'
            )
        self.convertfp.write(
            util.tonativeeol(b'%s %d\n' % (destrev, self.revnum(rev)))
        )
        self.convertfp.flush()
774 774
775 775 def revid(self, revnum, module=None):
776 776 return b'svn:%s%s@%s' % (self.uuid, module or self.module, revnum)
777 777
778 778 def revnum(self, rev):
779 779 return int(rev.split(b'@')[-1])
780 780
    def latest(self, path, stop=None):
        """Find the latest revid affecting path, up to stop revision
        number. If stop is None, default to repository latest
        revision. It may return a revision in a different module,
        since a branch may be moved without a change being
        reported. Return None if computed module does not belong to
        rootmodule subtree.
        """

        def findchanges(path, start, stop=None):
            # Walk the log from *start* downward, following renames, and
            # return (revnum, path) for the first relevant change found.
            stream = self._getlog([path], start, stop or 1)
            try:
                for entry in stream:
                    paths, revnum, author, date, message = entry
                    if stop is None and paths:
                        # We do not know the latest changed revision,
                        # keep the first one with changed paths.
                        break
                    if revnum <= stop:
                        break

                    for p in paths:
                        if not path.startswith(p) or not paths[p].copyfrom_path:
                            continue
                        # Follow the rename back to its source path.
                        newpath = paths[p].copyfrom_path + path[len(p) :]
                        self.ui.debug(
                            b"branch renamed from %s to %s at %d\n"
                            % (path, newpath, revnum)
                        )
                        path = newpath
                        break
                if not paths:
                    revnum = None
                return revnum, path
            finally:
                stream.close()

        if not path.startswith(self.rootmodule):
            # Requests on foreign branches may be forbidden at server level
            self.ui.debug(b'ignoring foreign branch %r\n' % path)
            return None

        if stop is None:
            stop = svn.ra.get_latest_revnum(self.ra)
        try:
            # stat() must be issued relative to the repository root.
            prevmodule = self.reparent(b'')
            dirent = svn.ra.stat(self.ra, path.strip(b'/'), stop)
            self.reparent(prevmodule)
        except svn.core.SubversionException:
            dirent = None
        if not dirent:
            raise SvnPathNotFound(
                _(b'%s not found up to revision %d') % (path, stop)
            )

        # stat() gives us the previous revision on this line of
        # development, but it might be in *another module*. Fetch the
        # log and detect renames down to the latest revision.
        revnum, realpath = findchanges(path, stop, dirent.created_rev)
        if revnum is None:
            # Tools like svnsync can create empty revision, when
            # synchronizing only a subtree for instance. These empty
            # revisions created_rev still have their original values
            # despite all changes having disappeared and can be
            # returned by ra.stat(), at least when stating the root
            # module. In that case, do not trust created_rev and scan
            # the whole history.
            revnum, realpath = findchanges(path, stop)
            if revnum is None:
                self.ui.debug(b'ignoring empty branch %r\n' % realpath)
                return None

        if not realpath.startswith(self.rootmodule):
            self.ui.debug(b'ignoring foreign branch %r\n' % realpath)
            return None
        return self.revid(revnum, realpath)
857 857
858 858 def reparent(self, module):
859 859 """Reparent the svn transport and return the previous parent."""
860 860 if self.prevmodule == module:
861 861 return module
862 862 svnurl = self.baseurl + quote(module)
863 863 prevmodule = self.prevmodule
864 864 if prevmodule is None:
865 865 prevmodule = b''
866 866 self.ui.debug(b"reparent to %s\n" % svnurl)
867 867 svn.ra.reparent(self.ra, svnurl)
868 868 self.prevmodule = module
869 869 return prevmodule
870 870
    def expandpaths(self, rev, paths, parents):
        """Expand the changed *paths* of *rev* into (changed, removed, copies).

        changed is returned as a list and includes the removed entries;
        removed is the set of deleted files; copies maps copy
        destinations to their sources.
        """
        changed, removed = set(), set()
        copies = {}

        new_module, revnum = revsplit(rev)[1:]
        if new_module != self.module:
            self.module = new_module
            self.reparent(self.module)

        progress = self.ui.makeprogress(
            _(b'scanning paths'), unit=_(b'paths'), total=len(paths)
        )
        for i, (path, ent) in enumerate(paths):
            progress.update(i, item=path)
            entrypath = self.getrelpath(path)

            kind = self._checkpath(entrypath, revnum)
            if kind == svn.core.svn_node_file:
                changed.add(self.recode(entrypath))
                if not ent.copyfrom_path or not parents:
                    continue
                # Copy sources not in parent revisions cannot be
                # represented, ignore their origin for now
                pmodule, prevnum = revsplit(parents[0])[1:]
                if ent.copyfrom_rev < prevnum:
                    continue
                copyfrom_path = self.getrelpath(ent.copyfrom_path, pmodule)
                if not copyfrom_path:
                    continue
                self.ui.debug(
                    b"copied to %s from %s@%s\n"
                    % (entrypath, copyfrom_path, ent.copyfrom_rev)
                )
                copies[self.recode(entrypath)] = self.recode(copyfrom_path)
            elif kind == 0:  # gone, but had better be a deleted *file*
                self.ui.debug(b"gone from %s\n" % ent.copyfrom_rev)
                pmodule, prevnum = revsplit(parents[0])[1:]
                parentpath = pmodule + b"/" + entrypath
                fromkind = self._checkpath(entrypath, prevnum, pmodule)

                if fromkind == svn.core.svn_node_file:
                    removed.add(self.recode(entrypath))
                elif fromkind == svn.core.svn_node_dir:
                    # A whole directory was removed: remove every file
                    # that lived under it in the parent revision.
                    oroot = parentpath.strip(b'/')
                    nroot = path.strip(b'/')
                    children = self._iterfiles(oroot, prevnum)
                    for childpath in children:
                        childpath = childpath.replace(oroot, nroot)
                        childpath = self.getrelpath(b"/" + childpath, pmodule)
                        if childpath:
                            removed.add(self.recode(childpath))
                else:
                    self.ui.debug(
                        b'unknown path in revision %d: %s\n' % (revnum, path)
                    )
            elif kind == svn.core.svn_node_dir:
                if ent.action == b'M':
                    # If the directory just had a prop change,
                    # then we shouldn't need to look for its children.
                    continue
                if ent.action == b'R' and parents:
                    # If a directory is replacing a file, mark the previous
                    # file as deleted
                    pmodule, prevnum = revsplit(parents[0])[1:]
                    pkind = self._checkpath(entrypath, prevnum, pmodule)
                    if pkind == svn.core.svn_node_file:
                        removed.add(self.recode(entrypath))
                    elif pkind == svn.core.svn_node_dir:
                        # We do not know what files were kept or removed,
                        # mark them all as changed.
                        for childpath in self._iterfiles(pmodule, prevnum):
                            childpath = self.getrelpath(b"/" + childpath)
                            if childpath:
                                changed.add(self.recode(childpath))

                for childpath in self._iterfiles(path, revnum):
                    childpath = self.getrelpath(b"/" + childpath)
                    if childpath:
                        changed.add(self.recode(childpath))

                # Handle directory copies
                if not ent.copyfrom_path or not parents:
                    continue
                # Copy sources not in parent revisions cannot be
                # represented, ignore their origin for now
                pmodule, prevnum = revsplit(parents[0])[1:]
                if ent.copyfrom_rev < prevnum:
                    continue
                copyfrompath = self.getrelpath(ent.copyfrom_path, pmodule)
                if not copyfrompath:
                    continue
                self.ui.debug(
                    b"mark %s came from %s:%d\n"
                    % (path, copyfrompath, ent.copyfrom_rev)
                )
                # Record a copy entry for every file under the copied dir.
                children = self._iterfiles(ent.copyfrom_path, ent.copyfrom_rev)
                for childpath in children:
                    childpath = self.getrelpath(b"/" + childpath, pmodule)
                    if not childpath:
                        continue
                    copytopath = path + childpath[len(copyfrompath) :]
                    copytopath = self.getrelpath(copytopath)
                    copies[self.recode(copytopath)] = self.recode(childpath)

        progress.complete()
        changed.update(removed)
        return (list(changed), removed, copies)
978 978
979 979 def _fetch_revisions(self, from_revnum, to_revnum):
980 980 if from_revnum < to_revnum:
981 981 from_revnum, to_revnum = to_revnum, from_revnum
982 982
983 983 self.child_cset = None
984 984
985 985 def parselogentry(orig_paths, revnum, author, date, message):
986 986 """Return the parsed commit object or None, and True if
987 987 the revision is a branch root.
988 988 """
989 989 self.ui.debug(
990 990 b"parsing revision %d (%d changes)\n"
991 991 % (revnum, len(orig_paths))
992 992 )
993 993
994 994 branched = False
995 995 rev = self.revid(revnum)
996 996 # branch log might return entries for a parent we already have
997 997
998 998 if rev in self.commits or revnum < to_revnum:
999 999 return None, branched
1000 1000
1001 1001 parents = []
1002 1002 # check whether this revision is the start of a branch or part
1003 1003 # of a branch renaming
1004 1004 orig_paths = sorted(pycompat.iteritems(orig_paths))
1005 1005 root_paths = [
1006 1006 (p, e) for p, e in orig_paths if self.module.startswith(p)
1007 1007 ]
1008 1008 if root_paths:
1009 1009 path, ent = root_paths[-1]
1010 1010 if ent.copyfrom_path:
1011 1011 branched = True
1012 1012 newpath = ent.copyfrom_path + self.module[len(path) :]
1013 1013 # ent.copyfrom_rev may not be the actual last revision
1014 1014 previd = self.latest(newpath, ent.copyfrom_rev)
1015 1015 if previd is not None:
1016 1016 prevmodule, prevnum = revsplit(previd)[1:]
1017 1017 if prevnum >= self.startrev:
1018 1018 parents = [previd]
1019 1019 self.ui.note(
1020 1020 _(b'found parent of branch %s at %d: %s\n')
1021 1021 % (self.module, prevnum, prevmodule)
1022 1022 )
1023 1023 else:
1024 1024 self.ui.debug(b"no copyfrom path, don't know what to do.\n")
1025 1025
1026 1026 paths = []
1027 1027 # filter out unrelated paths
1028 1028 for path, ent in orig_paths:
1029 1029 if self.getrelpath(path) is None:
1030 1030 continue
1031 1031 paths.append((path, ent))
1032 1032
1033 1033 # Example SVN datetime. Includes microseconds.
1034 1034 # ISO-8601 conformant
1035 1035 # '2007-01-04T17:35:00.902377Z'
1036 1036 date = dateutil.parsedate(
1037 1037 date[:19] + b" UTC", [b"%Y-%m-%dT%H:%M:%S"]
1038 1038 )
1039 1039 if self.ui.configbool(b'convert', b'localtimezone'):
1040 1040 date = makedatetimestamp(date[0])
1041 1041
1042 1042 if message:
1043 1043 log = self.recode(message)
1044 1044 else:
1045 1045 log = b''
1046 1046
1047 1047 if author:
1048 1048 author = self.recode(author)
1049 1049 else:
1050 1050 author = b''
1051 1051
1052 1052 try:
1053 1053 branch = self.module.split(b"/")[-1]
1054 1054 if branch == self.trunkname:
1055 1055 branch = None
1056 1056 except IndexError:
1057 1057 branch = None
1058 1058
1059 1059 cset = commit(
1060 1060 author=author,
1061 1061 date=dateutil.datestr(date, b'%Y-%m-%d %H:%M:%S %1%2'),
1062 1062 desc=log,
1063 1063 parents=parents,
1064 1064 branch=branch,
1065 1065 rev=rev,
1066 1066 )
1067 1067
1068 1068 self.commits[rev] = cset
1069 1069 # The parents list is *shared* among self.paths and the
1070 1070 # commit object. Both will be updated below.
1071 1071 self.paths[rev] = (paths, cset.parents)
1072 1072 if self.child_cset and not self.child_cset.parents:
1073 1073 self.child_cset.parents[:] = [rev]
1074 1074 self.child_cset = cset
1075 1075 return cset, branched
1076 1076
1077 1077 self.ui.note(
1078 1078 _(b'fetching revision log for "%s" from %d to %d\n')
1079 1079 % (self.module, from_revnum, to_revnum)
1080 1080 )
1081 1081
1082 1082 try:
1083 1083 firstcset = None
1084 1084 lastonbranch = False
1085 1085 stream = self._getlog([self.module], from_revnum, to_revnum)
1086 1086 try:
1087 1087 for entry in stream:
1088 1088 paths, revnum, author, date, message = entry
1089 1089 if revnum < self.startrev:
1090 1090 lastonbranch = True
1091 1091 break
1092 1092 if not paths:
1093 1093 self.ui.debug(b'revision %d has no entries\n' % revnum)
1094 1094 # If we ever leave the loop on an empty
1095 1095 # revision, do not try to get a parent branch
1096 1096 lastonbranch = lastonbranch or revnum == 0
1097 1097 continue
1098 1098 cset, lastonbranch = parselogentry(
1099 1099 paths, revnum, author, date, message
1100 1100 )
1101 1101 if cset:
1102 1102 firstcset = cset
1103 1103 if lastonbranch:
1104 1104 break
1105 1105 finally:
1106 1106 stream.close()
1107 1107
1108 1108 if not lastonbranch and firstcset and not firstcset.parents:
1109 1109 # The first revision of the sequence (the last fetched one)
1110 1110 # has invalid parents if not a branch root. Find the parent
1111 1111 # revision now, if any.
1112 1112 try:
1113 1113 firstrevnum = self.revnum(firstcset.rev)
1114 1114 if firstrevnum > 1:
1115 1115 latest = self.latest(self.module, firstrevnum - 1)
1116 1116 if latest:
1117 1117 firstcset.parents.append(latest)
1118 1118 except SvnPathNotFound:
1119 1119 pass
1120 1120 except svn.core.SubversionException as xxx_todo_changeme:
1121 1121 (inst, num) = xxx_todo_changeme.args
1122 1122 if num == svn.core.SVN_ERR_FS_NO_SUCH_REVISION:
1123 1123 raise error.Abort(
1124 1124 _(b'svn: branch has no revision %s') % to_revnum
1125 1125 )
1126 1126 raise
1127 1127
1128 1128 def getfile(self, file, rev):
1129 1129 # TODO: ra.get_file transmits the whole file instead of diffs.
1130 1130 if file in self.removed:
1131 1131 return None, None
1132 1132 try:
1133 1133 new_module, revnum = revsplit(rev)[1:]
1134 1134 if self.module != new_module:
1135 1135 self.module = new_module
1136 1136 self.reparent(self.module)
1137 1137 io = stringio()
1138 1138 info = svn.ra.get_file(self.ra, file, revnum, io)
1139 1139 data = io.getvalue()
1140 1140 # ra.get_file() seems to keep a reference on the input buffer
1141 1141 # preventing collection. Release it explicitly.
1142 1142 io.close()
1143 1143 if isinstance(info, list):
1144 1144 info = info[-1]
1145 1145 mode = (b"svn:executable" in info) and b'x' or b''
1146 1146 mode = (b"svn:special" in info) and b'l' or mode
1147 1147 except svn.core.SubversionException as e:
1148 1148 notfound = (
1149 1149 svn.core.SVN_ERR_FS_NOT_FOUND,
1150 1150 svn.core.SVN_ERR_RA_DAV_PATH_NOT_FOUND,
1151 1151 )
1152 1152 if e.apr_err in notfound: # File not found
1153 1153 return None, None
1154 1154 raise
1155 1155 if mode == b'l':
1156 1156 link_prefix = b"link "
1157 1157 if data.startswith(link_prefix):
1158 1158 data = data[len(link_prefix) :]
1159 1159 return data, mode
1160 1160
1161 1161 def _iterfiles(self, path, revnum):
1162 1162 """Enumerate all files in path at revnum, recursively."""
1163 1163 path = path.strip(b'/')
1164 1164 pool = svn.core.Pool()
1165 1165 rpath = b'/'.join([self.baseurl, quote(path)]).strip(b'/')
1166 1166 entries = svn.client.ls(rpath, optrev(revnum), True, self.ctx, pool)
1167 1167 if path:
1168 1168 path += b'/'
1169 1169 return (
1170 1170 (path + p)
1171 1171 for p, e in pycompat.iteritems(entries)
1172 1172 if e.kind == svn.core.svn_node_file
1173 1173 )
1174 1174
1175 1175 def getrelpath(self, path, module=None):
1176 1176 if module is None:
1177 1177 module = self.module
1178 1178 # Given the repository url of this wc, say
1179 1179 # "http://server/plone/CMFPlone/branches/Plone-2_0-branch"
1180 1180 # extract the "entry" portion (a relative path) from what
1181 1181 # svn log --xml says, i.e.
1182 1182 # "/CMFPlone/branches/Plone-2_0-branch/tests/PloneTestCase.py"
1183 1183 # that is to say "tests/PloneTestCase.py"
1184 1184 if path.startswith(module):
1185 1185 relative = path.rstrip(b'/')[len(module) :]
1186 1186 if relative.startswith(b'/'):
1187 1187 return relative[1:]
1188 1188 elif relative == b'':
1189 1189 return relative
1190 1190
1191 1191 # The path is outside our tracked tree...
1192 1192 self.ui.debug(b'%r is not under %r, ignoring\n' % (path, module))
1193 1193 return None
1194 1194
1195 1195 def _checkpath(self, path, revnum, module=None):
1196 1196 if module is not None:
1197 1197 prevmodule = self.reparent(b'')
1198 1198 path = module + b'/' + path
1199 1199 try:
1200 1200 # ra.check_path does not like leading slashes very much, it leads
1201 1201 # to PROPFIND subversion errors
1202 1202 return svn.ra.check_path(self.ra, path.strip(b'/'), revnum)
1203 1203 finally:
1204 1204 if module is not None:
1205 1205 self.reparent(prevmodule)
1206 1206
1207 1207 def _getlog(
1208 1208 self,
1209 1209 paths,
1210 1210 start,
1211 1211 end,
1212 1212 limit=0,
1213 1213 discover_changed_paths=True,
1214 1214 strict_node_history=False,
1215 1215 ):
1216 1216 # Normalize path names, svn >= 1.5 only wants paths relative to
1217 1217 # supplied URL
1218 1218 relpaths = []
1219 1219 for p in paths:
1220 1220 if not p.startswith(b'/'):
1221 1221 p = self.module + b'/' + p
1222 1222 relpaths.append(p.strip(b'/'))
1223 1223 args = [
1224 1224 self.baseurl,
1225 1225 relpaths,
1226 1226 start,
1227 1227 end,
1228 1228 limit,
1229 1229 discover_changed_paths,
1230 1230 strict_node_history,
1231 1231 ]
1232 1232 # developer config: convert.svn.debugsvnlog
1233 1233 if not self.ui.configbool(b'convert', b'svn.debugsvnlog'):
1234 1234 return directlogstream(*args)
1235 1235 arg = encodeargs(args)
1236 1236 hgexe = procutil.hgexecutable()
1237 1237 cmd = b'%s debugsvnlog' % procutil.shellquote(hgexe)
1238 1238 stdin, stdout = procutil.popen2(procutil.quotecommand(cmd))
1239 1239 stdin.write(arg)
1240 1240 try:
1241 1241 stdin.close()
1242 1242 except IOError:
1243 1243 raise error.Abort(
1244 1244 _(
1245 1245 b'Mercurial failed to run itself, check'
1246 1246 b' hg executable is in PATH'
1247 1247 )
1248 1248 )
1249 1249 return logstream(stdout)
1250 1250
1251 1251
1252 1252 pre_revprop_change = b'''#!/bin/sh
1253 1253
1254 1254 REPOS="$1"
1255 1255 REV="$2"
1256 1256 USER="$3"
1257 1257 PROPNAME="$4"
1258 1258 ACTION="$5"
1259 1259
1260 1260 if [ "$ACTION" = "M" -a "$PROPNAME" = "svn:log" ]; then exit 0; fi
1261 1261 if [ "$ACTION" = "A" -a "$PROPNAME" = "hg:convert-branch" ]; then exit 0; fi
1262 1262 if [ "$ACTION" = "A" -a "$PROPNAME" = "hg:convert-rev" ]; then exit 0; fi
1263 1263
1264 1264 echo "Changing prohibited revision property" >&2
1265 1265 exit 1
1266 1266 '''
1267 1267
1268 1268
1269 1269 class svn_sink(converter_sink, commandline):
1270 1270 commit_re = re.compile(br'Committed revision (\d+).', re.M)
1271 1271 uuid_re = re.compile(br'Repository UUID:\s*(\S+)', re.M)
1272 1272
1273 1273 def prerun(self):
1274 1274 if self.wc:
1275 1275 os.chdir(self.wc)
1276 1276
1277 1277 def postrun(self):
1278 1278 if self.wc:
1279 1279 os.chdir(self.cwd)
1280 1280
1281 1281 def join(self, name):
1282 1282 return os.path.join(self.wc, b'.svn', name)
1283 1283
1284 1284 def revmapfile(self):
1285 1285 return self.join(b'hg-shamap')
1286 1286
1287 1287 def authorfile(self):
1288 1288 return self.join(b'hg-authormap')
1289 1289
1290 1290 def __init__(self, ui, repotype, path):
1291 1291
1292 1292 converter_sink.__init__(self, ui, repotype, path)
1293 1293 commandline.__init__(self, ui, b'svn')
1294 1294 self.delete = []
1295 1295 self.setexec = []
1296 1296 self.delexec = []
1297 1297 self.copies = []
1298 1298 self.wc = None
1299 1299 self.cwd = encoding.getcwd()
1300 1300
1301 1301 created = False
1302 1302 if os.path.isfile(os.path.join(path, b'.svn', b'entries')):
1303 1303 self.wc = os.path.realpath(path)
1304 1304 self.run0(b'update')
1305 1305 else:
1306 if not re.search(br'^(file|http|https|svn|svn\+ssh)\://', path):
1306 if not re.search(br'^(file|http|https|svn|svn\+ssh)://', path):
1307 1307 path = os.path.realpath(path)
1308 1308 if os.path.isdir(os.path.dirname(path)):
1309 1309 if not os.path.exists(
1310 1310 os.path.join(path, b'db', b'fs-type')
1311 1311 ):
1312 1312 ui.status(
1313 1313 _(b"initializing svn repository '%s'\n")
1314 1314 % os.path.basename(path)
1315 1315 )
1316 1316 commandline(ui, b'svnadmin').run0(b'create', path)
1317 1317 created = path
1318 1318 path = util.normpath(path)
1319 1319 if not path.startswith(b'/'):
1320 1320 path = b'/' + path
1321 1321 path = b'file://' + path
1322 1322
1323 1323 wcpath = os.path.join(
1324 1324 encoding.getcwd(), os.path.basename(path) + b'-wc'
1325 1325 )
1326 1326 ui.status(
1327 1327 _(b"initializing svn working copy '%s'\n")
1328 1328 % os.path.basename(wcpath)
1329 1329 )
1330 1330 self.run0(b'checkout', path, wcpath)
1331 1331
1332 1332 self.wc = wcpath
1333 1333 self.opener = vfsmod.vfs(self.wc)
1334 1334 self.wopener = vfsmod.vfs(self.wc)
1335 1335 self.childmap = mapfile(ui, self.join(b'hg-childmap'))
1336 1336 if util.checkexec(self.wc):
1337 1337 self.is_exec = util.isexec
1338 1338 else:
1339 1339 self.is_exec = None
1340 1340
1341 1341 if created:
1342 1342 hook = os.path.join(created, b'hooks', b'pre-revprop-change')
1343 1343 fp = open(hook, b'wb')
1344 1344 fp.write(pre_revprop_change)
1345 1345 fp.close()
1346 1346 util.setflags(hook, False, True)
1347 1347
1348 1348 output = self.run0(b'info')
1349 1349 self.uuid = self.uuid_re.search(output).group(1).strip()
1350 1350
1351 1351 def wjoin(self, *names):
1352 1352 return os.path.join(self.wc, *names)
1353 1353
1354 1354 @propertycache
1355 1355 def manifest(self):
1356 1356 # As of svn 1.7, the "add" command fails when receiving
1357 1357 # already tracked entries, so we have to track and filter them
1358 1358 # ourselves.
1359 1359 m = set()
1360 1360 output = self.run0(b'ls', recursive=True, xml=True)
1361 1361 doc = xml.dom.minidom.parseString(output)
1362 1362 for e in doc.getElementsByTagName('entry'):
1363 1363 for n in e.childNodes:
1364 1364 if n.nodeType != n.ELEMENT_NODE or n.tagName != 'name':
1365 1365 continue
1366 1366 name = ''.join(
1367 1367 c.data for c in n.childNodes if c.nodeType == c.TEXT_NODE
1368 1368 )
1369 1369 # Entries are compared with names coming from
1370 1370 # mercurial, so bytes with undefined encoding. Our
1371 1371 # best bet is to assume they are in local
1372 1372 # encoding. They will be passed to command line calls
1373 1373 # later anyway, so they better be.
1374 1374 m.add(encoding.unitolocal(name))
1375 1375 break
1376 1376 return m
1377 1377
1378 1378 def putfile(self, filename, flags, data):
1379 1379 if b'l' in flags:
1380 1380 self.wopener.symlink(data, filename)
1381 1381 else:
1382 1382 try:
1383 1383 if os.path.islink(self.wjoin(filename)):
1384 1384 os.unlink(filename)
1385 1385 except OSError:
1386 1386 pass
1387 1387
1388 1388 if self.is_exec:
1389 1389 # We need to check executability of the file before the change,
1390 1390 # because `vfs.write` is able to reset exec bit.
1391 1391 wasexec = False
1392 1392 if os.path.exists(self.wjoin(filename)):
1393 1393 wasexec = self.is_exec(self.wjoin(filename))
1394 1394
1395 1395 self.wopener.write(filename, data)
1396 1396
1397 1397 if self.is_exec:
1398 1398 if wasexec:
1399 1399 if b'x' not in flags:
1400 1400 self.delexec.append(filename)
1401 1401 else:
1402 1402 if b'x' in flags:
1403 1403 self.setexec.append(filename)
1404 1404 util.setflags(self.wjoin(filename), False, b'x' in flags)
1405 1405
1406 1406 def _copyfile(self, source, dest):
1407 1407 # SVN's copy command pukes if the destination file exists, but
1408 1408 # our copyfile method expects to record a copy that has
1409 1409 # already occurred. Cross the semantic gap.
1410 1410 wdest = self.wjoin(dest)
1411 1411 exists = os.path.lexists(wdest)
1412 1412 if exists:
1413 1413 fd, tempname = pycompat.mkstemp(
1414 1414 prefix=b'hg-copy-', dir=os.path.dirname(wdest)
1415 1415 )
1416 1416 os.close(fd)
1417 1417 os.unlink(tempname)
1418 1418 os.rename(wdest, tempname)
1419 1419 try:
1420 1420 self.run0(b'copy', source, dest)
1421 1421 finally:
1422 1422 self.manifest.add(dest)
1423 1423 if exists:
1424 1424 try:
1425 1425 os.unlink(wdest)
1426 1426 except OSError:
1427 1427 pass
1428 1428 os.rename(tempname, wdest)
1429 1429
1430 1430 def dirs_of(self, files):
1431 1431 dirs = set()
1432 1432 for f in files:
1433 1433 if os.path.isdir(self.wjoin(f)):
1434 1434 dirs.add(f)
1435 1435 i = len(f)
1436 1436 for i in iter(lambda: f.rfind(b'/', 0, i), -1):
1437 1437 dirs.add(f[:i])
1438 1438 return dirs
1439 1439
1440 1440 def add_dirs(self, files):
1441 1441 add_dirs = [
1442 1442 d for d in sorted(self.dirs_of(files)) if d not in self.manifest
1443 1443 ]
1444 1444 if add_dirs:
1445 1445 self.manifest.update(add_dirs)
1446 1446 self.xargs(add_dirs, b'add', non_recursive=True, quiet=True)
1447 1447 return add_dirs
1448 1448
1449 1449 def add_files(self, files):
1450 1450 files = [f for f in files if f not in self.manifest]
1451 1451 if files:
1452 1452 self.manifest.update(files)
1453 1453 self.xargs(files, b'add', quiet=True)
1454 1454 return files
1455 1455
1456 1456 def addchild(self, parent, child):
1457 1457 self.childmap[parent] = child
1458 1458
1459 1459 def revid(self, rev):
1460 1460 return b"svn:%s@%s" % (self.uuid, rev)
1461 1461
1462 1462 def putcommit(
1463 1463 self, files, copies, parents, commit, source, revmap, full, cleanp2
1464 1464 ):
1465 1465 for parent in parents:
1466 1466 try:
1467 1467 return self.revid(self.childmap[parent])
1468 1468 except KeyError:
1469 1469 pass
1470 1470
1471 1471 # Apply changes to working copy
1472 1472 for f, v in files:
1473 1473 data, mode = source.getfile(f, v)
1474 1474 if data is None:
1475 1475 self.delete.append(f)
1476 1476 else:
1477 1477 self.putfile(f, mode, data)
1478 1478 if f in copies:
1479 1479 self.copies.append([copies[f], f])
1480 1480 if full:
1481 1481 self.delete.extend(sorted(self.manifest.difference(files)))
1482 1482 files = [f[0] for f in files]
1483 1483
1484 1484 entries = set(self.delete)
1485 1485 files = frozenset(files)
1486 1486 entries.update(self.add_dirs(files.difference(entries)))
1487 1487 if self.copies:
1488 1488 for s, d in self.copies:
1489 1489 self._copyfile(s, d)
1490 1490 self.copies = []
1491 1491 if self.delete:
1492 1492 self.xargs(self.delete, b'delete')
1493 1493 for f in self.delete:
1494 1494 self.manifest.remove(f)
1495 1495 self.delete = []
1496 1496 entries.update(self.add_files(files.difference(entries)))
1497 1497 if self.delexec:
1498 1498 self.xargs(self.delexec, b'propdel', b'svn:executable')
1499 1499 self.delexec = []
1500 1500 if self.setexec:
1501 1501 self.xargs(self.setexec, b'propset', b'svn:executable', b'*')
1502 1502 self.setexec = []
1503 1503
1504 1504 fd, messagefile = pycompat.mkstemp(prefix=b'hg-convert-')
1505 1505 fp = os.fdopen(fd, 'wb')
1506 1506 fp.write(util.tonativeeol(commit.desc))
1507 1507 fp.close()
1508 1508 try:
1509 1509 output = self.run0(
1510 1510 b'commit',
1511 1511 username=stringutil.shortuser(commit.author),
1512 1512 file=messagefile,
1513 1513 encoding=b'utf-8',
1514 1514 )
1515 1515 try:
1516 1516 rev = self.commit_re.search(output).group(1)
1517 1517 except AttributeError:
1518 1518 if not files:
1519 1519 return parents[0] if parents else b'None'
1520 1520 self.ui.warn(_(b'unexpected svn output:\n'))
1521 1521 self.ui.warn(output)
1522 1522 raise error.Abort(_(b'unable to cope with svn output'))
1523 1523 if commit.rev:
1524 1524 self.run(
1525 1525 b'propset',
1526 1526 b'hg:convert-rev',
1527 1527 commit.rev,
1528 1528 revprop=True,
1529 1529 revision=rev,
1530 1530 )
1531 1531 if commit.branch and commit.branch != b'default':
1532 1532 self.run(
1533 1533 b'propset',
1534 1534 b'hg:convert-branch',
1535 1535 commit.branch,
1536 1536 revprop=True,
1537 1537 revision=rev,
1538 1538 )
1539 1539 for parent in parents:
1540 1540 self.addchild(parent, rev)
1541 1541 return self.revid(rev)
1542 1542 finally:
1543 1543 os.unlink(messagefile)
1544 1544
1545 1545 def puttags(self, tags):
1546 1546 self.ui.warn(_(b'writing Subversion tags is not yet implemented\n'))
1547 1547 return None, None
1548 1548
1549 1549 def hascommitfrommap(self, rev):
1550 1550 # We trust that revisions referenced in a map still is present
1551 1551 # TODO: implement something better if necessary and feasible
1552 1552 return True
1553 1553
1554 1554 def hascommitforsplicemap(self, rev):
1555 1555 # This is not correct as one can convert to an existing subversion
1556 1556 # repository and childmap would not list all revisions. Too bad.
1557 1557 if rev in self.childmap:
1558 1558 return True
1559 1559 raise error.Abort(
1560 1560 _(
1561 1561 b'splice map revision %s not found in subversion '
1562 1562 b'child map (revision lookups are not implemented)'
1563 1563 )
1564 1564 % rev
1565 1565 )
@@ -1,150 +1,150 b''
1 1 # Copyright 2009, Alexander Solovyov <piranha@piranha.org.ua>
2 2 #
3 3 # This software may be used and distributed according to the terms of the
4 4 # GNU General Public License version 2 or any later version.
5 5
6 6 """extend schemes with shortcuts to repository swarms
7 7
8 8 This extension allows you to specify shortcuts for parent URLs with a
9 9 lot of repositories to act like a scheme, for example::
10 10
11 11 [schemes]
12 12 py = http://code.python.org/hg/
13 13
14 14 After that you can use it like::
15 15
16 16 hg clone py://trunk/
17 17
18 18 Additionally there is support for some more complex schemas, for
19 19 example used by Google Code::
20 20
21 21 [schemes]
22 22 gcode = http://{1}.googlecode.com/hg/
23 23
24 24 The syntax is taken from Mercurial templates, and you have unlimited
25 25 number of variables, starting with ``{1}`` and continuing with
26 26 ``{2}``, ``{3}`` and so on. This variables will receive parts of URL
27 27 supplied, split by ``/``. Anything not specified as ``{part}`` will be
28 28 just appended to an URL.
29 29
30 30 For convenience, the extension adds these schemes by default::
31 31
32 32 [schemes]
33 33 py = http://hg.python.org/
34 34 bb = https://bitbucket.org/
35 35 bb+ssh = ssh://hg@bitbucket.org/
36 36 gcode = https://{1}.googlecode.com/hg/
37 37 kiln = https://{1}.kilnhg.com/Repo/
38 38
39 39 You can override a predefined scheme by defining a new scheme with the
40 40 same name.
41 41 """
42 42 from __future__ import absolute_import
43 43
44 44 import os
45 45 import re
46 46
47 47 from mercurial.i18n import _
48 48 from mercurial import (
49 49 error,
50 50 extensions,
51 51 hg,
52 52 pycompat,
53 53 registrar,
54 54 templater,
55 55 util,
56 56 )
57 57
58 58 cmdtable = {}
59 59 command = registrar.command(cmdtable)
60 60 # Note for extension authors: ONLY specify testedwith = 'ships-with-hg-core' for
61 61 # extensions which SHIP WITH MERCURIAL. Non-mainline extensions should
62 62 # be specifying the version(s) of Mercurial they are tested with, or
63 63 # leave the attribute unspecified.
64 64 testedwith = b'ships-with-hg-core'
65 65
66 _partre = re.compile(br'\{(\d+)\}')
66 _partre = re.compile(br'{(\d+)\}')
67 67
68 68
69 69 class ShortRepository(object):
70 70 def __init__(self, url, scheme, templater):
71 71 self.scheme = scheme
72 72 self.templater = templater
73 73 self.url = url
74 74 try:
75 75 self.parts = max(map(int, _partre.findall(self.url)))
76 76 except ValueError:
77 77 self.parts = 0
78 78
79 79 def __repr__(self):
80 80 return b'<ShortRepository: %s>' % self.scheme
81 81
82 82 def instance(self, ui, url, create, intents=None, createopts=None):
83 83 url = self.resolve(url)
84 84 return hg._peerlookup(url).instance(
85 85 ui, url, create, intents=intents, createopts=createopts
86 86 )
87 87
88 88 def resolve(self, url):
89 89 # Should this use the util.url class, or is manual parsing better?
90 90 try:
91 91 url = url.split(b'://', 1)[1]
92 92 except IndexError:
93 93 raise error.Abort(_(b"no '://' in scheme url '%s'") % url)
94 94 parts = url.split(b'/', self.parts)
95 95 if len(parts) > self.parts:
96 96 tail = parts[-1]
97 97 parts = parts[:-1]
98 98 else:
99 99 tail = b''
100 100 context = dict((b'%d' % (i + 1), v) for i, v in enumerate(parts))
101 101 return b''.join(self.templater.process(self.url, context)) + tail
102 102
103 103
104 104 def hasdriveletter(orig, path):
105 105 if path:
106 106 for scheme in schemes:
107 107 if path.startswith(scheme + b':'):
108 108 return False
109 109 return orig(path)
110 110
111 111
112 112 schemes = {
113 113 b'py': b'http://hg.python.org/',
114 114 b'bb': b'https://bitbucket.org/',
115 115 b'bb+ssh': b'ssh://hg@bitbucket.org/',
116 116 b'gcode': b'https://{1}.googlecode.com/hg/',
117 117 b'kiln': b'https://{1}.kilnhg.com/Repo/',
118 118 }
119 119
120 120
121 121 def extsetup(ui):
122 122 schemes.update(dict(ui.configitems(b'schemes')))
123 123 t = templater.engine(templater.parse)
124 124 for scheme, url in schemes.items():
125 125 if (
126 126 pycompat.iswindows
127 127 and len(scheme) == 1
128 128 and scheme.isalpha()
129 129 and os.path.exists(b'%s:\\' % scheme)
130 130 ):
131 131 raise error.Abort(
132 132 _(
133 133 b'custom scheme %s:// conflicts with drive '
134 134 b'letter %s:\\\n'
135 135 )
136 136 % (scheme, scheme.upper())
137 137 )
138 138 hg.schemes[scheme] = ShortRepository(url, scheme, t)
139 139
140 140 extensions.wrapfunction(util, b'hasdriveletter', hasdriveletter)
141 141
142 142
143 143 @command(b'debugexpandscheme', norepo=True)
144 144 def expandscheme(ui, url, **opts):
145 145 """given a repo path, provide the scheme-expanded path
146 146 """
147 147 repo = hg._peerlookup(url)
148 148 if isinstance(repo, ShortRepository):
149 149 url = repo.resolve(url)
150 150 ui.write(url + b'\n')
@@ -1,812 +1,812 b''
1 1 # stringutil.py - utility for generic string formatting, parsing, etc.
2 2 #
3 3 # Copyright 2005 K. Thananchayan <thananck@yahoo.com>
4 4 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
5 5 # Copyright 2006 Vadim Gelfer <vadim.gelfer@gmail.com>
6 6 #
7 7 # This software may be used and distributed according to the terms of the
8 8 # GNU General Public License version 2 or any later version.
9 9
10 10 from __future__ import absolute_import
11 11
12 12 import ast
13 13 import codecs
14 14 import re as remod
15 15 import textwrap
16 16 import types
17 17
18 18 from ..i18n import _
19 19 from ..thirdparty import attr
20 20
21 21 from .. import (
22 22 encoding,
23 23 error,
24 24 pycompat,
25 25 )
26 26
27 27 # regex special chars pulled from https://bugs.python.org/issue29995
28 28 # which was part of Python 3.7.
29 29 _respecial = pycompat.bytestr(b'()[]{}?*+-|^$\\.&~# \t\n\r\v\f')
30 30 _regexescapemap = {ord(i): (b'\\' + i).decode('latin1') for i in _respecial}
31 31 regexbytesescapemap = {i: (b'\\' + i) for i in _respecial}
32 32
33 33
34 34 def reescape(pat):
35 35 """Drop-in replacement for re.escape."""
36 36 # NOTE: it is intentional that this works on unicodes and not
37 37 # bytes, as it's only possible to do the escaping with
38 38 # unicode.translate, not bytes.translate. Sigh.
39 39 wantuni = True
40 40 if isinstance(pat, bytes):
41 41 wantuni = False
42 42 pat = pat.decode('latin1')
43 43 pat = pat.translate(_regexescapemap)
44 44 if wantuni:
45 45 return pat
46 46 return pat.encode('latin1')
47 47
48 48
49 49 def pprint(o, bprefix=False, indent=0, level=0):
50 50 """Pretty print an object."""
51 51 return b''.join(pprintgen(o, bprefix=bprefix, indent=indent, level=level))
52 52
53 53
54 54 def pprintgen(o, bprefix=False, indent=0, level=0):
55 55 """Pretty print an object to a generator of atoms.
56 56
57 57 ``bprefix`` is a flag influencing whether bytestrings are preferred with
58 58 a ``b''`` prefix.
59 59
60 60 ``indent`` controls whether collections and nested data structures
61 61 span multiple lines via the indentation amount in spaces. By default,
62 62 no newlines are emitted.
63 63
64 64 ``level`` specifies the initial indent level. Used if ``indent > 0``.
65 65 """
66 66
67 67 if isinstance(o, bytes):
68 68 if bprefix:
69 69 yield b"b'%s'" % escapestr(o)
70 70 else:
71 71 yield b"'%s'" % escapestr(o)
72 72 elif isinstance(o, bytearray):
73 73 # codecs.escape_encode() can't handle bytearray, so escapestr fails
74 74 # without coercion.
75 75 yield b"bytearray['%s']" % escapestr(bytes(o))
76 76 elif isinstance(o, list):
77 77 if not o:
78 78 yield b'[]'
79 79 return
80 80
81 81 yield b'['
82 82
83 83 if indent:
84 84 level += 1
85 85 yield b'\n'
86 86 yield b' ' * (level * indent)
87 87
88 88 for i, a in enumerate(o):
89 89 for chunk in pprintgen(
90 90 a, bprefix=bprefix, indent=indent, level=level
91 91 ):
92 92 yield chunk
93 93
94 94 if i + 1 < len(o):
95 95 if indent:
96 96 yield b',\n'
97 97 yield b' ' * (level * indent)
98 98 else:
99 99 yield b', '
100 100
101 101 if indent:
102 102 level -= 1
103 103 yield b'\n'
104 104 yield b' ' * (level * indent)
105 105
106 106 yield b']'
107 107 elif isinstance(o, dict):
108 108 if not o:
109 109 yield b'{}'
110 110 return
111 111
112 112 yield b'{'
113 113
114 114 if indent:
115 115 level += 1
116 116 yield b'\n'
117 117 yield b' ' * (level * indent)
118 118
119 119 for i, (k, v) in enumerate(sorted(o.items())):
120 120 for chunk in pprintgen(
121 121 k, bprefix=bprefix, indent=indent, level=level
122 122 ):
123 123 yield chunk
124 124
125 125 yield b': '
126 126
127 127 for chunk in pprintgen(
128 128 v, bprefix=bprefix, indent=indent, level=level
129 129 ):
130 130 yield chunk
131 131
132 132 if i + 1 < len(o):
133 133 if indent:
134 134 yield b',\n'
135 135 yield b' ' * (level * indent)
136 136 else:
137 137 yield b', '
138 138
139 139 if indent:
140 140 level -= 1
141 141 yield b'\n'
142 142 yield b' ' * (level * indent)
143 143
144 144 yield b'}'
145 145 elif isinstance(o, set):
146 146 if not o:
147 147 yield b'set([])'
148 148 return
149 149
150 150 yield b'set(['
151 151
152 152 if indent:
153 153 level += 1
154 154 yield b'\n'
155 155 yield b' ' * (level * indent)
156 156
157 157 for i, k in enumerate(sorted(o)):
158 158 for chunk in pprintgen(
159 159 k, bprefix=bprefix, indent=indent, level=level
160 160 ):
161 161 yield chunk
162 162
163 163 if i + 1 < len(o):
164 164 if indent:
165 165 yield b',\n'
166 166 yield b' ' * (level * indent)
167 167 else:
168 168 yield b', '
169 169
170 170 if indent:
171 171 level -= 1
172 172 yield b'\n'
173 173 yield b' ' * (level * indent)
174 174
175 175 yield b'])'
176 176 elif isinstance(o, tuple):
177 177 if not o:
178 178 yield b'()'
179 179 return
180 180
181 181 yield b'('
182 182
183 183 if indent:
184 184 level += 1
185 185 yield b'\n'
186 186 yield b' ' * (level * indent)
187 187
188 188 for i, a in enumerate(o):
189 189 for chunk in pprintgen(
190 190 a, bprefix=bprefix, indent=indent, level=level
191 191 ):
192 192 yield chunk
193 193
194 194 if i + 1 < len(o):
195 195 if indent:
196 196 yield b',\n'
197 197 yield b' ' * (level * indent)
198 198 else:
199 199 yield b', '
200 200
201 201 if indent:
202 202 level -= 1
203 203 yield b'\n'
204 204 yield b' ' * (level * indent)
205 205
206 206 yield b')'
207 207 elif isinstance(o, types.GeneratorType):
208 208 # Special case of empty generator.
209 209 try:
210 210 nextitem = next(o)
211 211 except StopIteration:
212 212 yield b'gen[]'
213 213 return
214 214
215 215 yield b'gen['
216 216
217 217 if indent:
218 218 level += 1
219 219 yield b'\n'
220 220 yield b' ' * (level * indent)
221 221
222 222 last = False
223 223
224 224 while not last:
225 225 current = nextitem
226 226
227 227 try:
228 228 nextitem = next(o)
229 229 except StopIteration:
230 230 last = True
231 231
232 232 for chunk in pprintgen(
233 233 current, bprefix=bprefix, indent=indent, level=level
234 234 ):
235 235 yield chunk
236 236
237 237 if not last:
238 238 if indent:
239 239 yield b',\n'
240 240 yield b' ' * (level * indent)
241 241 else:
242 242 yield b', '
243 243
244 244 if indent:
245 245 level -= 1
246 246 yield b'\n'
247 247 yield b' ' * (level * indent)
248 248
249 249 yield b']'
250 250 else:
251 251 yield pycompat.byterepr(o)
252 252
253 253
def prettyrepr(o):
    """Pretty print a representation of a possibly-nested object

    Splits the flat repr() of ``o`` at each nested ``field=<type ...``
    boundary and indents each fragment by its nesting depth.  Returns
    bytes.
    """
    lines = []
    rs = pycompat.byterepr(o)
    p0 = p1 = 0
    while p0 < len(rs):
        # Find the next nested-object marker '<' and where to cut:
        # '... field=<type ... field=<type ...'
        #      ~~~~~~~~~~~~~~~~
        #      p0    p1        q0    q1
        q0 = -1
        q1 = rs.find(b'<', p1 + 1)
        if q1 < 0:
            q1 = len(rs)
        elif q1 > p1 + 1 and rs.startswith(b'=', q1 - 1):
            # backtrack for ' field=<'
            q0 = rs.rfind(b' ', p1 + 1, q1 - 1)
        if q0 < 0:
            q0 = q1
        else:
            q0 += 1  # skip ' '
        # indent level = number of '<' not yet balanced by '>' before p0
        l = rs.count(b'<', 0, p0) - rs.count(b'>', 0, p0)
        assert l >= 0
        lines.append((l, rs[p0:q0].rstrip()))
        p0, p1 = q0, q1
    return b'\n'.join(b' ' * l + s for l, s in lines)
279 279
280 280
def buildrepr(r):
    """Format an optional printable representation from unexpanded bits

    ======== =================================
    type(r)  example
    ======== =================================
    tuple    ('<not %r>', other)
    bytes    '<branch closed>'
    callable lambda: '<branch %r>' % sorted(b)
    object   other
    ======== =================================
    """
    # Dispatch on the shape of r with early returns; the tuple form
    # treats the first element as a format string for the rest.
    if r is None:
        return b''
    if isinstance(r, tuple):
        return r[0] % pycompat.rapply(pycompat.maybebytestr, r[1:])
    if isinstance(r, bytes):
        return r
    if callable(r):
        return r()
    return pprint(r)
303 303
304 304
def binary(s):
    """return true if a string is binary data"""
    # Empty or None input is never considered binary; otherwise the
    # presence of a NUL byte is the heuristic.
    if not s:
        return False
    return b'\0' in s
308 308
309 309
def stringmatcher(pattern, casesensitive=True):
    """
    accepts a string, possibly starting with 're:' or 'literal:' prefix.
    returns the matcher name, pattern, and matcher function.
    missing or unknown prefixes are treated as literal matches.

    helper for tests:
    >>> def test(pattern, *tests):
    ...     kind, pattern, matcher = stringmatcher(pattern)
    ...     return (kind, pattern, [bool(matcher(t)) for t in tests])
    >>> def itest(pattern, *tests):
    ...     kind, pattern, matcher = stringmatcher(pattern, casesensitive=False)
    ...     return (kind, pattern, [bool(matcher(t)) for t in tests])

    exact matching (no prefix):
    >>> test(b'abcdefg', b'abc', b'def', b'abcdefg')
    ('literal', 'abcdefg', [False, False, True])

    regex matching ('re:' prefix)
    >>> test(b're:a.+b', b'nomatch', b'fooadef', b'fooadefbar')
    ('re', 'a.+b', [False, False, True])

    force exact matches ('literal:' prefix)
    >>> test(b'literal:re:foobar', b'foobar', b're:foobar')
    ('literal', 're:foobar', [False, True])

    unknown prefixes are ignored and treated as literals
    >>> test(b'foo:bar', b'foo', b'bar', b'foo:bar')
    ('literal', 'foo:bar', [False, False, True])

    case insensitive regex matches
    >>> itest(b're:A.+b', b'nomatch', b'fooadef', b'fooadefBar')
    ('re', 'A.+b', [False, False, True])

    case insensitive literal matches
    >>> itest(b'ABCDEFG', b'abc', b'def', b'abcdefg')
    ('literal', 'ABCDEFG', [False, False, True])
    """
    if pattern.startswith(b're:'):
        pattern = pattern[3:]
        # flag computation cannot fail, so it lives outside the try
        flags = 0 if casesensitive else remod.I
        try:
            regex = remod.compile(pattern, flags)
        except remod.error as e:
            raise error.ParseError(_(b'invalid regular expression: %s') % e)
        return b're', pattern, regex.search

    if pattern.startswith(b'literal:'):
        pattern = pattern[8:]

    if casesensitive:
        match = pattern.__eq__
    else:
        # compare both sides lowered so the comparison is case-blind
        ipat = encoding.lower(pattern)
        match = lambda s: ipat == encoding.lower(s)
    return b'literal', pattern, match
367 367
368 368
def shortuser(user):
    """Return a short representation of a user name or email address.

    Strips the host part of an address, anything before an opening
    '<', and truncates at the first space or dot.
    """
    at = user.find(b'@')
    if at >= 0:
        user = user[:at]
    bracket = user.find(b'<')
    if bracket >= 0:
        user = user[bracket + 1 :]
    # truncate at the first space, then at the first dot
    for sep in (b' ', b'.'):
        cut = user.find(sep)
        if cut >= 0:
            user = user[:cut]
    return user
384 384
385 385
def emailuser(user):
    """Return the user portion of an email address."""
    # drop the '@host' part, then anything up to and including '<'
    at = user.find(b'@')
    if at >= 0:
        user = user[:at]
    bracket = user.find(b'<')
    if bracket >= 0:
        user = user[bracket + 1 :]
    return user
395 395
396 396
def email(author):
    '''get email of author.'''
    # Take everything between the first '<' and the first '>'.  When a
    # bracket is missing, degrade gracefully: no '<' means start at 0
    # (find returns -1, +1 == 0), no '>' means run to the end.
    close = author.find(b'>')
    if close == -1:
        close = None
    return author[author.find(b'<') + 1 : close]
403 403
404 404
def person(author):
    """Returns the name before an email address,
    interpreting it as per RFC 5322

    >>> person(b'foo@bar')
    'foo'
    >>> person(b'Foo Bar <foo@bar>')
    'Foo Bar'
    >>> person(b'"Foo Bar" <foo@bar>')
    'Foo Bar'
    >>> person(b'"Foo \"buz\" Bar" <foo@bar>')
    'Foo "buz" Bar'
    >>> # The following are invalid, but do exist in real-life
    ...
    >>> person(b'Foo "buz" Bar <foo@bar>')
    'Foo "buz" Bar'
    >>> person(b'"Foo Bar <foo@bar>')
    'Foo Bar'
    """
    # No '@' at all: not an address, return verbatim.
    if b'@' not in author:
        return author
    bracket = author.find(b'<')
    if bracket >= 0:
        # 'Name <addr>' form: drop the address, strip surrounding
        # quotes/spaces, and unescape embedded quotes.
        return author[:bracket].strip(b' "').replace(b'\\"', b'"')
    # Bare address: synthesize a name from the local part.
    at = author.find(b'@')
    return author[:at].replace(b'.', b' ')
431 431
432 432
@attr.s(hash=True)
class mailmapping(object):
    '''Represents a username/email key or value in
    a mailmap file'''

    # email address of the identity; hashing on (email, name) lets
    # instances serve as dict keys in parsemailmap()/mapname()
    email = attr.ib()
    # optional display name; None when the entry is email-only
    name = attr.ib(default=None)
440 440
441 441
442 442 def _ismailmaplineinvalid(names, emails):
443 443 '''Returns True if the parsed names and emails
444 444 in a mailmap entry are invalid.
445 445
446 446 >>> # No names or emails fails
447 447 >>> names, emails = [], []
448 448 >>> _ismailmaplineinvalid(names, emails)
449 449 True
450 450 >>> # Only one email fails
451 451 >>> emails = [b'email@email.com']
452 452 >>> _ismailmaplineinvalid(names, emails)
453 453 True
454 454 >>> # One email and one name passes
455 455 >>> names = [b'Test Name']
456 456 >>> _ismailmaplineinvalid(names, emails)
457 457 False
458 458 >>> # No names but two emails passes
459 459 >>> names = []
460 460 >>> emails = [b'proper@email.com', b'commit@email.com']
461 461 >>> _ismailmaplineinvalid(names, emails)
462 462 False
463 463 '''
464 464 return not emails or not names and len(emails) < 2
465 465
466 466
def parsemailmap(mailmapcontent):
    """Parses data in the .mailmap format

    >>> mmdata = b"\\n".join([
    ...     b'# Comment',
    ...     b'Name <commit1@email.xx>',
    ...     b'<name@email.xx> <commit2@email.xx>',
    ...     b'Name <proper@email.xx> <commit3@email.xx>',
    ...     b'Name <proper@email.xx> Commit <commit4@email.xx>',
    ... ])
    >>> mm = parsemailmap(mmdata)
    >>> for key in sorted(mm.keys()):
    ...     print(key)
    mailmapping(email='commit1@email.xx', name=None)
    mailmapping(email='commit2@email.xx', name=None)
    mailmapping(email='commit3@email.xx', name=None)
    mailmapping(email='commit4@email.xx', name='Commit')
    >>> for val in sorted(mm.values()):
    ...     print(val)
    mailmapping(email='commit1@email.xx', name='Name')
    mailmapping(email='name@email.xx', name=None)
    mailmapping(email='proper@email.xx', name='Name')
    mailmapping(email='proper@email.xx', name='Name')
    """
    # Maps a commit identity (key) to its proper identity (value).
    mailmap = {}

    if mailmapcontent is None:
        return mailmap

    for line in mailmapcontent.splitlines():

        # Don't bother checking the line if it is a comment or
        # is an improperly formed author field
        if line.lstrip().startswith(b'#'):
            continue

        # names, emails hold the parsed emails and names for each line
        # name_builder holds the words in a persons name
        names, emails = [], []
        namebuilder = []

        for element in line.split():
            if element.startswith(b'#'):
                # If we reach a comment in the mailmap file, move on
                break

            elif element.startswith(b'<') and element.endswith(b'>'):
                # We have found an email.
                # Parse it, and finalize any names from earlier
                emails.append(element[1:-1])  # Slice off the "<>"

                if namebuilder:
                    names.append(b' '.join(namebuilder))
                    namebuilder = []

                # Break if we have found a second email, any other
                # data does not fit the spec for .mailmap
                if len(emails) > 1:
                    break

            else:
                # We have found another word in the committers name
                namebuilder.append(element)

        # Check to see if we have parsed the line into a valid form
        # We require at least one email, and either at least one
        # name or a second email
        if _ismailmaplineinvalid(names, emails):
            continue

        # The last email on the line identifies the commit author; its
        # name is only part of the key when the line carried two names.
        mailmapkey = mailmapping(
            email=emails[-1], name=names[-1] if len(names) == 2 else None,
        )

        # The first email/name on the line is the canonical identity.
        mailmap[mailmapkey] = mailmapping(
            email=emails[0], name=names[0] if names else None,
        )

    return mailmap
546 546
547 547
def mapname(mailmap, author):
    """Returns the author field according to the mailmap cache, or
    the original author field.

    >>> mmdata = b"\\n".join([
    ...     b'# Comment',
    ...     b'Name <commit1@email.xx>',
    ...     b'<name@email.xx> <commit2@email.xx>',
    ...     b'Name <proper@email.xx> <commit3@email.xx>',
    ...     b'Name <proper@email.xx> Commit <commit4@email.xx>',
    ... ])
    >>> m = parsemailmap(mmdata)
    >>> mapname(m, b'Commit <commit1@email.xx>')
    'Name <commit1@email.xx>'
    >>> mapname(m, b'Name <commit2@email.xx>')
    'Name <name@email.xx>'
    >>> mapname(m, b'Commit <commit3@email.xx>')
    'Name <proper@email.xx>'
    >>> mapname(m, b'Commit <commit4@email.xx>')
    'Name <proper@email.xx>'
    >>> mapname(m, b'Unknown Name <unknown@email.com>')
    'Unknown Name <unknown@email.com>'
    """
    # Malformed author field or empty mailmap: nothing to translate.
    if not isauthorwellformed(author) or not mailmap:
        return author

    # Split the author field into a (name, email) lookup key.
    commit = mailmapping(name=person(author), email=email(author))

    # Prefer an exact (name, email) match; otherwise retry with an
    # email-only key, defaulting to an empty mapping (mailmap values
    # are always mailmapping instances, so None means "not found").
    proper = mailmap.get(commit)
    if proper is None:
        emailkey = mailmapping(email=commit.email)
        proper = mailmap.get(emailkey, mailmapping(None, None))

    # Fields the mailmap entry leaves unset fall back to the commit's.
    return b'%s <%s>' % (
        proper.name if proper.name else commit.name,
        proper.email if proper.email else commit.email,
    )
594 594
595 595
596 _correctauthorformat = remod.compile(br'^[^<]+\s\<[^<>]+@[^<>]+\>$')
596 _correctauthorformat = remod.compile(br'^[^<]+\s<[^<>]+@[^<>]+>$')
597 597
598 598
def isauthorwellformed(author):
    '''Return True if the author field is well formed
    (ie "Contributor Name <contrib@email.dom>")

    >>> isauthorwellformed(b'Good Author <good@author.com>')
    True
    >>> isauthorwellformed(b'Author <good@author.com>')
    True
    >>> isauthorwellformed(b'Bad Author')
    False
    >>> isauthorwellformed(b'Bad Author <author@author.com')
    False
    >>> isauthorwellformed(b'Bad Author author@author.com')
    False
    >>> isauthorwellformed(b'<author@author.com>')
    False
    >>> isauthorwellformed(b'Bad Author <author>')
    False
    '''
    # a match object is truthy, a failed match is None
    return bool(_correctauthorformat.match(author))
619 619
620 620
def ellipsis(text, maxlength=400):
    """Trim string to at most maxlength (default: 400) columns in display."""
    # delegate to encoding.trim, which appends b'...' when it truncates
    return encoding.trim(text, maxlength, ellipsis=b'...')
624 624
625 625
def escapestr(s):
    """Return *s* with special bytes rendered as backslash escapes."""
    # memoryviews must be materialized before escaping
    if isinstance(s, memoryview):
        s = bytes(s)
    # codecs.escape_encode is the C routine behind Python 2's
    # s.encode('string_escape'); calling it directly also works on
    # Python 3.  It returns (encoded_bytes, length_consumed).
    encoded, _consumed = codecs.escape_encode(s)
    return encoded
632 632
633 633
def unescapestr(s):
    """Inverse of escapestr(): decode backslash escape sequences in *s*."""
    # escape_decode returns (decoded_bytes, length_consumed)
    decoded, _consumed = codecs.escape_decode(s)
    return decoded
636 636
637 637
def forcebytestr(obj):
    """Portably format an arbitrary object (e.g. exception) into a byte
    string."""
    try:
        return pycompat.bytestr(obj)
    except UnicodeEncodeError:
        # non-ascii string, may be lossy
        return pycompat.bytestr(encoding.strtolocal(str(obj)))
646 646
647 647
def uirepr(s):
    """Return a byte repr() of s with doubled backslashes collapsed."""
    # Avoid double backslash in Windows path repr()
    return pycompat.byterepr(pycompat.bytestr(s)).replace(b'\\\\', b'\\')
651 651
652 652
# delay import of textwrap
def _MBTextWrapper(**kwargs):
    class tw(textwrap.TextWrapper):
        """
        Extend TextWrapper for width-awareness.

        Neither number of 'bytes' in any encoding nor 'characters' is
        appropriate to calculate terminal columns for specified string.

        Original TextWrapper implementation uses built-in 'len()' directly,
        so overriding is needed to use width information of each characters.

        In addition, characters classified into 'ambiguous' width are
        treated as wide in East Asian area, but as narrow in other.

        This requires use decision to determine width of such characters.
        """

        def _cutdown(self, ucstr, space_left):
            # Split ucstr so the first part occupies at most space_left
            # display columns (per encoding.ucolwidth).
            # NOTE(review): returns b'' as the remainder even though
            # ucstr is a decoded string by the time wrap() calls this —
            # confirm callers tolerate the mix.
            l = 0
            colwidth = encoding.ucolwidth
            for i in pycompat.xrange(len(ucstr)):
                l += colwidth(ucstr[i])
                if space_left < l:
                    return (ucstr[:i], ucstr[i:])
            return ucstr, b''

        # overriding of base class
        def _handle_long_word(self, reversed_chunks, cur_line, cur_len, width):
            space_left = max(width - cur_len, 1)

            if self.break_long_words:
                # cut the oversized chunk at the column boundary
                cut, res = self._cutdown(reversed_chunks[-1], space_left)
                cur_line.append(cut)
                reversed_chunks[-1] = res
            elif not cur_line:
                # cannot break: put the whole chunk on its own line
                cur_line.append(reversed_chunks.pop())

        # this overriding code is imported from TextWrapper of Python 2.6
        # to calculate columns of string by 'encoding.ucolwidth()'
        def _wrap_chunks(self, chunks):
            colwidth = encoding.ucolwidth

            lines = []
            if self.width <= 0:
                raise ValueError(b"invalid width %r (must be > 0)" % self.width)

            # Arrange in reverse order so items can be efficiently popped
            # from a stack of chucks.
            chunks.reverse()

            while chunks:

                # Start the list of chunks that will make up the current line.
                # cur_len is just the length of all the chunks in cur_line.
                cur_line = []
                cur_len = 0

                # Figure out which static string will prefix this line.
                if lines:
                    indent = self.subsequent_indent
                else:
                    indent = self.initial_indent

                # Maximum width for this line.
                width = self.width - len(indent)

                # First chunk on line is whitespace -- drop it, unless this
                # is the very beginning of the text (i.e. no lines started yet).
                if self.drop_whitespace and chunks[-1].strip() == '' and lines:
                    del chunks[-1]

                while chunks:
                    # width in display columns, not characters or bytes
                    l = colwidth(chunks[-1])

                    # Can at least squeeze this chunk onto the current line.
                    if cur_len + l <= width:
                        cur_line.append(chunks.pop())
                        cur_len += l

                    # Nope, this line is full.
                    else:
                        break

                # The current line is full, and the next chunk is too big to
                # fit on *any* line (not just this one).
                if chunks and colwidth(chunks[-1]) > width:
                    self._handle_long_word(chunks, cur_line, cur_len, width)

                # If the last chunk on this line is all whitespace, drop it.
                if (
                    self.drop_whitespace
                    and cur_line
                    and cur_line[-1].strip() == r''
                ):
                    del cur_line[-1]

                # Convert current line back to a string and store it in list
                # of all lines (return value).
                if cur_line:
                    lines.append(indent + ''.join(cur_line))

            return lines

    # memoize: rebind the module-level name to the class itself so
    # subsequent calls construct instances without redefining it
    global _MBTextWrapper
    _MBTextWrapper = tw
    return tw(**kwargs)
760 760
761 761
def wrap(line, width, initindent=b'', hangindent=b''):
    """Word-wrap *line* (bytes) to *width* display columns.

    initindent prefixes the first output line, hangindent all
    subsequent ones; returns encoded bytes.
    """
    maxindent = max(len(hangindent), len(initindent))
    if width <= maxindent:
        # adjust for weird terminal size
        width = max(78, maxindent + 1)
    # decode everything once with the same codec/error mode
    enc = pycompat.sysstr(encoding.encoding)
    encmode = pycompat.sysstr(encoding.encodingmode)
    line = line.decode(enc, encmode)
    initindent = initindent.decode(enc, encmode)
    hangindent = hangindent.decode(enc, encmode)
    wrapper = _MBTextWrapper(
        width=width, initial_indent=initindent, subsequent_indent=hangindent
    )
    return wrapper.fill(line).encode(enc)
783 783
784 784
# Recognized (lowercase) boolean spellings for parsebool().
_booleans = {
    b'1': True,
    b'yes': True,
    b'true': True,
    b'on': True,
    b'always': True,
    b'0': False,
    b'no': False,
    b'false': False,
    b'off': False,
    b'never': False,
}
797 797
798 798
def parsebool(s):
    """Parse s into a boolean.

    If s is not a valid boolean, returns None.
    """
    # dict.get defaults to None for unrecognized spellings
    return _booleans.get(s.lower())
805 805
806 806
def evalpythonliteral(s):
    """Evaluate a string containing a Python literal expression"""
    # We could backport our tokenizer hack to rewrite '' to u'' if we want
    # ast.literal_eval needs str on Python 3; latin1 round-trips any byte
    source = s.decode('latin1') if pycompat.ispy3 else s
    return ast.literal_eval(source)
General Comments 0
You need to be logged in to leave comments. Login now