##// END OF EJS Templates
hg: support Mercurial 6.1 without util.url
Mads Kiilerich -
r8704:da519b97 stable
parent child Browse files
Show More
@@ -1,827 +1,832 b''
1 1 # -*- coding: utf-8 -*-
2 2 """
3 3 vcs.backends.git.repository
4 4 ~~~~~~~~~~~~~~~~~~~~~~~~~~~
5 5
6 6 Git repository implementation.
7 7
8 8 :created_on: Apr 8, 2010
9 9 :copyright: (c) 2010-2011 by Marcin Kuzminski, Lukasz Balcerzak.
10 10 """
11 11
12 12 import errno
13 13 import logging
14 14 import os
15 15 import re
16 16 import time
17 17 import urllib.error
18 18 import urllib.parse
19 19 import urllib.request
20 20 from collections import OrderedDict
21 21
22 import mercurial.util # import url as hg_url
22
23 try:
24 from mercurial.utils.urlutil import url as hg_url
25 except ImportError: # urlutil was introduced in Mercurial 5.8
26 from mercurial.util import url as hg_url
27
23 28 from dulwich.client import SubprocessGitClient
24 29 from dulwich.config import ConfigFile
25 30 from dulwich.objects import Tag
26 31 from dulwich.repo import NotGitRepository, Repo
27 32 from dulwich.server import update_server_info
28 33
29 34 from kallithea.lib.vcs import subprocessio
30 35 from kallithea.lib.vcs.backends.base import BaseRepository, CollectionGenerator
31 36 from kallithea.lib.vcs.conf import settings
32 37 from kallithea.lib.vcs.exceptions import (BranchDoesNotExistError, ChangesetDoesNotExistError, EmptyRepositoryError, RepositoryError, TagAlreadyExistError,
33 38 TagDoesNotExistError)
34 39 from kallithea.lib.vcs.utils import ascii_bytes, ascii_str, date_fromtimestamp, makedate, safe_bytes, safe_str
35 40 from kallithea.lib.vcs.utils.helpers import get_urllib_request_handlers
36 41 from kallithea.lib.vcs.utils.lazy import LazyProperty
37 42 from kallithea.lib.vcs.utils.paths import abspath, get_user_home
38 43
39 44 from . import changeset, inmemory, workdir
40 45
41 46
42 47 SHA_PATTERN = re.compile(r'^([0-9a-fA-F]{12}|[0-9a-fA-F]{40})$')
43 48
44 49 log = logging.getLogger(__name__)
45 50
46 51
47 52 class GitRepository(BaseRepository):
48 53 """
49 54 Git repository backend.
50 55 """
51 56 DEFAULT_BRANCH_NAME = 'master'
52 57 scm = 'git'
53 58
def __init__(self, repo_path, create=False, src_url=None,
             update_after_clone=False, bare=False, baseui=None):
    """
    Set up the backend for the Git repository at ``repo_path``.

    :param repo_path: location of the repository; stored as ``self.path``
        after being passed through ``abspath``
    :param create: forwarded to ``_get_repo``
    :param src_url: forwarded to ``_get_repo``
    :param update_after_clone: forwarded to ``_get_repo``
    :param bare: forwarded to ``_get_repo``
    :param baseui: accepted but not used by this backend
    """
    del baseui  # unused

    self.path = abspath(repo_path)
    self.repo = self._get_repo(create, src_url, update_after_clone, bare)
    # Take bareness from the opened repository object, not from the ``bare`` argument.
    self.bare = self.repo.bare
60 65
@property
def _config_files(self):
    """
    Return the list of Git config file paths looked up for this repository:
    first the repository's own ``config`` (directly under ``self.path`` for
    a bare repository, under ``.git`` otherwise), then ``.gitconfig`` in
    the directory returned by ``get_user_home()``.
    """
    # A conditional expression instead of ``cond and a or b``: the latter
    # silently picks the second operand whenever the first one is falsy.
    if_bare = abspath(self.path, 'config') if self.bare else abspath(self.path, '.git', 'config')
    return [
        if_bare,
        abspath(get_user_home(), '.gitconfig'),
    ]
68 73
69 74 @property
70 75 def _repo(self):
71 76 return self.repo
72 77
73 78 @property
74 79 def head(self):
75 80 try:
76 81 return self._repo.head()
77 82 except KeyError:
78 83 return None
79 84
80 85 @property
81 86 def _empty(self):
82 87 """
83 88 Checks if repository is empty ie. without any changesets
84 89 """
85 90
86 91 try:
87 92 self.revisions[0]
88 93 except (KeyError, IndexError):
89 94 return True
90 95 return False
91 96
@LazyProperty
def revisions(self):
    """
    List of revision ids in ascending order, as produced by
    ``_get_all_revisions``.

    It is a lazy attribute so that external tools can inject shas from a
    cache instead of having them computed.
    """
    all_revisions = self._get_all_revisions()
    return all_revisions
99 104
@classmethod
def _run_git_command(cls, cmd, cwd=None):
    """
    Runs given ``cmd`` as git command and returns output bytes in a tuple
    (stdout, stderr) ... or raise RepositoryError.

    :param cmd: git command to be executed, as a list of arguments
        (without the git executable itself)
    :param cwd: passed directly to subprocess

    :raises RepositoryError: if the subprocess could not be run
    """
    # Run git with a copy of our environment, minus any inherited GIT_DIR
    # and with GIT_CONFIG_NOGLOBAL set.
    gitenv = dict(os.environ)
    gitenv.pop('GIT_DIR', None)
    gitenv['GIT_CONFIG_NOGLOBAL'] = '1'

    assert isinstance(cmd, list), cmd
    cmd = [settings.GIT_EXECUTABLE_PATH, '-c', 'core.quotepath=false'] + cmd
    try:
        p = subprocessio.SubprocessIOChunker(cmd, cwd=cwd, env=gitenv, shell=False)
    except OSError as err:  # EnvironmentError is just an alias of OSError on Python 3
        # output from the failing process is in str(EnvironmentError)
        msg = ("Couldn't run git command %s.\n"
               "Subprocess failed with '%s': %s\n" %
               (cmd, type(err).__name__, err)
        ).strip()
        log.error(msg)
        # Chain explicitly so the original failure stays attached as the cause.
        raise RepositoryError(msg) from err

    try:
        stdout = b''.join(p.output)
        stderr = b''.join(p.error)
    finally:
        # Always close the chunker, also when collecting the output failed.
        p.close()
    # TODO: introduce option to make commands fail if they have any stderr output?
    if stderr:
        log.debug('stderr from %s:\n%s', cmd, stderr)
    else:
        log.debug('stderr from %s: None', cmd)
    return stdout, stderr
138 143
def run_git_command(self, cmd):
    """
    Run ``cmd`` (a list of git arguments) through ``_run_git_command`` and
    return its stdout converted with ``safe_str``; stderr is discarded.

    The command runs with ``self.path`` as working directory when that
    path is an existing directory, otherwise without an explicit cwd.
    Errors surface as RepositoryError from ``_run_git_command``.
    """
    workdir_or_none = self.path if os.path.isdir(self.path) else None
    stdout, _stderr = self._run_git_command(cmd, cwd=workdir_or_none)
    return safe_str(stdout)
149 154
150 155 @staticmethod
151 156 def _check_url(url):
152 157 r"""
153 158 Raise URLError if url doesn't seem like a valid safe Git URL. We
154 159 only allow http, https, git, and ssh URLs.
155 160
156 161 For http and https URLs, make a connection and probe to see if it is valid.
157 162
158 163 >>> GitRepository._check_url('git://example.com/my%20fine repo')
159 164
160 165 >>> GitRepository._check_url('http://example.com:65537/repo')
161 166 Traceback (most recent call last):
162 167 ...
163 168 urllib.error.URLError: <urlopen error Error parsing URL: 'http://example.com:65537/repo'>
164 169 >>> GitRepository._check_url('foo')
165 170 Traceback (most recent call last):
166 171 ...
167 172 urllib.error.URLError: <urlopen error Unsupported protocol in URL 'foo'>
168 173 >>> GitRepository._check_url('file:///repo')
169 174 Traceback (most recent call last):
170 175 ...
171 176 urllib.error.URLError: <urlopen error Unsupported protocol in URL 'file:///repo'>
172 177 >>> GitRepository._check_url('git+http://example.com/repo')
173 178 Traceback (most recent call last):
174 179 ...
175 180 urllib.error.URLError: <urlopen error Unsupported protocol in URL 'git+http://example.com/repo'>
176 181 >>> GitRepository._check_url('git://example.com/%09')
177 182 Traceback (most recent call last):
178 183 ...
179 184 urllib.error.URLError: <urlopen error Invalid escape character in path: '%'>
180 185 >>> GitRepository._check_url('git://example.com/%x00')
181 186 Traceback (most recent call last):
182 187 ...
183 188 urllib.error.URLError: <urlopen error Invalid escape character in path: '%'>
184 189 >>> GitRepository._check_url(r'git://example.com/\u0009')
185 190 Traceback (most recent call last):
186 191 ...
187 192 urllib.error.URLError: <urlopen error Invalid escape character in path: '\'>
188 193 >>> GitRepository._check_url(r'git://example.com/\t')
189 194 Traceback (most recent call last):
190 195 ...
191 196 urllib.error.URLError: <urlopen error Invalid escape character in path: '\'>
192 197 >>> GitRepository._check_url('git://example.com/\t')
193 198 Traceback (most recent call last):
194 199 ...
195 200 urllib.error.URLError: <urlopen error Invalid ...>
196 201
197 202 The failure above will be one of, depending on the level of WhatWG support:
198 203 urllib.error.URLError: <urlopen error Invalid whitespace character in path: '\t'>
199 204 urllib.error.URLError: <urlopen error Invalid url: 'git://example.com/ ' normalizes to 'git://example.com/'>
200 205 """
201 206 try:
202 207 parsed_url = urllib.parse.urlparse(url)
203 208 parsed_url.port # trigger netloc parsing which might raise ValueError
204 209 except ValueError:
205 210 raise urllib.error.URLError("Error parsing URL: %r" % url)
206 211
207 212 # check first if it's not an local url
208 213 if os.path.isabs(url) and os.path.isdir(url):
209 214 return
210 215
211 216 unparsed_url = urllib.parse.urlunparse(parsed_url)
212 217 if unparsed_url != url:
213 218 raise urllib.error.URLError("Invalid url: '%s' normalizes to '%s'" % (url, unparsed_url))
214 219
215 220 if parsed_url.scheme == 'git':
216 221 # Mitigate problems elsewhere with incorrect handling of encoded paths.
217 222 # Don't trust urllib.parse.unquote but be prepared for more flexible implementations elsewhere.
218 223 # Space is the only allowed whitespace character - directly or % encoded. No other % or \ is allowed.
219 224 for c in parsed_url.path.replace('%20', ' '):
220 225 if c in '%\\':
221 226 raise urllib.error.URLError("Invalid escape character in path: '%s'" % c)
222 227 if c.isspace() and c != ' ':
223 228 raise urllib.error.URLError("Invalid whitespace character in path: %r" % c)
224 229 return
225 230
226 231 if parsed_url.scheme not in ['http', 'https']:
227 232 raise urllib.error.URLError("Unsupported protocol in URL %r" % url)
228 233
229 url_obj = mercurial.util.url(safe_bytes(url))
234 url_obj = hg_url(safe_bytes(url))
230 235 test_uri, handlers = get_urllib_request_handlers(url_obj)
231 236 if not test_uri.endswith(b'info/refs'):
232 237 test_uri = test_uri.rstrip(b'/') + b'/info/refs'
233 238
234 239 url_obj.passwd = b'*****'
235 240 cleaned_uri = str(url_obj)
236 241
237 242 o = urllib.request.build_opener(*handlers)
238 243 o.addheaders = [('User-Agent', 'git/1.7.8.0')] # fake some git
239 244
240 245 req = urllib.request.Request(
241 246 "%s?%s" % (
242 247 safe_str(test_uri),
243 248 urllib.parse.urlencode({"service": 'git-upload-pack'})
244 249 ))
245 250
246 251 try:
247 252 resp = o.open(req)
248 253 if resp.code != 200:
249 254 raise Exception('Return Code is not 200')
250 255 except Exception as e:
251 256 # means it cannot be cloned
252 257 raise urllib.error.URLError("[%s] org_exc: %s" % (cleaned_uri, e))
253 258
254 259 # now detect if it's proper git repo
255 260 gitdata = resp.read()
256 261 if b'service=git-upload-pack' not in gitdata:
257 262 raise urllib.error.URLError(
258 263 "url [%s] does not look like an git" % cleaned_uri)
259 264
def _get_repo(self, create, src_url=None, update_after_clone=False,
              bare=False):
    """
    Open, initialize or clone the repository at ``self.path`` and return
    the resulting ``Repo`` object.

    :param create: if false, open the existing repository; if true,
        ``self.path`` must not exist yet
    :param src_url: if given (requires ``create``), the URL is checked
        with ``_check_url`` and cloned into ``self.path``
    :param update_after_clone: forwarded to ``clone``
    :param bare: forwarded to ``clone``; without ``src_url`` it selects
        ``Repo.init_bare`` over ``Repo.init``

    :raises RepositoryError: on inconsistent arguments, or wrapping any
        ``NotGitRepository`` / ``OSError`` raised along the way
    """
    if create and os.path.exists(self.path):
        raise RepositoryError("Location already exist")
    if src_url and not create:
        raise RepositoryError("Create should be set to True if src_url is "
                              "given (clone operation creates repository)")
    try:
        if not create:
            return Repo(self.path)

        if src_url:
            GitRepository._check_url(src_url)
            self.clone(src_url, update_after_clone, bare)
            return Repo(self.path)

        os.makedirs(self.path)
        if bare:
            return Repo.init_bare(self.path)
        return Repo.init(self.path)
    except (NotGitRepository, OSError) as err:
        raise RepositoryError(err)
282 287
def _get_all_revisions(self):
    """
    Return the output lines of
    ``git rev-list <settings.GIT_REV_FILTER> --reverse --date-order`` as a
    list of str, or an empty list when the repository has no HEAD or the
    command fails with RepositoryError.
    """
    # The command below fails on an empty repository, and asking the repo
    # object for its head is cheaper than spawning a subprocess that errors out.
    try:
        self._repo.head()
    except KeyError:
        return []

    rev_list_cmd = ['rev-list', settings.GIT_REV_FILTER, '--reverse', '--date-order']
    try:
        output = self.run_git_command(rev_list_cmd)
    except RepositoryError:
        # Can be raised for empty repositories
        return []
    else:
        return output.splitlines()
300 305
def _get_all_revisions2(self):
    """
    Alternate implementation of ``_get_all_revisions`` using dulwich: walk
    from every parsed ref that is not of type ``b'T'`` and return the id
    of each commit the walker yields.
    """
    start_points = []
    for sha, type_ in self._parsed_refs.values():
        if type_ != b'T':
            start_points.append(ascii_str(sha))
    walker = self._repo.get_walker(include=start_points)
    return [entry.commit.id for entry in walker]
306 311
def _get_revision(self, revision):
    """
    Given any revision identifier, returns a 40 char string with revision hash.

    :param revision: ``None``, ``''``, ``'tip'``, ``'HEAD'``, ``'head'`` or
        ``-1`` for the last entry of ``self.revisions``; an int (or a short
        string of digits) as index into ``self.revisions``; a branch/tag
        name known to ``self._parsed_refs``; or a revision hash

    :raises EmptyRepositoryError: if the repository has no changesets
    :raises ChangesetDoesNotExistError: if the revision cannot be resolved
    """
    if self._empty:
        raise EmptyRepositoryError("There are no changesets yet")

    if revision in (None, '', 'tip', 'HEAD', 'head', -1):
        revision = -1

    if isinstance(revision, int):
        try:
            return self.revisions[revision]
        except IndexError:
            msg = "Revision %r does not exist for %s" % (revision, self.name)
            raise ChangesetDoesNotExistError(msg)

    if isinstance(revision, str):
        # Digits-only strings are taken as an index when they are shorter
        # than 12 characters or consist of zeros only.
        if revision.isdigit() and (len(revision) < 12 or len(revision) == revision.count('0')):
            try:
                return self.revisions[int(revision)]
            except IndexError:
                # Report the repository by name, as the other messages here do.
                msg = "Revision %r does not exist for %s" % (revision, self.name)
                raise ChangesetDoesNotExistError(msg)

        # get by branch/tag name
        _ref_revision = self._parsed_refs.get(safe_bytes(revision))
        if _ref_revision:  # and _ref_revision[1] in [b'H', b'RH', b'T']:
            return ascii_str(_ref_revision[0])

        if revision in self.revisions:
            return revision

        # maybe it's a tag ? we don't have them in self.revisions
        if revision in self.tags.values():
            return revision

        if SHA_PATTERN.match(revision):
            msg = "Revision %r does not exist for %s" % (revision, self.name)
            raise ChangesetDoesNotExistError(msg)

    raise ChangesetDoesNotExistError("Given revision %r not recognized" % revision)
349 354
350 355 def get_ref_revision(self, ref_type, ref_name):
351 356 """
352 357 Returns ``GitChangeset`` object representing repository's
353 358 changeset at the given ``revision``.
354 359 """
355 360 return self._get_revision(ref_name)
356 361
357 362 def _get_archives(self, archive_name='tip'):
358 363
359 364 for i in [('zip', '.zip'), ('gz', '.tar.gz'), ('bz2', '.tar.bz2')]:
360 365 yield {"type": i[0], "extension": i[1], "node": archive_name}
361 366
362 367 def _get_url(self, url):
363 368 """
364 369 Returns normalized url. If schema is not given, would fall to
365 370 filesystem (``file:///``) schema.
366 371 """
367 372 if url != 'default' and '://' not in url:
368 373 url = ':///'.join(('file', url))
369 374 return url
370 375
@LazyProperty
def name(self):
    """Repository name: the last path component of ``self.path``."""
    basename = os.path.basename(self.path)
    return basename
374 379
@LazyProperty
def last_change(self):
    """
    Returns last change made on this repository as datetime object,
    built by ``date_fromtimestamp`` from ``_get_mtime()``.
    """
    mtime = self._get_mtime()
    # Second element of makedate() - presumably the timezone offset; confirm in vcs.utils.
    second_arg = makedate()[1]
    return date_fromtimestamp(mtime, second_arg)
381 386
def _get_mtime(self):
    """
    Return a timestamp for the last change: ``time.mktime`` of the date of
    the changeset returned by ``get_changeset()``.

    If that raises RepositoryError, fall back to the filesystem mtime of
    the ``index`` file, or of ``HEAD`` when there is no ``index``.
    """
    try:
        return time.mktime(self.get_changeset().date.timetuple())
    except RepositoryError:
        # fallback to filesystem
        git_subdir = '.git' if not self.bare else ''
        index_path = os.path.join(self.path, git_subdir, "index")
        head_path = os.path.join(self.path, git_subdir, "HEAD")
        if not os.path.exists(index_path):
            return os.stat(head_path).st_mtime
        return os.stat(index_path).st_mtime
394 399
@LazyProperty
def description(self):
    """
    Repository description as str, taken from ``get_description()`` of the
    repository object; ``'unknown'`` when that returns something falsy.
    """
    raw_description = self._repo.get_description()
    if not raw_description:
        raw_description = b'unknown'
    return safe_str(raw_description)
398 403
@property
def branches(self):
    """
    Mapping of branch name to revision hash, ordered by name, built from
    the entries of ``_parsed_refs`` with type ``b'H'``.

    A plain empty dict is returned when there are no revisions.
    """
    if not self.revisions:
        return {}
    heads = []
    for key, (sha, type_) in self._parsed_refs.items():
        if type_ == b'H':
            heads.append((safe_str(key), ascii_str(sha)))
    heads.sort(key=lambda pair: pair[0])
    return OrderedDict(heads)
406 411
@LazyProperty
def closed_branches(self):
    """Always an empty dict in this backend."""
    return dict()
410 415
@LazyProperty
def tags(self):
    """Tags of this repository, as computed by ``_get_tags``."""
    current_tags = self._get_tags()
    return current_tags
414 419
def _get_tags(self):
    """
    Return a mapping of tag name to revision hash, in descending name
    order, built from the entries of ``_parsed_refs`` with type ``b'T'``.

    A plain empty dict is returned when there are no revisions.
    """
    if not self.revisions:
        return {}
    tag_pairs = []
    for key, (sha, type_) in self._parsed_refs.items():
        if type_ == b'T':
            tag_pairs.append((safe_str(key), ascii_str(sha)))
    tag_pairs.sort(key=lambda pair: pair[0], reverse=True)
    return OrderedDict(tag_pairs)
421 426
def tag(self, name, user, revision=None, message=None, date=None,
        **kwargs):
    """
    Create tag ``name`` for the given ``revision`` and return the tagged
    changeset.

    The tag is written as the ref ``refs/tags/<name>`` pointing directly
    at the commit id; ``user``, ``message`` and ``date`` are accepted but
    not stored by this implementation. The cached parsed refs and tags
    are refreshed afterwards.

    :param name: name for new tag
    :param user: full username, i.e.: "Joe Doe <joe.doe@example.com>"
    :param revision: changeset id for which new tag would be created
    :param message: message of the tag's commit
    :param date: date of tag's commit

    :raises TagAlreadyExistError: if tag with same name already exists
    """
    if name in self.tags:
        raise TagAlreadyExistError("Tag %s already exists" % name)

    target = self.get_changeset(revision)
    if not message:
        message = "Added tag %s for commit %s" % (name,
                                                  target.raw_id)
    tag_ref = b"refs/tags/%s" % safe_bytes(name)
    self._repo.refs[tag_ref] = target._commit.id

    self._parsed_refs = self._get_parsed_refs()
    self.tags = self._get_tags()
    return target
445 450
def remove_tag(self, name, user, message=None, date=None):
    """
    Remove the tag called ``name`` by deleting its file under
    ``refs/tags`` and refreshing the cached parsed refs and tags.

    ``user``, ``message`` and ``date`` are accepted but not used here.

    :param name: name of the tag to be removed
    :param user: full username, i.e.: "Joe Doe <joe.doe@example.com>"
    :param message: message of the tag's removal commit
    :param date: date of tag's removal commit

    :raises TagDoesNotExistError: if tag with given name does not exists
    :raises RepositoryError: if an OSError occurs while removing the tag
    """
    if name not in self.tags:
        raise TagDoesNotExistError("Tag %s does not exist" % name)

    # self._repo.refs is a DiskRefsContainer, and .path gives the full absolute path of '.git'
    git_dir = safe_str(self._repo.refs.path)
    tagpath = os.path.join(git_dir, 'refs', 'tags', name)
    try:
        os.remove(tagpath)
        self._parsed_refs = self._get_parsed_refs()
        self.tags = self._get_tags()
    except OSError as e:
        raise RepositoryError(e.strerror)
467 472
@LazyProperty
def bookmarks(self):
    """
    Bookmarks for this repository; always an empty dict in this backend.
    """
    return dict()
474 479
@LazyProperty
def _parsed_refs(self):
    """Refs of this repository, as computed by ``_get_parsed_refs``."""
    parsed = self._get_parsed_refs()
    return parsed
478 483
def _get_parsed_refs(self):
    """Return refs as a dict, like:
    { b'v0.2.0': [b'599ba911aa24d2981225f3966eb659dfae9e9f30', b'T'] }

    Keys are the ref names with their ``refs/heads/``,
    ``refs/remotes/origin/`` or ``refs/tags/`` prefix removed; the type
    marker is ``b'H'``, ``b'RH'`` or ``b'T'`` respectively. Refs outside
    these namespaces are left out.
    """
    _repo = self._repo
    keys = [(b'refs/heads/', b'H'),
            (b'refs/remotes/origin/', b'RH'),
            (b'refs/tags/', b'T')]
    _refs = {}
    for ref, sha in _repo.get_refs().items():
        for k, type_ in keys:
            if not ref.startswith(k):
                continue
            if type_ == b'T':
                obj = _repo.get_object(sha)
                if isinstance(obj, Tag):
                    # For a Tag object, record the sha it refers to
                    # (``object[1]``); reuse the object already fetched
                    # instead of looking it up a second time.
                    sha = obj.object[1]
            _refs[ref[len(k):]] = [sha, type_]
            break
    return _refs
500 505
501 506 def _heads(self, reverse=False):
502 507 refs = self._repo.get_refs()
503 508 heads = {}
504 509
505 510 for key, val in refs.items():
506 511 for ref_key in [b'refs/heads/', b'refs/remotes/origin/']:
507 512 if key.startswith(ref_key):
508 513 n = key[len(ref_key):]
509 514 if n not in [b'HEAD']:
510 515 heads[n] = val
511 516
512 517 return heads if reverse else dict((y, x) for x, y in heads.items())
513 518
def get_changeset(self, revision=None):
    """
    Return the ``GitChangeset`` for ``revision``, resolved through
    ``_get_revision`` (so ``None`` means the most recent commit).

    A ``revision`` that already is a ``GitChangeset`` is returned unchanged.
    """
    if not isinstance(revision, changeset.GitChangeset):
        resolved = self._get_revision(revision)
        revision = changeset.GitChangeset(repository=self, revision=resolved)
    return revision
522 527
def get_changesets(self, start=None, end=None, start_date=None,
                   end_date=None, branch_name=None, reverse=False, max_revisions=None):
    """
    Returns iterator of ``GitChangeset`` objects from start to end (both
    are inclusive), in ascending date order (unless ``reverse`` is set).

    :param start: changeset ID, as str; first returned changeset
    :param end: changeset ID, as str; last returned changeset
    :param start_date: if specified, changesets with commit date less than
      ``start_date`` would be filtered out from returned set
    :param end_date: if specified, changesets with commit date greater than
      ``end_date`` would be filtered out from returned set
    :param branch_name: if specified, changesets not reachable from given
      branch would be filtered out from returned set
    :param reverse: if ``True``, returned generator would be reversed
      (meaning that returned changesets would have descending date order)
    :param max_revisions: if specified, passed to ``git log`` as
      ``--max-count``

    :raise BranchDoesNotExistError: If given ``branch_name`` does not
        exist.
    :raise EmptyRepositoryError: If the repository has no changesets.
    :raise ChangesetDoesNotExistError: If changeset for given ``start`` or
      ``end`` could not be found.
    :raise RepositoryError: If ``start`` comes after ``end``.
    """
    if branch_name and branch_name not in self.branches:
        raise BranchDoesNotExistError("Branch '%s' not found"
                                      % branch_name)
    # Check for an empty repository up front, so that no subprocess is
    # spawned just to fail.
    if self._empty:
        raise EmptyRepositoryError("There are no changesets yet")

    # %H at format means (full) commit hash, initial hashes are retrieved
    # in ascending date order
    cmd = ['log', '--date-order', '--reverse', '--pretty=format:%H']
    if max_revisions:
        cmd += ['--max-count=%s' % max_revisions]
    if start_date:
        cmd += ['--since', start_date.strftime('%m/%d/%y %H:%M:%S')]
    if end_date:
        cmd += ['--until', end_date.strftime('%m/%d/%y %H:%M:%S')]
    if branch_name:
        cmd.append(branch_name)
    else:
        cmd.append(settings.GIT_REV_FILTER)

    revs = self.run_git_command(cmd).splitlines()
    start_pos = 0
    end_pos = len(revs)
    if start:
        _start = self._get_revision(start)
        try:
            start_pos = revs.index(_start)
        except ValueError:
            # start is not among the listed revisions: begin at the first one
            pass

    if end is not None:
        _end = self._get_revision(end)
        try:
            end_pos = revs.index(_end)
        except ValueError:
            # end is not among the listed revisions: run to the last one
            pass

    if None not in [start, end] and start_pos > end_pos:
        raise RepositoryError('start cannot be after end')

    # ``end`` is inclusive. end_pos is always an int here, so the former
    # ``is not None`` guard around the increment was always true.
    revs = revs[start_pos:end_pos + 1]
    if reverse:
        revs.reverse()

    return CollectionGenerator(self, revs)
596 601
def get_diff_changesets(self, org_rev, other_repo, other_rev):
    """
    Returns lists of changesets that can be merged from this repo @org_rev
    to other_repo @other_rev
    ... and the other way
    ... and the ancestors that would be used for merge

    :param org_rev: the revision we want our compare to be made
    :param other_repo: repo object, most likely the fork of org_repo. It has
        all changesets that we need to obtain
    :param other_rev: revision we want out compare to be made on other_repo

    :returns: tuple ``(other_changesets, org_changesets, ancestors)``;
        ``org_changesets`` is always an empty list in this implementation,
        and ``ancestors`` is ``None`` when both revisions are equal
    """
    org_changesets = []
    ancestors = None
    if org_rev == other_rev:
        other_changesets = []
    elif self != other_repo:
        # Fetch in both directions, then walk the other repository from
        # other_rev, excluding what is reachable from org_rev. Both Repo
        # handles are closed even if fetching or walking fails.
        gitrepo = Repo(self.path)
        try:
            SubprocessGitClient(thin_packs=False).fetch(other_repo.path, gitrepo)

            gitrepo_remote = Repo(other_repo.path)
            try:
                SubprocessGitClient(thin_packs=False).fetch(self.path, gitrepo_remote)

                revs = [
                    ascii_str(x.commit.id)
                    for x in gitrepo_remote.get_walker(include=[ascii_bytes(other_rev)],
                                                       exclude=[ascii_bytes(org_rev)])
                ]
                other_changesets = [other_repo.get_changeset(rev) for rev in reversed(revs)]
                if other_changesets:
                    ancestors = [other_changesets[0].parents[0].raw_id]
                else:
                    # no changesets from other repo, ancestor is the other_rev
                    ancestors = [other_rev]
            finally:
                gitrepo_remote.close()
        finally:
            gitrepo.close()

    else:
        # Same repository: let git list org_rev..other_rev and the merge base.
        so = self.run_git_command(
            ['log', '--reverse', '--pretty=format:%H',
             '-s', '%s..%s' % (org_rev, other_rev)]
        )
        other_changesets = [self.get_changeset(cs)
                            for cs in re.findall(r'[0-9a-fA-F]{40}', so)]
        so = self.run_git_command(
            ['merge-base', org_rev, other_rev]
        )
        ancestors = [re.findall(r'[0-9a-fA-F]{40}', so)[0]]

    return other_changesets, org_changesets, ancestors
648 653
649 654 def get_diff(self, rev1, rev2, path=None, ignore_whitespace=False,
650 655 context=3):
651 656 """
652 657 Returns (git like) *diff*, as plain bytes text. Shows changes
653 658 introduced by ``rev2`` since ``rev1``.
654 659
655 660 :param rev1: Entry point from which diff is shown. Can be
656 661 ``self.EMPTY_CHANGESET`` - in this case, patch showing all
657 662 the changes since empty state of the repository until ``rev2``
658 663 :param rev2: Until which revision changes should be shown.
659 664 :param ignore_whitespace: If set to ``True``, would not show whitespace
660 665 changes. Defaults to ``False``.
661 666 :param context: How many lines before/after changed lines should be
662 667 shown. Defaults to ``3``. Due to limitations in Git, if
663 668 value passed-in is greater than ``2**31-1``
664 669 (``2147483647``), it will be set to ``2147483647``
665 670 instead. If negative value is passed-in, it will be set to
666 671 ``0`` instead.
667 672 """
668 673
669 674 # Git internally uses a signed long int for storing context
670 675 # size (number of lines to show before and after the
671 676 # differences). This can result in integer overflow, so we
672 677 # ensure the requested context is smaller by one than the
673 678 # number that would cause the overflow. It is highly unlikely
674 679 # that a single file will contain that many lines, so this
675 680 # kind of change should not cause any realistic consequences.
676 681 overflowed_long_int = 2**31
677 682
678 683 if context >= overflowed_long_int:
679 684 context = overflowed_long_int - 1
680 685
681 686 # Negative context values make no sense, and will result in
682 687 # errors. Ensure this does not happen.
683 688 if context < 0:
684 689 context = 0
685 690
686 691 flags = ['-U%s' % context, '--full-index', '--binary', '-p', '-M', '--abbrev=40']
687 692 if ignore_whitespace:
688 693 flags.append('-w')
689 694
690 695 if hasattr(rev1, 'raw_id'):
691 696 rev1 = getattr(rev1, 'raw_id')
692 697
693 698 if hasattr(rev2, 'raw_id'):
694 699 rev2 = getattr(rev2, 'raw_id')
695 700
696 701 if rev1 == self.EMPTY_CHANGESET:
697 702 rev2 = self.get_changeset(rev2).raw_id
698 703 cmd = ['show'] + flags + [rev2]
699 704 else:
700 705 rev1 = self.get_changeset(rev1).raw_id
701 706 rev2 = self.get_changeset(rev2).raw_id
702 707 cmd = ['diff'] + flags + [rev1, rev2]
703 708
704 709 if path:
705 710 cmd += ['--', path]
706 711
707 712 stdout, stderr = self._run_git_command(cmd, cwd=self.path)
708 713 # If we used 'show' command, strip first few lines (until actual diff
709 714 # starts)
710 715 if rev1 == self.EMPTY_CHANGESET:
711 716 parts = stdout.split(b'\ndiff ', 1)
712 717 if len(parts) > 1:
713 718 stdout = b'diff ' + parts[1]
714 719 return stdout
715 720
@LazyProperty
def in_memory_changeset(self):
    """
    A ``GitInMemoryChangeset`` object created for this repository.
    """
    in_memory = inmemory.GitInMemoryChangeset(self)
    return in_memory
722 727
723 728 def clone(self, url, update_after_clone=True, bare=False):
724 729 """
725 730 Tries to clone changes from external location.
726 731
727 732 :param update_after_clone: If set to ``False``, git won't checkout
728 733 working directory
729 734 :param bare: If set to ``True``, repository would be cloned into
730 735 *bare* git repository (no working directory at all).
731 736 """
732 737 url = self._get_url(url)
733 738 cmd = ['clone', '-q']
734 739 if bare:
735 740 cmd.append('--bare')
736 741 elif not update_after_clone:
737 742 cmd.append('--no-checkout')
738 743 cmd += ['--', url, self.path]
739 744 # If error occurs run_git_command raises RepositoryError already
740 745 self.run_git_command(cmd)
741 746
742 747 def pull(self, url):
743 748 """
744 749 Tries to pull changes from external location.
745 750 """
746 751 url = self._get_url(url)
747 752 cmd = ['pull', '--ff-only', url]
748 753 # If error occurs run_git_command raises RepositoryError already
749 754 self.run_git_command(cmd)
750 755
751 756 def fetch(self, url):
752 757 """
753 758 Tries to pull changes from external location.
754 759 """
755 760 url = self._get_url(url)
756 761 so = self.run_git_command(['ls-remote', '-h', url])
757 762 cmd = ['fetch', url, '--']
758 763 for line in so.splitlines():
759 764 sha, ref = line.split('\t')
760 765 cmd.append('+%s:%s' % (ref, ref))
761 766 self.run_git_command(cmd)
762 767
def _update_server_info(self):
    """
    Run dulwich's ``update_server_info`` on this repository.

    An OSError with errno ENOENT or EROFS is logged and ignored; any
    other OSError is re-raised.
    """
    try:
        update_server_info(self._repo)
    except OSError as e:
        if e.errno in (errno.ENOENT, errno.EROFS):
            # Workaround for dulwich crashing on for example its own dulwich/tests/data/repos/simple_merge.git/info/refs.lock
            log.error('Ignoring %s running update-server-info: %s', type(e).__name__, e)
        else:
            raise
774 779
@LazyProperty
def workdir(self):
    """
    A ``GitWorkdir`` object created for this repository.
    """
    git_workdir = workdir.GitWorkdir(self)
    return git_workdir
781 786
def get_config_value(self, section, name, config_file=None):
    """
    Look up ``name`` in [``section``] and return its value as str, or
    ``None`` if no config file provides it.

    The files in ``config_file`` are tried first, then those in
    ``self._config_files``; files that cannot be read or parsed are
    skipped, and the first file that has the setting wins.

    :param section: Section we want to retrieve value from
    :param name: Name of configuration we want to retrieve
    :param config_file: A path to file which should be used to retrieve
        configuration from (might also be a list of file paths)
    """
    if config_file is None:
        extra_paths = []
    elif isinstance(config_file, str):
        extra_paths = [config_file]
    else:
        extra_paths = config_file

    for path in extra_paths + self._config_files:
        try:
            config = ConfigFile.from_path(path)
        except (IOError, OSError, ValueError):
            continue
        try:
            value = config.get(section, name)
        except KeyError:
            continue
        if value is None:
            return None
        return safe_str(value)
    return None
810 815
811 816 def get_user_name(self, config_file=None):
812 817 """
813 818 Returns user's name from global configuration file.
814 819
815 820 :param config_file: A path to file which should be used to retrieve
816 821 configuration from (might also be a list of file paths)
817 822 """
818 823 return self.get_config_value('user', 'name', config_file)
819 824
820 825 def get_user_email(self, config_file=None):
821 826 """
822 827 Returns user's email from global configuration file.
823 828
824 829 :param config_file: A path to file which should be used to retrieve
825 830 configuration from (might also be a list of file paths)
826 831 """
827 832 return self.get_config_value('user', 'email', config_file)
@@ -1,685 +1,690 b''
1 1 # -*- coding: utf-8 -*-
2 2 """
3 3 vcs.backends.hg.repository
4 4 ~~~~~~~~~~~~~~~~~~~~~~~~~~
5 5
6 6 Mercurial repository implementation.
7 7
8 8 :created_on: Apr 8, 2010
9 9 :copyright: (c) 2010-2011 by Marcin Kuzminski, Lukasz Balcerzak.
10 10 """
11 11
12 12 import datetime
13 13 import logging
14 14 import os
15 15 import time
16 16 import urllib.error
17 17 import urllib.parse
18 18 import urllib.request
19 19 from collections import OrderedDict
20 20
21 21 import mercurial.commands
22 22 import mercurial.error
23 23 import mercurial.exchange
24 24 import mercurial.hg
25 25 import mercurial.hgweb
26 26 import mercurial.httppeer
27 27 import mercurial.localrepo
28 28 import mercurial.match
29 29 import mercurial.mdiff
30 30 import mercurial.node
31 31 import mercurial.patch
32 32 import mercurial.scmutil
33 33 import mercurial.sshpeer
34 34 import mercurial.tags
35 35 import mercurial.ui
36 36 import mercurial.unionrepo
37 import mercurial.util
37
38
39 try:
40 from mercurial.utils.urlutil import url as hg_url
41 except ImportError: # urlutil was introduced in Mercurial 5.8
42 from mercurial.util import url as hg_url
38 43
39 44 from kallithea.lib.vcs.backends.base import BaseRepository, CollectionGenerator
40 45 from kallithea.lib.vcs.exceptions import (BranchDoesNotExistError, ChangesetDoesNotExistError, EmptyRepositoryError, RepositoryError, TagAlreadyExistError,
41 46 TagDoesNotExistError, VCSError)
42 47 from kallithea.lib.vcs.utils import ascii_bytes, ascii_str, author_email, author_name, date_fromtimestamp, makedate, safe_bytes, safe_str
43 48 from kallithea.lib.vcs.utils.helpers import get_urllib_request_handlers
44 49 from kallithea.lib.vcs.utils.lazy import LazyProperty
45 50 from kallithea.lib.vcs.utils.paths import abspath
46 51
47 52 from . import changeset, inmemory, workdir
48 53
49 54
50 55 log = logging.getLogger(__name__)
51 56
52 57
53 58 class MercurialRepository(BaseRepository):
54 59 """
55 60 Mercurial repository backend
56 61 """
57 62 DEFAULT_BRANCH_NAME = 'default'
58 63 scm = 'hg'
59 64
def __init__(self, repo_path, create=False, baseui=None, src_url=None,
             update_after_clone=False):
    """
    Raises RepositoryError if repository could not be found at the given
    ``repo_path``.

    :param repo_path: local path of the repository
    :param create=False: if set to True, would try to create repository if
        it does not exist rather than raising exception
    :param baseui=None: user data
    :param src_url=None: would try to clone repository from given location
    :param update_after_clone=False: sets update of working copy after
        making a clone
    """
    if isinstance(repo_path, str):
        self.path = abspath(repo_path)
    else:
        raise VCSError('Mercurial backend requires repository path to be instance of <str> got %s instead'
                       % type(repo_path))

    if baseui:
        self.baseui = baseui
    else:
        self.baseui = mercurial.ui.ui()

    # path and ui are in place, so the repo object itself can be set up
    self._repo = self._get_repo(create, src_url, update_after_clone)
83 88
84 89 @property
85 90 def _empty(self):
86 91 """
87 92 Checks if repository is empty ie. without any changesets
88 93 """
89 94 # TODO: Following raises errors when using InMemoryChangeset...
90 95 # return len(self._repo.changelog) == 0
91 96 return len(self.revisions) == 0
92 97
@LazyProperty
def revisions(self):
    """
    List of revision ids in ascending order. Kept as a lazy attribute so
    that external tools can inject shas from a cache.
    """
    all_revisions = self._get_all_revisions()
    return all_revisions
100 105
@LazyProperty
def name(self):
    """
    Last component of the repository path.
    """
    _parent, leaf = os.path.split(self.path)
    return leaf
104 109
@LazyProperty
def branches(self):
    """
    Branches as returned by ``_get_branches`` with its default arguments.
    """
    default_branches = self._get_branches()
    return default_branches
108 113
@LazyProperty
def closed_branches(self):
    """
    Closed branches only (normal branches are left out).
    """
    only_closed = self._get_branches(normal=False, closed=True)
    return only_closed
112 117
@LazyProperty
def allbranches(self):
    """
    Every branch of the repository, closed ones included.
    """
    everything = self._get_branches(closed=True)
    return everything
119 124
120 125 def _get_branches(self, normal=True, closed=False):
121 126 """
122 127 Gets branches for this repository
123 128 Returns only not closed branches by default
124 129
125 130 :param closed: return also closed branches for mercurial
126 131 :param normal: return also normal branches
127 132 """
128 133
129 134 if self._empty:
130 135 return {}
131 136
132 137 bt = OrderedDict()
133 138 for bn, _heads, node, isclosed in sorted(self._repo.branchmap().iterbranches()):
134 139 if isclosed:
135 140 if closed:
136 141 bt[safe_str(bn)] = ascii_str(mercurial.node.hex(node))
137 142 else:
138 143 if normal:
139 144 bt[safe_str(bn)] = ascii_str(mercurial.node.hex(node))
140 145 return bt
141 146
@LazyProperty
def tags(self):
    """
    Tags of this repository, as built by ``_get_tags``.
    """
    current_tags = self._get_tags()
    return current_tags
148 153
149 154 def _get_tags(self):
150 155 if self._empty:
151 156 return {}
152 157
153 158 return OrderedDict(sorted(
154 159 ((safe_str(n), ascii_str(mercurial.node.hex(h))) for n, h in self._repo.tags().items()),
155 160 reverse=True,
156 161 key=lambda x: x[0], # sort by name
157 162 ))
158 163
def tag(self, name, user, revision=None, message=None, date=None,
        **kwargs):
    """
    Creates and returns a tag for the given ``revision``.

    :param name: name for new tag
    :param user: full username, i.e.: "Joe Doe <joe.doe@example.com>"
    :param revision: changeset id for which new tag would be created
    :param message: message of the tag's commit
    :param date: date of tag's commit
    :param kwargs: ``local`` (default ``False``) is passed on to Mercurial

    :raises TagAlreadyExistError: if tag with same name already exists
    :raises RepositoryError: if Mercurial aborts the tagging
    """
    if name in self.tags:
        raise TagAlreadyExistError("Tag %s already exists" % name)

    target = self.get_changeset(revision)
    is_local = kwargs.get('local', False)

    if message is None:
        message = "Added tag %s for changeset %s" % (name, target.short_id)
    if date is None:
        now_text = datetime.datetime.now().strftime('%a, %d %b %Y %H:%M:%S')
        date = safe_bytes(now_text)

    try:
        mercurial.tags.tag(
            self._repo,
            safe_bytes(name),
            target._ctx.node(),
            safe_bytes(message),
            is_local,
            safe_bytes(user),
            date,
        )
    except mercurial.error.Abort as e:
        raise RepositoryError(e.args[0])

    # Reinitialize tags so the new one can be looked up
    self.tags = self._get_tags()
    return self.get_changeset(revision=self.tags[name])
194 199
def remove_tag(self, name, user, message=None, date=None):
    """
    Removes tag with the given ``name``.

    :param name: name of the tag to be removed
    :param user: full username, i.e.: "Joe Doe <joe.doe@example.com>"
    :param message: message of the tag's removal commit
    :param date: date of tag's removal commit

    :raises TagDoesNotExistError: if tag with given name does not exist
    :raises RepositoryError: if Mercurial aborts the operation
    """
    if name not in self.tags:
        raise TagDoesNotExistError("Tag %s does not exist" % name)

    commit_message = "Removed tag %s" % name if message is None else message
    if date is None:
        now_text = datetime.datetime.now().strftime('%a, %d %b %Y %H:%M:%S')
        date = safe_bytes(now_text)

    try:
        # tagging the null node is how the tag gets removed
        mercurial.tags.tag(
            self._repo,
            safe_bytes(name),
            mercurial.node.nullid,
            safe_bytes(commit_message),
            False,  # local
            safe_bytes(user),
            date,
        )
        self.tags = self._get_tags()
    except mercurial.error.Abort as e:
        raise RepositoryError(e.args[0])
219 224
@LazyProperty
def bookmarks(self):
    """
    Bookmarks of this repository, as built by ``_get_bookmarks``.
    """
    current_bookmarks = self._get_bookmarks()
    return current_bookmarks
226 231
227 232 def _get_bookmarks(self):
228 233 if self._empty:
229 234 return {}
230 235
231 236 return OrderedDict(sorted(
232 237 ((safe_str(n), ascii_str(mercurial.node.hex(h))) for n, h in self._repo._bookmarks.items()),
233 238 reverse=True,
234 239 key=lambda x: x[0], # sort by name
235 240 ))
236 241
def _get_all_revisions(self):
    """
    Return the hex ids of all revisions in the ``visible`` repo view, in
    changelog order.
    """
    visible_view = self._repo.filtered(b'visible')
    hex_ids = []
    for rev in visible_view.changelog.revs():
        hex_ids.append(ascii_str(self._repo[rev].hex()))
    return hex_ids
239 244
def get_diff(self, rev1, rev2, path='', ignore_whitespace=False,
             context=3):
    """
    Returns (git like) *diff*, as bytes. Shows changes introduced by
    ``rev2`` since ``rev1``.

    :param rev1: Entry point from which diff is shown. Can be
        ``self.EMPTY_CHANGESET`` - in this case, patch showing all
        the changes since empty state of the repository until ``rev2``
    :param rev2: Until which revision changes should be shown.
    :param path: If given, restrict the diff to exactly this file.
    :param ignore_whitespace: If set to ``True``, would not show whitespace
        changes. Defaults to ``False``.
    :param context: How many lines before/after changed lines should be
        shown. Defaults to ``3``. If negative value is passed-in, it will be
        set to ``0`` instead.
    """
    # Negative context values make no sense, and will result in
    # errors. Ensure this does not happen.
    if context < 0:
        context = 0

    # accept changeset-like objects as well as plain ids
    rev1 = getattr(rev1, 'raw_id', rev1)
    rev2 = getattr(rev2, 'raw_id', rev2)

    # Check if given revisions are present at repository (may raise
    # ChangesetDoesNotExistError)
    if rev1 != self.EMPTY_CHANGESET:
        self.get_changeset(rev1)
    self.get_changeset(rev2)

    file_filter = mercurial.match.exact([safe_bytes(path)]) if path else None

    diff_opts = mercurial.mdiff.diffopts(
        git=True,
        showfunc=True,
        ignorews=ignore_whitespace,
        context=context,
    )
    chunks = mercurial.patch.diff(self._repo, rev1, rev2, match=file_filter, opts=diff_opts)
    return b''.join(chunks)
283 288
284 289 @staticmethod
285 290 def _check_url(url, repoui=None):
286 291 r"""
287 292 Raise URLError if url doesn't seem like a valid safe Hg URL. We
288 293 only allow http, https, ssh, and hg-git URLs.
289 294
290 295 For http, https and git URLs, make a connection and probe to see if it is valid.
291 296
292 297 On failures it'll raise urllib2.HTTPError, exception is also thrown
293 298 when the return code is non 200
294 299
295 300 >>> MercurialRepository._check_url('file:///repo')
296 301
297 302 >>> MercurialRepository._check_url('http://example.com:65537/repo')
298 303 Traceback (most recent call last):
299 304 ...
300 305 urllib.error.URLError: <urlopen error Error parsing URL: 'http://example.com:65537/repo'>
301 306 >>> MercurialRepository._check_url('foo')
302 307 Traceback (most recent call last):
303 308 ...
304 309 urllib.error.URLError: <urlopen error Unsupported protocol in URL 'foo'>
305 310 >>> MercurialRepository._check_url('git+ssh://example.com/my%20fine repo')
306 311 Traceback (most recent call last):
307 312 ...
308 313 urllib.error.URLError: <urlopen error Unsupported protocol in URL 'git+ssh://example.com/my%20fine repo'>
309 314 >>> MercurialRepository._check_url('svn+http://example.com/repo')
310 315 Traceback (most recent call last):
311 316 ...
312 317 urllib.error.URLError: <urlopen error Unsupported protocol in URL 'svn+http://example.com/repo'>
313 318 """
314 319 try:
315 320 parsed_url = urllib.parse.urlparse(url)
316 321 parsed_url.port # trigger netloc parsing which might raise ValueError
317 322 except ValueError:
318 323 raise urllib.error.URLError("Error parsing URL: %r" % url)
319 324
320 325 # check first if it's not an local url
321 326 if os.path.isabs(url) and os.path.isdir(url) or parsed_url.scheme == 'file':
322 327 # When creating repos, _get_url will use file protocol for local paths
323 328 return
324 329
325 330 if parsed_url.scheme not in ['http', 'https', 'ssh', 'git+http', 'git+https']:
326 331 raise urllib.error.URLError("Unsupported protocol in URL %r" % url)
327 332
328 333 url = safe_bytes(url)
329 334
330 335 if parsed_url.scheme == 'ssh':
331 336 # in case of invalid uri or authentication issues, sshpeer will
332 337 # throw an exception.
333 338 mercurial.sshpeer.instance(repoui or mercurial.ui.ui(), url, False).lookup(b'tip')
334 339 return
335 340
336 341 if '+' in parsed_url.scheme: # strip 'git+' for hg-git URLs
337 342 url = url.split(b'+', 1)[1]
338 343
339 url_obj = mercurial.util.url(url)
344 url_obj = hg_url(url)
340 345 test_uri, handlers = get_urllib_request_handlers(url_obj)
341 346
342 347 url_obj.passwd = b'*****'
343 348 cleaned_uri = str(url_obj)
344 349
345 350 o = urllib.request.build_opener(*handlers)
346 351 o.addheaders = [('Content-Type', 'application/mercurial-0.1'),
347 352 ('Accept', 'application/mercurial-0.1')]
348 353
349 354 req = urllib.request.Request(
350 355 "%s?%s" % (
351 356 safe_str(test_uri),
352 357 urllib.parse.urlencode({
353 358 'cmd': 'between',
354 359 'pairs': "%s-%s" % ('0' * 40, '0' * 40),
355 360 })
356 361 ))
357 362
358 363 try:
359 364 resp = o.open(req)
360 365 if resp.code != 200:
361 366 raise Exception('Return Code is not 200')
362 367 except Exception as e:
363 368 # means it cannot be cloned
364 369 raise urllib.error.URLError("[%s] org_exc: %s" % (cleaned_uri, e))
365 370
366 371 if parsed_url.scheme in ['http', 'https']: # skip git+http://... etc
367 372 # now check if it's a proper hg repo
368 373 try:
369 374 mercurial.httppeer.instance(repoui or mercurial.ui.ui(), url, False).lookup(b'tip')
370 375 except Exception as e:
371 376 raise urllib.error.URLError(
372 377 "url [%s] does not look like an hg repo org_exc: %s"
373 378 % (cleaned_uri, e))
374 379
def _get_repo(self, create, src_url=None, update_after_clone=False):
    """
    Open the Mercurial repository at ``self.path`` and return its
    localrepo object.

    If ``src_url`` is given, the repository is first cloned from there
    (after the URL has passed ``_check_url``); the working copy is only
    updated when ``update_after_clone`` is set. Otherwise, when no
    repository exists at the path, one is created if ``create`` is True.

    :raises RepositoryError: if Mercurial aborts or reports a repo error
    """
    try:
        if src_url:
            url = self._get_url(src_url)
            clone_opts = {} if update_after_clone else {'noupdate': True}
            MercurialRepository._check_url(url, self.baseui)
            mercurial.commands.clone(self.baseui, safe_bytes(url), safe_bytes(self.path), **clone_opts)

            # Don't try to create if we've already cloned repo
            create = False
        return mercurial.localrepo.instance(self.baseui, safe_bytes(self.path), create=create)
    except (mercurial.error.Abort, mercurial.error.RepoError) as err:
        if create:
            template = "Cannot create repository at %s. Original error was %s"
        else:
            template = "Not valid repository at %s. Original error was %s"
        raise RepositoryError(template % (self.name, err))
405 410
@LazyProperty
def in_memory_changeset(self):
    """
    ``MercurialInMemoryChangeset`` instance bound to this repository.
    """
    in_memory = inmemory.MercurialInMemoryChangeset(self)
    return in_memory
409 414
@LazyProperty
def description(self):
    """
    The ``web.description`` setting of the repository, or ``'unknown'``
    when it is not set (or empty).
    """
    raw_description = self._repo.ui.config(b'web', b'description', None, untrusted=True)
    if not raw_description:
        raw_description = b'unknown'
    return safe_str(raw_description)
414 419
@LazyProperty
def last_change(self):
    """
    Time of the last change made on this repository, as datetime object.
    """
    mtime = self._get_mtime()
    tz_offset = makedate()[1]
    return date_fromtimestamp(mtime, tz_offset)
421 426
422 427 def _get_mtime(self):
423 428 try:
424 429 return time.mktime(self.get_changeset().date.timetuple())
425 430 except RepositoryError:
426 431 # fallback to filesystem
427 432 cl_path = os.path.join(self.path, '.hg', "00changelog.i")
428 433 st_path = os.path.join(self.path, '.hg', "store")
429 434 if os.path.exists(cl_path):
430 435 return os.stat(cl_path).st_mtime
431 436 else:
432 437 return os.stat(st_path).st_mtime
433 438
def _get_revision(self, revision):
    """
    Resolve any revision identifier to its full revision hash as str.

    :param revision: str or int or None; ``None`` and ``-1`` mean tip

    :raises EmptyRepositoryError: if the repository has no changesets
    :raises ChangesetDoesNotExistError: if the identifier cannot be
        resolved or is ambiguous
    """
    if self._empty:
        raise EmptyRepositoryError("There are no changesets yet")

    if revision in [-1, None]:
        revision = b'tip'
    elif isinstance(revision, str):
        revision = safe_bytes(revision)

    try:
        if isinstance(revision, int):
            ctx = self._repo[revision]
        else:
            ctx = mercurial.scmutil.revsymbol(self._repo, revision)
        return ascii_str(ctx.hex())
    except (IndexError, ValueError, mercurial.error.RepoLookupError, TypeError):
        raise ChangesetDoesNotExistError(
            "Revision %r does not exist for %s" % (safe_str(revision), self.name))
    except (LookupError, ):
        raise ChangesetDoesNotExistError(
            "Ambiguous identifier `%s` for %s" % (safe_str(revision), self.name))
458 463
def get_ref_revision(self, ref_type, ref_name):
    """
    Resolve the reference ``ref_name`` of kind ``ref_type`` (``branch``,
    ``book``, ``tag`` or ``rev``) to a revision hash.

    A ``rev`` consisting only of zeros gives ``self.EMPTY_CHANGESET``.

    :raises ChangesetDoesNotExistError: if the reference is ambiguous or
        cannot be found
    """
    if ref_type == 'rev' and not ref_name.strip('0'):
        return self.EMPTY_CHANGESET

    # lookup up the exact node id
    _revset_predicates = {
        'branch': 'branch',
        'book': 'bookmark',
        'tag': 'tag',
        'rev': 'id',
    }
    predicate = _revset_predicates[ref_type]
    # avoid expensive branch(x) iteration over whole repo
    rev_spec = "%%s & %s(%%s)" % predicate
    try:
        revs = self._repo.revs(rev_spec, ref_name, ref_name)
    except LookupError:
        raise ChangesetDoesNotExistError(
            "Ambiguous identifier %s:%s for %s" % (ref_type, ref_name, self.name))
    except mercurial.error.RepoLookupError:
        raise ChangesetDoesNotExistError(
            "Revision %s:%s does not exist for %s" % (ref_type, ref_name, self.name))

    # TODO: just report 'not found' when the revset is empty?
    revision = revs.last() if revs else ref_name
    return self._get_revision(revision)
489 494
490 495 def _get_archives(self, archive_name='tip'):
491 496 allowed = self.baseui.configlist(b"web", b"allow_archive",
492 497 untrusted=True)
493 498 for name, ext in [(b'zip', '.zip'), (b'gz', '.tar.gz'), (b'bz2', '.tar.bz2')]:
494 499 if name in allowed or self._repo.ui.configbool(b"web",
495 500 b"allow" + name,
496 501 untrusted=True):
497 502 yield {"type": safe_str(name), "extension": ext, "node": archive_name}
498 503
499 504 def _get_url(self, url):
500 505 """
501 506 Returns normalized url. If schema is not given, fall back to
502 507 filesystem (``file:///``) schema.
503 508 """
504 509 if url != 'default' and '://' not in url:
505 510 url = "file:" + urllib.request.pathname2url(url)
506 511 return url
507 512
def get_changeset(self, revision=None):
    """
    Build the ``MercurialChangeset`` object for the given ``revision``,
    which is first resolved with ``_get_revision``.
    """
    raw_id = self._get_revision(revision)
    return changeset.MercurialChangeset(repository=self, revision=raw_id)
514 519
def get_changesets(self, start=None, end=None, start_date=None,
                   end_date=None, branch_name=None, reverse=False, max_revisions=None):
    """
    Returns iterator of ``MercurialChangeset`` objects from start to end
    (both are inclusive)

    :param start: None, str, int or mercurial lookup format
    :param end: None, str, int or mercurial lookup format
    :param start_date: only changesets after this date
    :param end_date: only changesets before this date
    :param branch_name: only changesets on this branch
    :param reverse: return changesets in reversed order
    :param max_revisions: limit the revset to this many revisions

    :raises RepositoryError: if start comes after end
    :raises BranchDoesNotExistError: if ``branch_name`` is unknown
    """
    # both lookups are always made, even when the position is not needed
    start_raw_id = self._get_revision(start)
    start_pos = self.revisions.index(start_raw_id) if start is not None else None
    end_raw_id = self._get_revision(end)
    end_pos = self.revisions.index(end_raw_id) if end is not None else None

    if start_pos is not None and end_pos is not None and start_pos > end_pos:
        raise RepositoryError("Start revision '%s' cannot be after end revision '%s'" % (start, end))

    if branch_name and branch_name not in self.allbranches:
        raise BranchDoesNotExistError("Branch %r not found in %s" % (branch_name, self.name))

    if end_pos is not None:
        end_pos += 1  # make the end of the slice inclusive

    # build the revset conditions
    conditions = []
    if branch_name:
        conditions.append(b'branch("%s")' % safe_bytes(branch_name))
    if start_date:
        conditions.append(b'date(">%s")' % safe_bytes(str(start_date)))
    if end_date:
        conditions.append(b'date("<%s")' % safe_bytes(str(end_date)))

    if conditions or max_revisions:
        revspec = b' and '.join(conditions) if conditions else b'all()'
        if max_revisions:
            revspec = b'limit(%s, %d)' % (revspec, max_revisions)
        candidates = mercurial.scmutil.revrange(self._repo, [revspec])
    else:
        candidates = self.revisions

    # this is very much a hack to turn this into a list; a better solution
    # would be to get rid of this function entirely and use revsets
    selected = list(candidates)[start_pos:end_pos]
    if reverse:
        selected.reverse()

    return CollectionGenerator(self, selected)
568 573
def get_diff_changesets(self, org_rev, other_repo, other_rev):
    """
    Returns lists of changesets that can be merged from this repo @org_rev
    to other_repo @other_rev
    ... and the other way
    ... and the ancestors that would be used for merge

    :param org_rev: the revision we want our compare to be made
    :param other_repo: repo object, most likely the fork of org_repo. It has
        all changesets that we need to obtain
    :param other_rev: revision we want out compare to be made on other_repo
    :returns: tuple ``(other_changesets, org_changesets, ancestors)``;
        ``ancestors`` is None when both revisions are the same
    """
    if org_rev == other_rev:
        return [], [], None

    if self != other_repo:
        # case two independent repos
        hgrepo = mercurial.unionrepo.makeunionrepository(other_repo.baseui,
                                                         safe_bytes(other_repo.path),
                                                         safe_bytes(self.path))
        # all ancestors of other_rev will be in other_repo and
        # rev numbers from hgrepo can be used in other_repo - org_rev ancestors cannot
    else:
        # no remote compare do it on the same repository
        hgrepo = other_repo._repo

    other_id = ascii_bytes(other_rev)
    org_id = ascii_bytes(org_rev)

    def hex_ids(revs):
        return [ascii_str(hgrepo[rev].hex()) for rev in revs]

    ancestors = hex_ids(hgrepo.revs(b"id(%s) & ::id(%s)", other_id, org_id))
    if ancestors:
        log.debug("shortcut found: %s is already an ancestor of %s", other_rev, org_rev)
    else:
        log.debug("no shortcut found: %s is not an ancestor of %s", other_rev, org_rev)
        ancestors = hex_ids(hgrepo.revs(b"heads(::id(%s) & ::id(%s))", org_id, other_id))  # FIXME: expensive!

    only_in_first = b"ancestors(id(%s)) and not ancestors(id(%s)) and not id(%s)"
    other_changesets = [
        other_repo.get_changeset(rev)
        for rev in hgrepo.revs(only_in_first, other_id, org_id, org_id)
    ]
    org_changesets = [
        self.get_changeset(ascii_str(hgrepo[rev].hex()))
        for rev in hgrepo.revs(only_in_first, org_id, other_id, other_id)
    ]

    return other_changesets, org_changesets, ancestors
622 627
def pull(self, url):
    """
    Tries to pull changes from the external location ``url``.

    :raises RepositoryError: if Mercurial aborts the pull
    """
    source = safe_bytes(self._get_url(url))
    remote = mercurial.hg.peer(self._repo, {}, source)
    try:
        mercurial.exchange.pull(self._repo, remote, heads=None, force=None)
    except mercurial.error.Abort as err:
        # Propagate error but with vcs's type
        raise RepositoryError(str(err))
633 638
@LazyProperty
def workdir(self):
    """
    Build the ``MercurialWorkdir`` wrapper bound to this repository.
    """
    hg_workdir = workdir.MercurialWorkdir(self)
    return hg_workdir
640 645
def get_config_value(self, section, name=None, config_file=None):
    """
    Returns configuration value for a given [``section``] and ``name``.

    The repository's own ui configuration is used, unless config files are
    given - then only those files are read, into a fresh ui object.

    :param section: Section we want to retrieve value from
    :param name: Name of configuration we want to retrieve
    :param config_file: A path to file which should be used to retrieve
        configuration from (might also be a list of file paths)
    :returns: the value as str, or ``None`` if it is not set
    """
    if config_file is None:
        paths = []
    elif isinstance(config_file, str):
        paths = [config_file]
    else:
        paths = config_file

    config = self._repo.ui
    if paths:
        config = mercurial.ui.ui()
        for path in paths:
            config.readconfig(safe_bytes(path))

    value = config.config(safe_bytes(section), safe_bytes(name))
    if value is None:
        return value
    return safe_str(value)
662 667
def get_user_name(self, config_file=None):
    """
    Return the name part of the ``ui.username`` configuration value, or
    ``None`` if no username is configured.

    :param config_file: A path to file which should be used to retrieve
        configuration from (might also be a list of file paths)
    """
    username = self.get_config_value('ui', 'username', config_file=config_file)
    return author_name(username) if username else None
674 679
def get_user_email(self, config_file=None):
    """
    Return the email part of the ``ui.username`` configuration value, or
    ``None`` if no username is configured.

    :param config_file: A path to file which should be used to retrieve
        configuration from (might also be a list of file paths)
    """
    username = self.get_config_value('ui', 'username', config_file=config_file)
    return author_email(username) if username else None
@@ -1,265 +1,265 b''
1 1 """
2 2 Utilities aimed to help achieve mostly basic tasks.
3 3 """
4 4
5 5 import datetime
6 6 import logging
7 7 import os
8 8 import re
9 9 import time
10 10 import urllib.request
11 11
12 12 import mercurial.url
13 13 from pygments import highlight
14 14 from pygments.formatters import TerminalFormatter
15 15 from pygments.lexers import ClassNotFound, guess_lexer_for_filename
16 16
17 17 from kallithea.lib.vcs import backends
18 18 from kallithea.lib.vcs.exceptions import RepositoryError, VCSError
19 19 from kallithea.lib.vcs.utils import safe_str
20 20 from kallithea.lib.vcs.utils.paths import abspath
21 21
22 22
23 23 ALIASES = ['hg', 'git']
24 24
25 25
def get_scm(path, search_up=False, explicit_alias=None):
    """
    Find the scm in use at ``path`` and return an ``(alias, path)`` tuple,
    where alias is one of ``ALIASES`` and path is the directory at which
    the scm was found.

    :param search_up: if set to ``True``, this function would try to
        move up to parent directory every time no scm is recognized for the
        currently checked path. Default: ``False``.
    :param explicit_alias: can be one of available backend aliases; when
        more than one scm is found at the directory, the one matching this
        alias is returned instead of raising

    :raises VCSError: if ``path`` is not a directory, if no scm is found,
        or if several are found and none matches ``explicit_alias``
    """
    if not os.path.isdir(path):
        raise VCSError("Given path %s is not a directory" % path)

    while True:
        found_scms = [(scm, path) for scm in get_scms_for_path(path)]
        if found_scms or not search_up:
            break
        parent = abspath(path, '..')
        if parent == path:
            # cannot go any further up
            break
        path = parent

    if not found_scms:
        raise VCSError('No scm found at given path %s' % path)
    if len(found_scms) == 1:
        return found_scms[0]

    for candidate in found_scms:
        if candidate[0] == explicit_alias:
            return candidate
    raise VCSError('More than one [%s] scm found at given path %s'
                   % (', '.join(found[0] for found in found_scms), path))
64 64
65 65
def get_scms_for_path(path):
    """
    List the aliases of all scm's found at the given path. If no scm is
    recognized - empty list is returned.

    :param path: path to directory which should be checked. May be callable.

    :raises VCSError: if given ``path`` is not a directory
    """
    if hasattr(path, '__call__'):
        path = path()
    if not os.path.isdir(path):
        raise VCSError("Given path %r is not a directory" % path)

    found = []
    for alias in ALIASES:
        if os.path.isdir(os.path.join(path, '.' + alias)):
            # find .hg / .git
            found.append(alias)
        elif os.path.isdir(os.path.join(path, 'rm__.' + alias)):
            # find rm__.hg / rm__.git too - left overs from old method for deleting
            return found
        else:
            # We still need to check if it's not bare repository as
            # bare repos don't have working directories
            try:
                backends.get_backend(alias)(path)
            except (RepositoryError, VCSError):
                # Wrong backend, or no backend at all
                pass
            else:
                found.append(alias)
    return found
104 104
105 105
def get_scm_size(alias, root_path):
    """
    Sum up the file sizes below ``root_path``, split into scm metadata and
    everything else.

    A file counts as scm metadata when the path of its directory contains
    the dotted alias (for example ``.hg``) as a substring.

    :param alias: scm alias, with or without the leading dot (``hg`` and
        ``.hg`` are equivalent)
    :param root_path: directory to walk
    :returns: tuple ``(size_scm, size_root)`` in bytes; files that cannot
        be stat'ed are skipped
    """
    if not alias.startswith('.'):
        # scm directories are dot-prefixed, so the dot goes in front
        alias = '.' + alias

    size_scm, size_root = 0, 0
    for path, _dirs, files in os.walk(root_path):
        in_scm = alias in path
        for f in files:
            try:
                size = os.path.getsize(os.path.join(path, f))
            except OSError:
                continue
            if in_scm:
                size_scm += size
            else:
                size_root += size

    return size_scm, size_root
126 126
127 127
def get_highlighted_code(name, code, type='terminal'):
    """
    Return ``code`` colored for terminal output, with the lexer guessed
    from the file ``name`` and the content. If no lexer can be guessed,
    the content is returned unchanged.

    The ``type`` argument is accepted but not used.
    """
    try:
        return highlight(code, guess_lexer_for_filename(name, code), TerminalFormatter())
    except ClassNotFound:
        logging.debug("Couldn't guess Lexer, will not use pygments.")
        return code
142 142
143 143
def parse_changesets(text):
    """
    Parse a single changeset id or an id range (``start..end`` or
    ``start...end``) into a dictionary with *start*, *main* and *end* ids.

    Examples::

        >>> parse_changesets('aaabbb')
        {'start': None, 'main': 'aaabbb', 'end': None}
        >>> parse_changesets('aaabbb..cccddd')
        {'start': 'aaabbb', 'end': 'cccddd', 'main': None}

    :raises ValueError: if the text is neither a single id nor a range
    """
    text = text.strip()
    CID_RE = r'[a-zA-Z0-9]+'

    if '..' in text:
        range_re = r'^(?P<start>%s)?\.{2,3}(?P<end>%s)?$' % (CID_RE, CID_RE)
        found = re.match(range_re, text)
        if found:
            parsed = found.groupdict()
            parsed['main'] = None
            return parsed
    elif re.match(r'^(?P<cid>%s)$' % CID_RE, text):
        return {
            'start': None,
            'main': text,
            'end': None,
        }

    raise ValueError("IDs not recognized")
174 174
175 175
def parse_datetime(text):
    """
    Turn ``text`` into a ``datetime.datetime`` instance.

    :param text: string of desired date/datetime or something more verbose,
        like *yesterday*, *2weeks 3days*, etc.

    :raises ValueError: if the text is not understood
    """
    text = text.strip().lower()

    INPUT_FORMATS = (
        '%Y-%m-%d %H:%M:%S',
        '%Y-%m-%d %H:%M',
        '%Y-%m-%d',
        '%m/%d/%Y %H:%M:%S',
        '%m/%d/%Y %H:%M',
        '%m/%d/%Y',
        '%m/%d/%y %H:%M:%S',
        '%m/%d/%y %H:%M',
        '%m/%d/%y',
    )
    for fmt in INPUT_FORMATS:
        try:
            return datetime.datetime.strptime(text, fmt)
        except ValueError:
            pass

    def start_of_day(moment):
        return datetime.datetime(*moment.timetuple()[:3])

    # Try descriptive texts
    if text == 'tomorrow':
        future = datetime.datetime.now() + datetime.timedelta(days=1)
        return datetime.datetime(*(future.timetuple()[:3] + (23, 59, 59)))
    if text == 'today':
        return start_of_day(datetime.datetime.today())
    if text == 'now':
        return datetime.datetime.now()
    if text == 'yesterday':
        return start_of_day(datetime.datetime.now() - datetime.timedelta(days=1))

    # relative age such as "2weeks 3days", counted back from now
    matched = re.match(
        r'^((?P<weeks>\d+) ?w(eeks?)?)? ?((?P<days>\d+) ?d(ays?)?)?$', text)
    if matched:
        parts = matched.groupdict()
        days = 0
        if parts['days']:
            days += int(parts['days'])
        if parts['weeks']:
            days += int(parts['weeks']) * 7
        return start_of_day(datetime.datetime.now() - datetime.timedelta(days=days))

    raise ValueError('Wrong date: "%s"' % text)
230 230
231 231
def get_dict_for_attrs(obj, attrs):
    """
    Builds a dictionary mapping every name in ``attrs`` to the value of that
    attribute on ``obj``.
    """
    return {attr_name: getattr(obj, attr_name) for attr_name in attrs}
240 240
def get_urllib_request_handlers(url_obj):
    """
    Returns ``(test_uri, handlers)`` for ``url_obj``.

    ``handlers`` is empty when ``url_obj.authinfo()`` reports no credentials;
    otherwise it holds a basic and a digest auth handler sharing one password
    manager filled from those credentials.
    """
    test_uri, authinfo = url_obj.authinfo()
    if not authinfo:
        return test_uri, []

    # authinfo is a tuple (realm, uris, user, password) where 'uris' itself
    # is a tuple of URIs.
    # If url_obj is obtained via mercurial urlutil, the obtained authinfo
    # values will be bytes, e.g.
    #    (None, (b'http://127.0.0.1/repo', b'127.0.0.1'), b'user', b'pass')
    # However, urllib expects strings, not bytes, so we must convert them.
    realm = safe_str(authinfo[0]) if authinfo[0] else None
    uris = tuple(safe_str(uri) for uri in authinfo[1])
    user = safe_str(authinfo[2])
    password = safe_str(authinfo[3])

    # create a password manager
    password_manager = urllib.request.HTTPPasswordMgrWithDefaultRealm()
    password_manager.add_password(realm, uris, user, password)

    return test_uri, [
        mercurial.url.httpbasicauthhandler(password_manager),
        mercurial.url.httpdigestauthhandler(password_manager),
    ]
General Comments 0
You need to be logged in to leave comments. Login now