rhodecode-enterprise-ce Commit - r474:50981b14

vcs: Implemented a gevent compatible Curl class, part of ...

Martin Bornhold -

r474:50981b14 default

parent child

rhodecode/lib/vcs/geventcurl.py

0 created 644 +224 0

			@@ -0,0 +1,224 b''
		1	# -- coding: utf-8 --
		2
		3	# Copyright (C) 2016-2016 RhodeCode GmbH
		4	#
		5	# This program is free software: you can redistribute it and/or modify
		6	# it under the terms of the GNU Affero General Public License, version 3
		7	# (only), as published by the Free Software Foundation.
		8	#
		9	# This program is distributed in the hope that it will be useful,
		10	# but WITHOUT ANY WARRANTY; without even the implied warranty of
		11	# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
		12	# GNU General Public License for more details.
		13	#
		14	# You should have received a copy of the GNU Affero General Public License
		15	# along with this program. If not, see <http://www.gnu.org/licenses/>.
		16	#
		17	# This program is dual-licensed. If you wish to learn more about the
		18	# RhodeCode Enterprise Edition, including its added features, Support services,
		19	# and proprietary license terms, please see https://rhodecode.com/licenses/
		20
		21	"""
		22	This serves as a drop in replacement for pycurl. It implements the pycurl Curl
		23	class in a way that is compatible with gevent.
		24	"""
		25
		26
		27	import logging
		28	import gevent
		29	import pycurl
		30
		31	# Import everything from pycurl.
		32	# This allows us to use this module as a drop in replacement of pycurl.
		33	from pycurl import * # noqa
		34
		35	from gevent import core
		36	from gevent.hub import Waiter
		37
		38
		39	log = logging.getLogger(__name__)
		40
		41
		class GeventCurlMulti(object):
		    """
		    Wrapper around pycurl.CurlMulti that integrates it into gevent's event
		    loop.

		    libcurl announces the sockets and timeouts it cares about through the
		    M_SOCKETFUNCTION and M_TIMERFUNCTION callbacks. This class mirrors them
		    as io/timer watchers on the gevent loop and feeds the resulting events
		    back into libcurl via ``socket_action``. Finished transfers wake up the
		    ``waiter`` attribute that has been attached to the easy handle.
		    """

		    def __init__(self, loop=None):
		        """
		        :param loop: event loop to register watchers on. Defaults to the
		            loop of the current gevent hub.
		        """
		        # Maps file descriptor -> io watcher currently registered for it.
		        self._watchers = {}
		        # Timer watcher for the timeout most recently requested by libcurl.
		        self._timeout = None
		        self.loop = loop or gevent.get_hub().loop

		        # Setup curl's multi instance.
		        self._curl_multi = pycurl.CurlMulti()
		        self.setopt(pycurl.M_TIMERFUNCTION, self._set_timeout)
		        self.setopt(pycurl.M_SOCKETFUNCTION, self._handle_socket)

		    def __getattr__(self, item):
		        """
		        The pycurl.CurlMulti class is final and we cannot subclass it.
		        Therefore we are wrapping it and forward everything to it here.
		        """
		        if item == '_curl_multi':
		            # The wrapped object itself is missing (e.g. __init__ did not
		            # run). Looking it up again would recurse endlessly.
		            raise AttributeError(item)
		        return getattr(self._curl_multi, item)

		    def add_handle(self, curl):
		        """
		        Add handle variant that also takes care about the initial invocation
		        of socket action method. This is done by setting an immediate
		        timeout.
		        """
		        result = self._curl_multi.add_handle(curl)
		        self._set_timeout(0)
		        return result

		    def _handle_socket(self, event, fd, multi, data):
		        """
		        Called by libcurl when it wants to change the file descriptors it
		        cares about.

		        :param event: one of the pycurl.POLL_* constants.
		        :param fd: the file descriptor the request refers to.
		        :param multi: passed by libcurl, unused here.
		        :param data: passed by libcurl, unused here.
		        """
		        if event == pycurl.POLL_REMOVE:
		            watcher = self._watchers.pop(fd, None)
		            if watcher is not None:
		                watcher.stop()
		            return

		        # Translate libcurl's poll flags into the flags of the gevent loop.
		        event_map = {
		            pycurl.POLL_NONE: core.NONE,
		            pycurl.POLL_IN: core.READ,
		            pycurl.POLL_OUT: core.WRITE,
		            pycurl.POLL_INOUT: core.READ | core.WRITE,
		        }
		        gloop_event = event_map[event]

		        watcher = self._watchers.get(fd)
		        if watcher is None:
		            watcher = self.loop.io(fd, gloop_event)
		            watcher.start(self._handle_events, fd, pass_events=True)
		            self._watchers[fd] = watcher
		        elif watcher.events != gloop_event:
		            # The event mask is changed while the watcher is stopped and
		            # the watcher is started again afterwards.
		            watcher.stop()
		            watcher.events = gloop_event
		            watcher.start(self._handle_events, fd, pass_events=True)

		    def _set_timeout(self, msecs):
		        """
		        Called by libcurl to schedule a timeout.

		        :param msecs: delay in milliseconds. A negative value is libcurl's
		            request to remove the pending timer without arming a new one.
		        """
		        if self._timeout is not None:
		            self._timeout.stop()
		            self._timeout = None
		        if msecs < 0:
		            return
		        self._timeout = self.loop.timer(msecs / 1000.0)
		        self._timeout.start(self._handle_timeout)

		    def _socket_action(self, fd, action):
		        """
		        Drive libcurl for `fd` until it no longer asks to be called again
		        and then wake up the waiters of all finished transfers.
		        """
		        while True:
		            try:
		                ret, _num_handles = self._curl_multi.socket_action(
		                    fd, action)
		            except pycurl.error as e:
		                ret = e.args[0]
		            if ret != pycurl.E_CALL_MULTI_PERFORM:
		                break
		        self._finish_pending_requests()

		    def _handle_events(self, events, fd):
		        """
		        Called by the gevent loop when a watched file descriptor becomes
		        readable and/or writable.
		        """
		        action = 0
		        if events & core.READ:
		            action |= pycurl.CSELECT_IN
		        if events & core.WRITE:
		            action |= pycurl.CSELECT_OUT
		        self._socket_action(fd, action)

		    def _handle_timeout(self):
		        """
		        Called by the gevent loop when the requested timeout has passed.
		        """
		        if self._timeout is not None:
		            self._timeout.stop()
		            self._timeout = None
		        self._socket_action(pycurl.SOCKET_TIMEOUT, 0)

		        # In theory, we shouldn't have to do this because curl will call
		        # _set_timeout whenever the timeout changes. However, sometimes
		        # after _handle_timeout we will need to reschedule immediately even
		        # though nothing has changed from curl's perspective. When
		        # socket_action is called with SOCKET_TIMEOUT, libcurl decides
		        # internally which timeouts need to be processed, and its view of
		        # the elapsed time may disagree with the event loop's, so this
		        # callback can fire before libcurl is ready. After each timeout,
		        # resync the scheduled timeout with libcurl's current state.
		        new_timeout = self._curl_multi.timeout()
		        if new_timeout >= 0:
		            self._set_timeout(new_timeout)

		    def _finish_pending_requests(self):
		        """
		        Process any requests that were completed by the last call to
		        multi.socket_action.

		        Successful handles get their waiter switched to. Failed handles get
		        a `pycurl.error(errnum, errmsg)` thrown into their waiter, so that
		        code written against pycurl's own `perform` can keep catching
		        `pycurl.error`.
		        """
		        while True:
		            num_q, ok_list, err_list = self._curl_multi.info_read()
		            for curl in ok_list:
		                curl.waiter.switch()
		            for curl, errnum, errmsg in err_list:
		                curl.waiter.throw(pycurl.error(errnum, errmsg))
		            if num_q == 0:
		                break
		174
		class GeventCurl(object):
		    """
		    Gevent compatible implementation of the pycurl.Curl class. Essentially a
		    wrapper around pycurl.Curl with a customized perform method. It uses the
		    GeventCurlMulti class to implement a blocking API to libcurl's "easy"
		    interface.
		    """

		    # Reference to the GeventCurlMulti instance.
		    _multi_instance = None

		    def __init__(self):
		        self._curl = pycurl.Curl()

		    def __getattr__(self, item):
		        """
		        The pycurl.Curl class is final and we cannot subclass it. Therefore
		        we are wrapping it and forward everything to it here.
		        """
		        if item == '_curl':
		            # The wrapped handle itself is missing (e.g. __init__ did not
		            # run). Looking it up again would recurse endlessly.
		            raise AttributeError(item)
		        return getattr(self._curl, item)

		    @property
		    def _multi(self):
		        """
		        Lazy property that returns the GeventCurlMulti instance. The value
		        is cached as a class attribute. Therefore only one instance per
		        process exists.
		        """
		        if GeventCurl._multi_instance is None:
		            GeventCurl._multi_instance = GeventCurlMulti()
		        return GeventCurl._multi_instance

		    def perform(self):
		        """
		        This perform method is compatible with gevent because it uses gevent
		        synchronization mechanisms to wait for the request to finish.

		        A fresh Waiter is attached to the wrapped handle, the handle is
		        registered with the shared GeventCurlMulti and the calling greenlet
		        waits until the waiter is woken up.

		        :return: the value the waiter was switched with.
		        :raises: the exception that was thrown into the waiter when the
		            transfer failed, or the error raised while registering the
		            handle.
		        """
		        waiter = self._curl.waiter = Waiter()
		        try:
		            self._multi.add_handle(self._curl)
		            # Only a handle that has been added successfully must be
		            # removed again. Otherwise the failure of remove_handle would
		            # hide the error that add_handle raised.
		            try:
		                response = waiter.get()
		            finally:
		                self._multi.remove_handle(self._curl)
		        finally:
		            del self._curl.waiter

		        return response


		# Curl is originally imported from pycurl. At this point we override it with
		# our custom implementation.
		Curl = GeventCurl

rhodecode/lib/vcs/__init__.py

0 +14 -2

              # -*- coding: utf-8 -*-
              # Copyright (C) 2014-2016  RhodeCode GmbH
              #
              # This program is free software: you can redistribute it and/or modify
              # it under the terms of the GNU Affero General Public License, version 3
              # (only), as published by the Free Software Foundation.
              #
              # This program is distributed in the hope that it will be useful,
              # but WITHOUT ANY WARRANTY; without even the implied warranty of
              # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
              # GNU General Public License for more details.
              #
              # You should have received a copy of the GNU Affero General Public License
              # along with this program.  If not, see <http://www.gnu.org/licenses/>.
              #
              # This program is dual-licensed. If you wish to learn more about the
              # RhodeCode Enterprise Edition, including its added features, Support services,
              # and proprietary license terms, please see https://rhodecode.com/licenses/
              """
              Various version Control System version lib (vcs) management abstraction layer
              for Python. Build with server client architecture.
              """
              # Version of the vcs library as a (major, minor, patch, tag) tuple.
              VERSION = (0, 5, 0, 'dev')
              # Dotted string built from all four parts of VERSION.
              __version__ = '.'.join(str(part) for part in VERSION[:4])
              # Names that make up the public API of this package.
              __all__ = [
                  'get_version', 'get_repo', 'get_backend',
                  'VCSError', 'RepositoryError', 'CommitError',
              ]
              import atexit
              import logging
              import subprocess
              import time
              import urlparse
              from cStringIO import StringIO
-             import pycurl
              import Pyro4
              from Pyro4.errors import CommunicationError
              from rhodecode.lib.vcs.conf import settings
              from rhodecode.lib.vcs.backends import get_repo, get_backend
              from rhodecode.lib.vcs.exceptions import (
                  VCSError, RepositoryError, CommitError)
+             log = logging.getLogger(__name__)
-             log = logging.getLogger(__name__)
+             # The pycurl library directly accesses C API functions and is not patched by
+             # gevent. This will potentially lead to deadlocks due to incompatibility to
+             # gevent. Therefore we check if gevent is active and import a gevent compatible
+             # wrapper in that case.
+             try:
+                 from gevent import monkey
+                 if monkey.is_module_patched('__builtin__'):
+                     import geventcurl as pycurl
+                     log.debug('Using gevent comapatible pycurl: %s', pycurl)
+                 else:
+                     import pycurl
+             except ImportError:
+                 import pycurl
              def get_version():
                  """
                  Return the short version string.

                  Only the first three parts of ``VERSION`` are used, joined by dots.
                  """
                  numeric_parts = VERSION[:3]
                  return '.'.join(map(str, numeric_parts))
              def connect_pyro4(server_and_port):
                  """
                  Wire the Git, Hg and Svn connections and the remote WSGI proxies to a
                  vcsserver that is reachable via Pyro4.

                  An atexit hook is registered which resets the three connection objects
                  to None.

                  :param server_and_port: passed on to ``settings.pyro_remote``.
                  """
                  from rhodecode.lib.vcs import connection, client
                  from rhodecode.lib.middleware.utils import scm_app

                  def remote_uri(object_name):
                      return settings.pyro_remote(object_name, server_and_port)

                  git_factory = client.RequestScopeProxyFactory(
                      remote_uri(settings.PYRO_GIT))
                  hg_factory = client.RequestScopeProxyFactory(
                      remote_uri(settings.PYRO_HG))
                  svn_factory = client.RequestScopeProxyFactory(
                      remote_uri(settings.PYRO_SVN))

                  connection.Git = client.RepoMaker(proxy_factory=git_factory)
                  connection.Hg = client.RepoMaker(proxy_factory=hg_factory)
                  connection.Svn = client.RepoMaker(proxy_factory=svn_factory)

                  scm_app.GIT_REMOTE_WSGI = Pyro4.Proxy(
                      remote_uri(settings.PYRO_GIT_REMOTE_WSGI))
                  scm_app.HG_REMOTE_WSGI = Pyro4.Proxy(
                      remote_uri(settings.PYRO_HG_REMOTE_WSGI))

                  def free_connection_resources():
                      for backend_name in ('Git', 'Hg', 'Svn'):
                          setattr(connection, backend_name, None)

                  atexit.register(free_connection_resources)
              def connect_http(server_and_port):
                  """
                  Wire the Git, Hg and Svn connections and the remote WSGI proxies to a
                  vcsserver that is reachable via HTTP.

                  All three repo makers share one thread local session factory. An
                  atexit hook is registered which resets the three connection objects to
                  None.

                  :param server_and_port: passed on to the repo makers and proxies.
                  """
                  from rhodecode.lib.vcs import connection, client_http
                  from rhodecode.lib.middleware.utils import scm_app

                  shared_sessions = client_http.ThreadlocalSessionFactory()
                  backend_endpoints = (('Git', '/git'), ('Hg', '/hg'), ('Svn', '/svn'))
                  for backend_name, endpoint in backend_endpoints:
                      repo_maker = client_http.RepoMaker(
                          server_and_port, endpoint, shared_sessions)
                      setattr(connection, backend_name, repo_maker)

                  scm_app.HG_REMOTE_WSGI = client_http.VcsHttpProxy(
                      server_and_port, '/proxy/hg')
                  scm_app.GIT_REMOTE_WSGI = client_http.VcsHttpProxy(
                      server_and_port, '/proxy/git')

                  def free_connection_resources():
                      for backend_name, _endpoint in backend_endpoints:
                          setattr(connection, backend_name, None)

                  atexit.register(free_connection_resources)
              def connect_vcs(server_and_port, protocol='pyro4'):
                  """
                  Initializes the connection to the vcs server.

                  Any protocol other than the two listed below is silently ignored.

                  :param server_and_port: str, e.g. "localhost:9900"
                  :param protocol: str, "pyro4" or "http"
                  """
                  if protocol == 'pyro4':
                      connect_pyro4(server_and_port)
                      return
                  if protocol == 'http':
                      connect_http(server_and_port)
              # TODO: johbo: This function should be moved into our test suite, there is
              # no reason to support starting the vcsserver in Enterprise itself.
              def start_vcs_server(server_and_port, protocol='pyro4', log_level=None):
                  """
                  Starts the vcs server in a subprocess.

                  :param server_and_port: address the server shall listen on.
                  :param protocol: "pyro4" or "http"; other values start nothing.
                  :param log_level: forwarded to the protocol specific start function.
                  :return: the result of the protocol specific start function, None for
                      an unknown protocol.
                  """
                  log.info('Starting VCSServer as a sub process with %s protocol', protocol)
                  if protocol == 'pyro4':
                      return _start_pyro4_vcs_server(server_and_port, log_level)
                  if protocol == 'http':
                      return _start_http_vcs_server(server_and_port, log_level)
                  return None
              def _start_pyro4_vcs_server(server_and_port, log_level=None):
                  """
                  Launch the ``vcsserver`` executable as a sub process and wait until it
                  answers.

                  A server that is possibly still running on the same address is asked to
                  shut down first. The sub process is killed at interpreter exit.

                  :param server_and_port: "host:port"; square brackets around the host
                      are stripped before it is passed on the command line.
                  :param log_level: optional value for the ``--log-level`` argument.
                  """
                  _try_to_shutdown_running_server(server_and_port)

                  raw_host, port = server_and_port.rsplit(":", 1)
                  command = [
                      'vcsserver',
                      '--port', port,
                      '--host', raw_host.strip('[]'),
                      '--locale', 'en_US.UTF-8',
                      '--threadpool', '32',
                  ]
                  if log_level:
                      command.extend(['--log-level', log_level])

                  process = subprocess.Popen(command)
                  atexit.register(process.kill)

                  proxy = create_vcsserver_proxy(server_and_port, protocol='pyro4')
                  _wait_until_vcs_server_is_reachable(proxy)
              def _start_http_vcs_server(server_and_port, log_level=None):
                  """
                  Launch the HTTP vcsserver through ``pserve`` as a sub process and wait
                  until it answers.

                  The sub process is killed at interpreter exit.

                  :param server_and_port: "host:port", split at the last colon.
                  :param log_level: accepted for a signature parallel to the pyro4
                      variant; not used here.
                  """
                  # TODO: mikhail: shutdown if an http server already runs
                  host, port = server_and_port.rsplit(":", 1)
                  command = ['pserve', 'vcsserver/development_pyramid.ini']
                  command.append('http_port=%s' % (port, ))
                  command.append('http_host=%s' % (host, ))

                  process = subprocess.Popen(command)
                  atexit.register(process.kill)

                  proxy = create_vcsserver_proxy(server_and_port, protocol='http')
                  _wait_until_vcs_server_is_reachable(proxy)
              def _wait_until_vcs_server_is_reachable(server):
                  while xrange(80):  # max 40s of sleep
                      try:
                          server.ping()
                          break
                      except (CommunicationError, pycurl.error):
                          pass
                      time.sleep(0.5)
              def _try_to_shutdown_running_server(server_and_port):
                  """
                  Ask a vcsserver that may still be running on `server_and_port` to shut
                  down.

                  When the shutdown request fails with `CommunicationError` or
                  `pycurl.error`, nothing else happens. Otherwise a second proxy is
                  created and pinged once.
                  """
                  running_server = create_vcsserver_proxy(server_and_port)
                  try:
                      running_server.shutdown()
                  except (CommunicationError, pycurl.error):
                      return

                  # TODO: Not sure why this is important, but without it the following start
                  # of the server fails.
                  fresh_proxy = create_vcsserver_proxy(server_and_port)
                  fresh_proxy.ping()
              def create_vcsserver_proxy(server_and_port, protocol='pyro4'):
                  """
                  Create a proxy object for the vcsserver at `server_and_port`.

                  :param protocol: "pyro4" or "http".
                  :return: the protocol specific proxy, or None for any other protocol.
                  """
                  if protocol == 'http':
                      return _create_vcsserver_proxy_http(server_and_port)
                  if protocol == 'pyro4':
                      return _create_vcsserver_proxy_pyro4(server_and_port)
                  return None
              def _create_vcsserver_proxy_pyro4(server_and_port):
                  """
                  Return a Pyro4 proxy for the remote object ``settings.PYRO_VCSSERVER``
                  on `server_and_port`.
                  """
                  remote_uri = settings.pyro_remote(
                      settings.PYRO_VCSSERVER, server_and_port)
                  return Pyro4.Proxy(remote_uri)
              def _create_vcsserver_proxy_http(server_and_port):
                  """
                  Return a ``client_http.RemoteObject`` for the ``/server`` endpoint of
                  the vcsserver on `server_and_port`, backed by a new RPC session.
                  """
                  from rhodecode.lib.vcs import client_http

                  rpc_session = _create_http_rpc_session()
                  base_url = 'http://%s' % server_and_port
                  server_url = urlparse.urljoin(base_url, '/server')
                  return client_http.RemoteObject(server_url, rpc_session)
              class CurlSession(object):
                  """
                  Minimal HTTP session on top of a single, reused curl handle.

                  Only the small part of the :class:`requests.Session` interface that we
                  need is provided. It follows the API of the library `requests` so that
                  we can easily benchmark against it. Please have a look at
                  :class:`requests.Session` when you extend it.
                  """

                  def __init__(self):
                      handle = pycurl.Curl()
                      # TODO: johbo: I did test with 7.19 of libcurl. This version has
                      # trouble with 100 - continue being set in the expect header. This
                      # can lead to massive performance drops, switching it off here.
                      handle.setopt(handle.HTTPHEADER, ["Expect:"])
                      handle.setopt(handle.TCP_NODELAY, True)
                      handle.setopt(handle.PROTOCOLS, handle.PROTO_HTTP)
                      self._curl = handle

                  def post(self, url, data, allow_redirects=False):
                      """
                      Send `data` as the body of a POST request to `url`.

                      :param allow_redirects: value for curl's FOLLOWLOCATION option.
                      :return: a :class:`CurlResponse` wrapping the buffer that the
                          response body was written into.
                      """
                      body_sink = StringIO()
                      handle = self._curl
                      request_options = (
                          (handle.URL, url),
                          (handle.POST, True),
                          (handle.POSTFIELDS, data),
                          (handle.FOLLOWLOCATION, allow_redirects),
                          (handle.WRITEDATA, body_sink),
                      )
                      for option, value in request_options:
                          handle.setopt(option, value)
                      handle.perform()
                      return CurlResponse(body_sink)
              class CurlResponse(object):
                  """
                  Response of a request, exposing a subset of the `requests` response
                  interface.

                  The similarity to `requests` is intentional, so that `requests` can be
                  used as a drop in replacement for benchmarking purposes.
                  """

                  def __init__(self, response_buffer):
                      # Buffer object that holds the response body.
                      self._response_buffer = response_buffer

                  def _read_content(self):
                      return self._response_buffer.getvalue()

                  # Complete response body as currently held by the buffer.
                  content = property(_read_content)
              def _create_http_rpc_session():
                  """
                  Return a new :class:`CurlSession` for RPC calls to the vcsserver.
                  """
                  return CurlSession()

General Comments 0

Write
Preview

You need to be logged in to leave comments. Login now

No TODOs yet

	Site-wide shortcuts
/	Use quick search box
g h	Goto home page
g g	Goto my private gists page
g G	Goto my public gists page
g 0-9	Goto bookmarked items from 0-9
n r	New repository page
n g	New gist page

	Repositories
g s	Goto summary page
g c	Goto changelog page
g f	Goto files page
g F	Goto files page with file search activated
g p	Goto pull requests page
g o	Goto repository settings
g O	Goto repository access permissions settings
t s	Toggle sidebar on some pages