rhodecode-enterprise-ce Files · rhodecode/lib/middleware/utils/scm_app_http.py

vcs: streaming will use now 100kb chunks readers for faster throughput

marcink - - Load All Authors

File last commit:

r3327:ddd720d0 default


                r3327:ddd720d0

default

Download file

             scm_app_http.py
        
                    179 lines
            
             | 5.5 KiB
            
                | text/x-python
            
             |
                PythonLexer
            
             / rhodecode / lib / middleware / utils / scm_app_http.py
          
                    History
                
                 |
                  Source
                 | Raw
                 |Copy content
                 |Copy permalink

        marcink
    
project: added all source files and assets

              r1
            
      # -*- coding: utf-8 -*-

        marcink
    
release: update copyright year to 2018

              r2487
            
      # Copyright (C) 2014-2018 RhodeCode GmbH

        marcink
    
project: added all source files and assets

              r1
            
      #

      # This program is free software: you can redistribute it and/or modify

      # it under the terms of the GNU Affero General Public License, version 3

      # (only), as published by the Free Software Foundation.

      #

      # This program is distributed in the hope that it will be useful,

      # but WITHOUT ANY WARRANTY; without even the implied warranty of

      # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the

      # GNU General Public License for more details.

      #

      # You should have received a copy of the GNU Affero General Public License

      # along with this program.  If not, see <http://www.gnu.org/licenses/>.

      #

      # This program is dual-licensed. If you wish to learn more about the

      # RhodeCode Enterprise Edition, including its added features, Support services,

      # and proprietary license terms, please see https://rhodecode.com/licenses/

      """

      Implementation of the scm_app interface using raw HTTP communication.

      """

      import base64

      import logging

      import urlparse

      import wsgiref.util

      import msgpack

      import requests

      import webob.request

      import rhodecode

      log = logging.getLogger(__name__)

        Martin Bornhold
    
vcs: Do not pass the backend key into the scm app instances....

              r951
            
      def create_git_wsgi_app(repo_path, repo_name, config):

        marcink
    
project: added all source files and assets

              r1
            
          url = _vcs_streaming_url() + 'git/'

        Martin Bornhold
    
vcs: Do not pass the backend key into the scm app instances....

              r951
            
          return VcsHttpProxy(url, repo_path, repo_name, config)

        marcink
    
project: added all source files and assets

              r1
            
        Martin Bornhold
    
vcs: Do not pass the backend key into the scm app instances....

              r951
            
      def create_hg_wsgi_app(repo_path, repo_name, config):

        marcink
    
project: added all source files and assets

              r1
            
          url = _vcs_streaming_url() + 'hg/'

        Martin Bornhold
    
vcs: Do not pass the backend key into the scm app instances....

              r951
            
          return VcsHttpProxy(url, repo_path, repo_name, config)

        marcink
    
project: added all source files and assets

              r1
            
      def _vcs_streaming_url():
          """
          Build the base URL of the VCSServer streaming endpoint.

          The host part is taken from the ``vcs.server`` key of
          ``rhodecode.CONFIG``.
          """
          vcs_server = rhodecode.CONFIG['vcs.server']
          return 'http://{}/stream/'.format(vcs_server)

      # Module-level ``requests`` session through which VcsHttpProxy sends its
      # requests to the VCSServer.
      # TODO: johbo: Avoid the global.

      session = requests.Session()

      # Requests speedup: with ``trust_env`` switched off the session avoids
      # reading .netrc and similar environment-provided settings.

      session.trust_env = False

        marcink
    
http-app: prevent httplib3 logs to spawn our rhodecode logs....

              r1550
            
      # Raise the urllib3 connection-pool logger bundled with requests to
      # WARNING so that its lower-level messages do not flood our logs.

      logging.getLogger("requests.packages.urllib3.connectionpool").setLevel(

          logging.WARNING)

        marcink
    
project: added all source files and assets

              r1
            
      class VcsHttpProxy(object):

          """

          A WSGI application which proxies vcs requests.

          The goal is to shuffle the data around without touching it. The only

          exception is the extra data from the config object which we send to the

          server as well.

          """

        Martin Bornhold
    
vcs: Do not pass the backend key into the scm app instances....

              r951
            
          def __init__(self, url, repo_path, repo_name, config):

        marcink
    
project: added all source files and assets

              r1
            
              """

              :param str url: The URL of the VCSServer to call.

              """

              self._url = url

              self._repo_name = repo_name

              self._repo_path = repo_path

              self._config = config

              log.debug(

                  "Creating VcsHttpProxy for repo %s, url %s",

                  repo_name, url)

          def __call__(self, environ, start_response):

              config = msgpack.packb(self._config)

              request = webob.request.Request(environ)

              request_headers = request.headers

              request_headers.update({

                  # TODO: johbo: Remove this, rely on URL path only

                  'X-RC-Repo-Name': self._repo_name,

                  'X-RC-Repo-Path': self._repo_path,

                  'X-RC-Path-Info': environ['PATH_INFO'],

                  # TODO: johbo: Avoid encoding and put this into payload?

                  'X-RC-Repo-Config': base64.b64encode(config),

        Martin Bornhold
    
vcs: Send custom repo-locked status code to the VCSServer.

              r980
            
                  'X-RC-Locked-Status-Code': rhodecode.CONFIG.get('lock_ret_code')

        marcink
    
project: added all source files and assets

              r1
            
              })

              method = environ['REQUEST_METHOD']

              # Preserve the query string

              url = self._url

              url = urlparse.urljoin(url, self._repo_name)

              if environ.get('QUERY_STRING'):

                  url += '?' + environ['QUERY_STRING']

        marcink
    
http-app: adds some logging.

              r1983
            
              log.debug('http-app: preparing request to: %s', url)

        marcink
    
project: added all source files and assets

              r1
            
              response = session.request(

        marcink
    
http-app: adds some logging.

              r1983
            
                  method,

                  url,

        marcink
    
http-proto: in case incoming requests come in as chunked stream the data to VCSServer....

              r1423
            
                  data=_maybe_stream_request(environ),

        marcink
    
project: added all source files and assets

              r1
            
                  headers=request_headers,

                  stream=True)

        marcink
    
http-app: adds some logging.

              r1983
            
              log.debug('http-app: got vcsserver response: %s', response)

        marcink
    
scm_app: add more debug info when unhandled errors happen on vcsserver.

              r3093
            
              if response.status_code >= 500:

                  log.error('Exception returned by vcsserver at: %s %s, %s',

                            url, response.status_code, response.content)

        marcink
    
project: added all source files and assets

              r1
            
              # Preserve the headers of the response, except hop_by_hop ones

              response_headers = [

                  (h, v) for h, v in response.headers.items()

                  if not wsgiref.util.is_hop_by_hop(h)

              ]

        marcink
    
scm_app: add more debug info when unhandled errors happen on vcsserver.

              r3093
            
              # Build status argument for start_response callable.

        Martin Bornhold
    
vcs: Include reason phrase from VCSServer into HTTP status string.

              r977
            
              status = '{status_code} {reason_phrase}'.format(

                  status_code=response.status_code,

                  reason_phrase=response.reason)

        marcink
    
project: added all source files and assets

              r1
            
              start_response(status, response_headers)

        marcink
    
http-proto: in case incoming requests come in as chunked stream the data to VCSServer....

              r1423
            
              return _maybe_stream_response(response)

        marcink
    
project: added all source files and assets

              r1
            
        marcink
    
vcs: streaming will use now 100kb chunks readers for faster throughput

              r3327
            
      def read_in_chunks(stream_obj, block_size=1024, chunks=-1):
          """
          Lazily yield successive blocks read from ``stream_obj``.

          :param stream_obj: object exposing ``read(size)``.
          :param int block_size: size handed to every ``read`` call, 1k by default.
          :param int chunks: upper bound on the number of blocks to yield. The
              default of -1 never counts down to zero, so reading only stops
              once ``read`` returns an empty result.
          """
          remaining = chunks
          while remaining:
              block = stream_obj.read(block_size)
              if block:
                  yield block
                  remaining -= 1
              else:
                  # An empty read marks the end of the stream.
                  return

        marcink
    
http-app: simplify detection of chunked encoding....

              r1643
            
      def _is_request_chunked(environ):

          stream = environ.get('HTTP_TRANSFER_ENCODING', '') == 'chunked'

          return stream

        marcink
    
http-proto: in case incoming requests come in as chunked stream the data to VCSServer....

              r1423
            
      def _maybe_stream_request(environ):

        marcink
    
git-lfs: streaming support for file upload....

              r1566
            
          path = environ['PATH_INFO']

          stream = _is_request_chunked(environ)

          log.debug('handling request `%s` with stream support: %s', path, stream)

          if stream:

        marcink
    
vcs: streaming will use now 100kb chunks readers for faster throughput

              r3327
            
              # set stream by 256k

              return read_in_chunks(environ['wsgi.input'], block_size=1024 * 256)

        marcink
    
http-proto: in case incoming requests come in as chunked stream the data to VCSServer....

              r1423
            
          else:

              return environ['wsgi.input'].read()

      def _maybe_stream_response(response):

        marcink
    
project: added all source files and assets

              r1
            
          """

          Try to generate chunks from the response if it is chunked.

          """

        marcink
    
git-lfs: streaming support for file upload....

              r1566
            
          stream = _is_chunked(response)

          log.debug('returning response with stream: %s', stream)

          if stream:

        marcink
    
vcs: streaming will use now 100kb chunks readers for faster throughput

              r3327
            
              # read in 256k Chunks

              return response.raw.read_chunked(amt=1024 * 256)

        marcink
    
project: added all source files and assets

              r1
            
          else:

              return [response.content]

      def _is_chunked(response):

          return response.headers.get('Transfer-Encoding', '') == 'chunked'

	Site-wide shortcuts
/	Use quick search box
g h	Goto home page
g g	Goto my private gists page
g G	Goto my public gists page
g 0-9	Goto bookmarked items from 0-9
n r	New repository page
n g	New gist page

	Repositories
g s	Goto summary page
g c	Goto changelog page
g f	Goto files page
g F	Goto files page with file search activated
g p	Goto pull requests page
g o	Goto repository settings
g O	Goto repository access permissions settings
t s	Toggle sidebar on some pages

marcink project: added all source files and assets	r1	# -- coding: utf-8 --

marcink release: update copyright year to 2018	r2487	# Copyright (C) 2014-2018 RhodeCode GmbH
marcink project: added all source files and assets	r1	#
		# This program is free software: you can redistribute it and/or modify
		# it under the terms of the GNU Affero General Public License, version 3
		# (only), as published by the Free Software Foundation.
		#
		# This program is distributed in the hope that it will be useful,
		# but WITHOUT ANY WARRANTY; without even the implied warranty of
		# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
		# GNU General Public License for more details.
		#
		# You should have received a copy of the GNU Affero General Public License
		# along with this program. If not, see <http://www.gnu.org/licenses/>.
		#
		# This program is dual-licensed. If you wish to learn more about the
		# RhodeCode Enterprise Edition, including its added features, Support services,
		# and proprietary license terms, please see https://rhodecode.com/licenses/

		"""
		Implementation of the scm_app interface using raw HTTP communication.
		"""

		import base64
		import logging
		import urlparse
		import wsgiref.util

		import msgpack
		import requests
		import webob.request

		import rhodecode


		log = logging.getLogger(__name__)


Martin Bornhold vcs: Do not pass the backend key into the scm app instances....	r951	def create_git_wsgi_app(repo_path, repo_name, config):
marcink project: added all source files and assets	r1	url = _vcs_streaming_url() + 'git/'
Martin Bornhold vcs: Do not pass the backend key into the scm app instances....	r951	return VcsHttpProxy(url, repo_path, repo_name, config)
marcink project: added all source files and assets	r1

Martin Bornhold vcs: Do not pass the backend key into the scm app instances....	r951	def create_hg_wsgi_app(repo_path, repo_name, config):
marcink project: added all source files and assets	r1	url = _vcs_streaming_url() + 'hg/'
Martin Bornhold vcs: Do not pass the backend key into the scm app instances....	r951	return VcsHttpProxy(url, repo_path, repo_name, config)
marcink project: added all source files and assets	r1

		def _vcs_streaming_url():
		template = 'http://{}/stream/'
		return template.format(rhodecode.CONFIG['vcs.server'])


		# TODO: johbo: Avoid the global.
		session = requests.Session()
		# Requests speedup, avoid reading .netrc and similar
		session.trust_env = False

marcink http-app: prevent httplib3 logs to spawn our rhodecode logs....	r1550	# prevent urllib3 spawning our logs.
		logging.getLogger("requests.packages.urllib3.connectionpool").setLevel(
		logging.WARNING)

marcink project: added all source files and assets	r1
		class VcsHttpProxy(object):
		"""
		A WSGI application which proxies vcs requests.

		The goal is to shuffle the data around without touching it. The only
		exception is the extra data from the config object which we send to the
		server as well.
		"""

Martin Bornhold vcs: Do not pass the backend key into the scm app instances....	r951	def __init__(self, url, repo_path, repo_name, config):
marcink project: added all source files and assets	r1	"""
		:param str url: The URL of the VCSServer to call.
		"""
		self._url = url
		self._repo_name = repo_name
		self._repo_path = repo_path
		self._config = config
		log.debug(
		"Creating VcsHttpProxy for repo %s, url %s",
		repo_name, url)

		def __call__(self, environ, start_response):
		config = msgpack.packb(self._config)
		request = webob.request.Request(environ)
		request_headers = request.headers
		request_headers.update({
		# TODO: johbo: Remove this, rely on URL path only
		'X-RC-Repo-Name': self._repo_name,
		'X-RC-Repo-Path': self._repo_path,
		'X-RC-Path-Info': environ['PATH_INFO'],
		# TODO: johbo: Avoid encoding and put this into payload?
		'X-RC-Repo-Config': base64.b64encode(config),
Martin Bornhold vcs: Send custom repo-locked status code to the VCSServer.	r980	'X-RC-Locked-Status-Code': rhodecode.CONFIG.get('lock_ret_code')
marcink project: added all source files and assets	r1	})

		method = environ['REQUEST_METHOD']

		# Preserve the query string
		url = self._url
		url = urlparse.urljoin(url, self._repo_name)
		if environ.get('QUERY_STRING'):
		url += '?' + environ['QUERY_STRING']

marcink http-app: adds some logging.	r1983	log.debug('http-app: preparing request to: %s', url)
marcink project: added all source files and assets	r1	response = session.request(
marcink http-app: adds some logging.	r1983	method,
		url,
marcink http-proto: in case incoming requests come in as chunked stream the data to VCSServer....	r1423	data=_maybe_stream_request(environ),
marcink project: added all source files and assets	r1	headers=request_headers,
		stream=True)

marcink http-app: adds some logging.	r1983	log.debug('http-app: got vcsserver response: %s', response)
marcink scm_app: add more debug info when unhandled errors happen on vcsserver.	r3093	if response.status_code >= 500:
		log.error('Exception returned by vcsserver at: %s %s, %s',
		url, response.status_code, response.content)

marcink project: added all source files and assets	r1	# Preserve the headers of the response, except hop_by_hop ones
		response_headers = [
		(h, v) for h, v in response.headers.items()
		if not wsgiref.util.is_hop_by_hop(h)
		]

marcink scm_app: add more debug info when unhandled errors happen on vcsserver.	r3093	# Build status argument for start_response callable.
Martin Bornhold vcs: Include reason phrase from VCSServer into HTTP status string.	r977	status = '{status_code} {reason_phrase}'.format(
		status_code=response.status_code,
		reason_phrase=response.reason)

marcink project: added all source files and assets	r1	start_response(status, response_headers)
marcink http-proto: in case incoming requests come in as chunked stream the data to VCSServer....	r1423	return _maybe_stream_response(response)
marcink project: added all source files and assets	r1

marcink vcs: streaming will use now 100kb chunks readers for faster throughput	r3327	def read_in_chunks(stream_obj, block_size=1024, chunks=-1):
		"""
		Read Stream in chunks, default chunk size: 1k.
		"""
		while chunks:
		data = stream_obj.read(block_size)
		if not data:
		break
		yield data
		chunks -= 1


marcink http-app: simplify detection of chunked encoding....	r1643	def _is_request_chunked(environ):
		stream = environ.get('HTTP_TRANSFER_ENCODING', '') == 'chunked'
		return stream


marcink http-proto: in case incoming requests come in as chunked stream the data to VCSServer....	r1423	def _maybe_stream_request(environ):
marcink git-lfs: streaming support for file upload....	r1566	path = environ['PATH_INFO']
		stream = _is_request_chunked(environ)
		log.debug('handling request `%s` with stream support: %s', path, stream)

		if stream:
marcink vcs: streaming will use now 100kb chunks readers for faster throughput	r3327	# set stream by 256k
		return read_in_chunks(environ['wsgi.input'], block_size=1024 * 256)
marcink http-proto: in case incoming requests come in as chunked stream the data to VCSServer....	r1423	else:
		return environ['wsgi.input'].read()


		def _maybe_stream_response(response):
marcink project: added all source files and assets	r1	"""
		Try to generate chunks from the response if it is chunked.
		"""
marcink git-lfs: streaming support for file upload....	r1566	stream = _is_chunked(response)
		log.debug('returning response with stream: %s', stream)
		if stream:
marcink vcs: streaming will use now 100kb chunks readers for faster throughput	r3327	# read in 256k Chunks
		return response.raw.read_chunked(amt=1024 * 256)
marcink project: added all source files and assets	r1	else:
		return [response.content]


		def _is_chunked(response):
		return response.headers.get('Transfer-Encoding', '') == 'chunked'