##// END OF EJS Templates
vcs: Use regular expression to recognize requests to shadow repositories.
Martin Bornhold -
r888:6690eec6 default
parent child Browse files
Show More
@@ -1,224 +1,226 b''
1 # -*- coding: utf-8 -*-
1 # -*- coding: utf-8 -*-
2
2
3 # Copyright (C) 2010-2016 RhodeCode GmbH
3 # Copyright (C) 2010-2016 RhodeCode GmbH
4 #
4 #
5 # This program is free software: you can redistribute it and/or modify
5 # This program is free software: you can redistribute it and/or modify
6 # it under the terms of the GNU Affero General Public License, version 3
6 # it under the terms of the GNU Affero General Public License, version 3
7 # (only), as published by the Free Software Foundation.
7 # (only), as published by the Free Software Foundation.
8 #
8 #
9 # This program is distributed in the hope that it will be useful,
9 # This program is distributed in the hope that it will be useful,
10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 # GNU General Public License for more details.
12 # GNU General Public License for more details.
13 #
13 #
14 # You should have received a copy of the GNU Affero General Public License
14 # You should have received a copy of the GNU Affero General Public License
15 # along with this program. If not, see <http://www.gnu.org/licenses/>.
15 # along with this program. If not, see <http://www.gnu.org/licenses/>.
16 #
16 #
17 # This program is dual-licensed. If you wish to learn more about the
17 # This program is dual-licensed. If you wish to learn more about the
18 # RhodeCode Enterprise Edition, including its added features, Support services,
18 # RhodeCode Enterprise Edition, including its added features, Support services,
19 # and proprietary license terms, please see https://rhodecode.com/licenses/
19 # and proprietary license terms, please see https://rhodecode.com/licenses/
20
20
21 import gzip
21 import gzip
22 import re
22 import shutil
23 import shutil
23 import logging
24 import logging
24 import tempfile
25 import tempfile
25 import urlparse
26 import urlparse
26
27
27 from webob.exc import HTTPNotFound
28 from webob.exc import HTTPNotFound
28
29
29 import rhodecode
30 import rhodecode
30 from rhodecode.lib.middleware.appenlight import wrap_in_appenlight_if_enabled
31 from rhodecode.lib.middleware.appenlight import wrap_in_appenlight_if_enabled
31 from rhodecode.lib.middleware.simplegit import SimpleGit, GIT_PROTO_PAT
32 from rhodecode.lib.middleware.simplegit import SimpleGit, GIT_PROTO_PAT
32 from rhodecode.lib.middleware.simplehg import SimpleHg
33 from rhodecode.lib.middleware.simplehg import SimpleHg
33 from rhodecode.lib.middleware.simplesvn import SimpleSvn
34 from rhodecode.lib.middleware.simplesvn import SimpleSvn
34 from rhodecode.model.settings import VcsSettingsModel
35 from rhodecode.model.settings import VcsSettingsModel
35
36
36 log = logging.getLogger(__name__)
37 log = logging.getLogger(__name__)
37
38
38
39
def is_git(environ):
    """
    Tell whether the request should be handled by the GIT wsgi middleware.

    The decision is made by matching ``GIT_PROTO_PAT`` against the
    ``PATH_INFO`` entry of the WSGI environment.

    :param environ: WSGI environment of the current request.
    :returns: whatever ``GIT_PROTO_PAT.match`` returns; callers are expected
        to use the value for its truthiness only.
    """
    path_info = environ['PATH_INFO']
    git_match = GIT_PROTO_PAT.match(path_info)
    log.debug(
        'request path: `%s` detected as GIT PROTOCOL %s',
        path_info, git_match is not None)

    return git_match
49
50
50
51
def is_hg(environ):
    """
    Returns True if requests target is mercurial server.

    A request is treated as a Mercurial request when its ``HTTP_ACCEPT``
    header starts with ``application/mercurial`` and its query string
    contains a ``cmd`` parameter.

    :param environ: WSGI environment of the current request.
    :returns: ``True`` for Mercurial protocol requests, ``False`` otherwise.
    """
    is_hg_path = False

    http_accept = environ.get('HTTP_ACCEPT')

    if http_accept and http_accept.startswith('application/mercurial'):
        # PEP 3333 allows the server to omit ``QUERY_STRING`` when it is
        # empty, so a missing key must not blow up with a KeyError.
        query = urlparse.parse_qs(environ.get('QUERY_STRING', ''))
        if 'cmd' in query:
            is_hg_path = True

    log.debug(
        'request path: `%s` detected as HG PROTOCOL %s', environ['PATH_INFO'],
        is_hg_path)

    return is_hg_path
70
71
71
72
def is_svn(environ):
    """
    Tell whether the request targets the Subversion server.

    This is the case when the ``HTTP_DAV`` header mentions ``subversion`` or
    when the configured magic path segment (setting
    ``rhodecode_subversion_magic_path``, default ``/!svn``) is part of
    ``PATH_INFO``.

    :param environ: WSGI environment of the current request.
    :returns: ``True`` for Subversion requests, ``False`` otherwise.
    """
    dav_header = environ.get('HTTP_DAV', '')
    svn_marker = rhodecode.CONFIG.get(
        'rhodecode_subversion_magic_path', '/!svn')

    if 'subversion' in dav_header:
        detected = True
    else:
        detected = svn_marker in environ['PATH_INFO']

    log.debug(
        'request path: `%s` detected as SVN PROTOCOL %s', environ['PATH_INFO'],
        detected)

    return detected
87
88
88
89
class GunzipMiddleware(object):
    """
    WSGI middleware which transparently decompresses request bodies sent
    with a gzip ``Content-Encoding`` before handing the request over to the
    wrapped application.
    """

    def __init__(self, application):
        # the WSGI application which receives the (unzipped) request
        self.app = application

    def __call__(self, environ, start_response):
        content_encoding = environ.get('HTTP_CONTENT_ENCODING', b'')

        if b'gzip' not in content_encoding:
            log.debug('content not gzipped, gzipMiddleware passing '
                      'request further')
            return self.app(environ, start_response)

        log.debug('gzip detected, now running gunzip wrapper')
        body_stream = environ['wsgi.input']

        if not hasattr(body_stream, 'seek'):
            # Python 2.x's gzip module needs '.seek()' and '.tell()' on the
            # stream it reads from. Streams lacking them are copied into a
            # temporary file first (kept in memory up to 64 MiB).
            seekable_copy = tempfile.SpooledTemporaryFile(64 * 1024 * 1024)
            shutil.copyfileobj(body_stream, seekable_copy)
            seekable_copy.seek(0)
            body_stream = seekable_copy

        environ['wsgi.input'] = gzip.GzipFile(fileobj=body_stream, mode='r')

        # The body is handed on uncompressed, so the request must no longer
        # advertise a gzip content encoding.
        del environ['HTTP_CONTENT_ENCODING']

        # The announced length describes the compressed payload and does not
        # apply to the unzipped stream; drop it if present.
        environ.pop('CONTENT_LENGTH', None)

        return self.app(environ, start_response)
127
128
128
129
class VCSMiddleware(object):
    """
    WSGI middleware which hands requests of the VCS protocols (hg, git, svn)
    over to the matching handler application and passes every other request
    on to the wrapped application unchanged.
    """

    # Recognizes repository names which address the shadow repository of a
    # pull request: ``<base_name>/<repo_name>/pull-request/<pr_id>/repository``
    # Compiled once here instead of on every request; raw strings keep the
    # regex escapes (``\w``, ``\d``) out of Python's string escape handling.
    # NOTE(review): ``base_name`` needs at least one path segment, so a
    # repository that is not inside a group never matches, and the pattern is
    # not anchored at the end - confirm both are intended.
    _shadow_repo_regex = re.compile(
        r'(?P<base_name>(?:[\w-]+)(?:/[\w-]+)*)/'
        r'(?P<repo_name>[\w-]+)'
        r'/pull-request/(?P<pr_id>\d+)/repository')

    def __init__(self, app, config, appenlight_client, registry):
        self.application = app
        self.config = config
        self.appenlight_client = appenlight_client
        self.registry = registry
        self.use_gzip = True
        # order in which we check the middlewares, based on vcs.backends config
        self.check_middlewares = config['vcs.backends']
        # maps a backend name to its (detection function, handler class) pair
        self.checks = {
            'hg': (is_hg, SimpleHg),
            'git': (is_git, SimpleGit),
            'svn': (is_svn, SimpleSvn),
        }

    def vcs_config(self, repo_name=None):
        """
        returns serialized VcsSettings
        """
        return VcsSettingsModel(repo=repo_name).get_ui_settings_as_config_obj()

    def wrap_in_gzip_if_enabled(self, app, config):
        """
        Wrap ``app`` into the :class:`GunzipMiddleware` when ``use_gzip`` is
        set, otherwise return it untouched. ``config`` is currently unused.
        """
        if self.use_gzip:
            app = GunzipMiddleware(app)
        return app

    def _get_handler_app(self, environ):
        """
        Return a handler instance for the first backend (checked in the
        order of ``check_middlewares``) whose detection function accepts the
        request, or ``None`` if the request is not a VCS request.
        """
        app = None
        log.debug('Checking vcs types in order: %r', self.check_middlewares)
        for vcs_type in self.check_middlewares:
            vcs_check, handler = self.checks[vcs_type]
            if vcs_check(environ):
                log.debug(
                    'Found VCS Middleware to handle the request %s', handler)
                app = handler(self.application, self.config, self.registry)
                break

        return app

    def __call__(self, environ, start_response):
        # check if we handle one of interesting protocols, optionally extract
        # specific vcsSettings and allow changes of how things are wrapped
        vcs_handler = self._get_handler_app(environ)
        if vcs_handler:
            # translate the _REPO_ID into real repo NAME for usage
            # in middleware
            environ['PATH_INFO'] = vcs_handler._get_by_id(environ['PATH_INFO'])
            repo_name = vcs_handler._get_repository_name(environ)

            # By default all three names are the plain repository name. For
            # requests to a pull request shadow repository the ACL name
            # becomes the target repository and the VCS name the shadow
            # repository, while the URL name stays as requested.
            acl_repo_name = repo_name
            vcs_repo_name = repo_name
            url_repo_name = repo_name
            pr_id = None

            match = self._shadow_repo_regex.match(repo_name)
            if match:
                match_dict = match.groupdict()
                # NOTE(review): ``pr_id`` is the matched text (a string of
                # digits), not an int - confirm consumers expect that.
                pr_id = match_dict.get('pr_id')
                acl_repo_name = '{base_name}/{repo_name}'.format(**match_dict)
                vcs_repo_name = '{base_name}/.__shadow_{repo_name}_pr-{pr_id}'.format(
                    **match_dict)

            log.debug('repo_names %s', {
                'acl_repo_name': acl_repo_name,
                'vcs_repo_name': vcs_repo_name,
                'url_repo_name': url_repo_name,
            })
            log.debug('pull_request %s', pr_id)

            # check for type, presence in database and on filesystem
            if not vcs_handler.is_valid_and_existing_repo(
                    acl_repo_name, vcs_handler.basepath, vcs_handler.SCM):
                return HTTPNotFound()(environ, start_response)

            # TODO: johbo: Needed for the Pyro4 backend and Mercurial only.
            # Remove once we fully switched to the HTTP backend.
            environ['REPO_NAME'] = url_repo_name

            # register repo_name and it's config back to the handler
            vcs_handler.acl_repo_name = acl_repo_name
            vcs_handler.url_repo_name = url_repo_name
            vcs_handler.vcs_repo_name = vcs_repo_name
            vcs_handler.pr_id = pr_id
            vcs_handler.repo_vcs_config = self.vcs_config(acl_repo_name)

            vcs_handler = self.wrap_in_gzip_if_enabled(
                vcs_handler, self.config)
            vcs_handler, _ = wrap_in_appenlight_if_enabled(
                vcs_handler, self.config, self.appenlight_client)
            return vcs_handler(environ, start_response)

        return self.application(environ, start_response)
General Comments 0
You need to be logged in to leave comments. Login now