##// END OF EJS Templates
fix(git): fixed issues with gzip detection for git protocol
super-admin -
r5229:d12cacc8 default
parent child Browse files
Show More
@@ -1,302 +1,303 b''
1
1
2 # Copyright (C) 2010-2023 RhodeCode GmbH
2 # Copyright (C) 2010-2023 RhodeCode GmbH
3 #
3 #
4 # This program is free software: you can redistribute it and/or modify
4 # This program is free software: you can redistribute it and/or modify
5 # it under the terms of the GNU Affero General Public License, version 3
5 # it under the terms of the GNU Affero General Public License, version 3
6 # (only), as published by the Free Software Foundation.
6 # (only), as published by the Free Software Foundation.
7 #
7 #
8 # This program is distributed in the hope that it will be useful,
8 # This program is distributed in the hope that it will be useful,
9 # but WITHOUT ANY WARRANTY; without even the implied warranty of
9 # but WITHOUT ANY WARRANTY; without even the implied warranty of
10 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 # GNU General Public License for more details.
11 # GNU General Public License for more details.
12 #
12 #
13 # You should have received a copy of the GNU Affero General Public License
13 # You should have received a copy of the GNU Affero General Public License
14 # along with this program. If not, see <http://www.gnu.org/licenses/>.
14 # along with this program. If not, see <http://www.gnu.org/licenses/>.
15 #
15 #
16 # This program is dual-licensed. If you wish to learn more about the
16 # This program is dual-licensed. If you wish to learn more about the
17 # RhodeCode Enterprise Edition, including its added features, Support services,
17 # RhodeCode Enterprise Edition, including its added features, Support services,
18 # and proprietary license terms, please see https://rhodecode.com/licenses/
18 # and proprietary license terms, please see https://rhodecode.com/licenses/
19
19
20 import gzip
20 import gzip
21 import shutil
21 import shutil
22 import logging
22 import logging
23 import tempfile
23 import tempfile
24 import urllib.parse
24 import urllib.parse
25
25
26 from webob.exc import HTTPNotFound
26 from webob.exc import HTTPNotFound
27
27
28 import rhodecode
28 import rhodecode
29 from rhodecode.lib.middleware.utils import get_path_info
29 from rhodecode.lib.middleware.utils import get_path_info
30 from rhodecode.lib.middleware.appenlight import wrap_in_appenlight_if_enabled
30 from rhodecode.lib.middleware.appenlight import wrap_in_appenlight_if_enabled
31 from rhodecode.lib.middleware.simplegit import SimpleGit, GIT_PROTO_PAT
31 from rhodecode.lib.middleware.simplegit import SimpleGit, GIT_PROTO_PAT
32 from rhodecode.lib.middleware.simplehg import SimpleHg
32 from rhodecode.lib.middleware.simplehg import SimpleHg
33 from rhodecode.lib.middleware.simplesvn import SimpleSvn
33 from rhodecode.lib.middleware.simplesvn import SimpleSvn
34 from rhodecode.lib.str_utils import safe_str
34 from rhodecode.model.settings import VcsSettingsModel
35 from rhodecode.model.settings import VcsSettingsModel
35
36
36
37
# module level logger
log = logging.getLogger(__name__)

# environ key under which a pre-computed VCS type marker may be stored
VCS_TYPE_KEY = '_rc_vcs_type'
# marker value stored under VCS_TYPE_KEY meaning "skip VCS detection"
VCS_TYPE_SKIP = '_rc_vcs_skip'
41
42
42
43
def is_git(environ):
    """
    Check whether the request should be handled by GIT wsgi middleware.

    The request path is matched against ``GIT_PROTO_PAT``.

    :param environ: WSGI environ of the incoming request
    :return: the match object (truthy) when the path matches the git
        protocol pattern, otherwise ``None``
    """
    request_path = get_path_info(environ)
    git_match = GIT_PROTO_PAT.match(request_path)
    detected = git_match is not None
    log.debug(
        'request path: `%s` detected as GIT PROTOCOL %s', request_path,
        detected)

    # callers only rely on truthiness, but the match object itself is returned
    return git_match
54
55
55
56
def is_hg(environ):
    """
    Returns True if requests target is mercurial server - header
    ``HTTP_ACCEPT`` of such request would start with ``application/mercurial``
    and the query string carries a ``cmd`` parameter.

    :param environ: WSGI environ of the incoming request
    :return: ``True`` when both conditions above hold, otherwise ``False``
    """
    is_hg_path = False

    http_accept = environ.get('HTTP_ACCEPT')

    if http_accept and http_accept.startswith('application/mercurial'):
        # QUERY_STRING may be empty or absent in a WSGI environ, so read it
        # defensively instead of raising KeyError during detection
        query = urllib.parse.parse_qs(environ.get('QUERY_STRING') or '')
        if 'cmd' in query:
            is_hg_path = True

    path_info = get_path_info(environ)
    log.debug(
        'request path: `%s` detected as HG PROTOCOL %s', path_info,
        is_hg_path)

    return is_hg_path
76
77
77
78
def is_svn(environ):
    """
    Check whether the request targets a Subversion server.

    A request counts as SVN when any of these holds:

    * the ``HTTP_DAV`` header contains ``subversion``
    * the request path contains the configured magic path segment
      (``rhodecode_subversion_magic_path``, default ``/!svn``)
    * the request method is ``PROPFIND``, ``PROPPATCH`` or ``HEAD``

    :param environ: WSGI environ of the incoming request
    :return: ``True`` when the request is detected as SVN, else ``False``
    """
    dav_header = environ.get('HTTP_DAV', '')
    svn_magic = rhodecode.CONFIG.get(
        'rhodecode_subversion_magic_path', '/!svn')
    request_path = get_path_info(environ)
    method = environ['REQUEST_METHOD']

    # checks are evaluated lazily, in the same order as a chained `or`
    if 'subversion' in dav_header:
        detected = True
    elif svn_magic in request_path:
        detected = True
    else:
        detected = method in ['PROPFIND', 'PROPPATCH', 'HEAD']

    log.debug(
        'request path: `%s` detected as SVN PROTOCOL %s', request_path,
        detected)

    return detected
99
100
100
101
class GunzipMiddleware(object):
    """
    WSGI middleware that unzips gzip-encoded requests before
    passing on to the underlying application.

    When the request declares a gzip ``Content-Encoding`` the
    ``wsgi.input`` stream is replaced by a decompressing reader, and the
    ``HTTP_CONTENT_ENCODING`` / ``CONTENT_LENGTH`` entries are dropped from
    the environ. Other requests are passed through untouched.
    """

    def __init__(self, application):
        """
        :param application: the wrapped WSGI application
        """
        self.app = application

    def __call__(self, environ, start_response):
        """
        :param environ: WSGI environ of the incoming request (mutated in
            place when the body is gzip encoded)
        :param start_response: WSGI start_response callable
        :return: whatever the wrapped application returns
        """
        # normalize the header to a lower-cased str: this avoids a bytes/str
        # mismatch in the substring check below, and content-coding names are
        # case-insensitive
        content_encoding = safe_str(
            environ.get('HTTP_CONTENT_ENCODING', '')).lower()

        if 'gzip' in content_encoding:
            log.debug('gzip detected, now running gunzip wrapper')
            wsgi_input = environ['wsgi.input']

            if not hasattr(wsgi_input, 'seek'):
                # The input stream is not seekable. The original note here
                # says the Python 2.x stdlib gzip implementation requires
                # '.seek()' and '.tell()' on the input stream, so the data
                # is read into a temporary file (kept in memory up to 64MB)
                # to work around this limitation.
                raw_input = wsgi_input
                wsgi_input = tempfile.SpooledTemporaryFile(64 * 1024 * 1024)
                shutil.copyfileobj(raw_input, wsgi_input)
                wsgi_input.seek(0)

            environ['wsgi.input'] = gzip.GzipFile(fileobj=wsgi_input, mode='r')
            # since we "Ungzipped" the content we say now it's no longer gzip
            # content encoding
            del environ['HTTP_CONTENT_ENCODING']

            # the declared length described the compressed body, so it no
            # longer applies to the decompressed stream
            environ.pop('CONTENT_LENGTH', None)
        else:
            log.debug('content not gzipped, gzipMiddleware passing '
                      'request further')
        return self.app(environ, start_response)
139
140
140
141
def is_vcs_call(environ):
    """
    Tell whether the environ is already marked as a VCS call.

    :param environ: WSGI environ of the incoming request
    :return: ``False`` when no marker is stored under ``VCS_TYPE_KEY``;
        the (falsy) marker itself when it is empty; otherwise whether the
        marker differs from ``VCS_TYPE_SKIP``
    """
    if VCS_TYPE_KEY not in environ:
        return False

    vcs_marker = environ[VCS_TYPE_KEY]
    if not vcs_marker:
        # empty marker is handed back as-is, callers rely on truthiness
        return vcs_marker
    return vcs_marker != VCS_TYPE_SKIP
146
147
147
148
def detect_vcs_request(environ, backends):
    """
    Pick the VCS middleware class that should handle the request, if any.

    Detection runs in three stages:

    1. paths on the whitelist are never treated as VCS calls
    2. a marker stored in the environ under ``VCS_TYPE_KEY`` either skips
       detection or selects the handler directly
    3. otherwise the detection function of each backend is tried in the
       order given by ``backends``

    :param environ: WSGI environ of the incoming request
    :param backends: iterable of backend names (``hg``, ``git``, ``svn``)
        giving the order in which detection is attempted
    :return: ``SimpleHg``, ``SimpleGit``, ``SimpleSvn`` or ``None``
    """
    detectors = {
        'hg': (is_hg, SimpleHg),
        'git': (is_git, SimpleGit),
        'svn': (is_svn, SimpleSvn),
    }

    # Paths for which we don't do any checks. A trailing `++` means
    # "starts with", a leading `++` means "ends with", anything else has to
    # match the whole path.
    skip_paths = [
        # favicon often requested by browsers
        'favicon.ico',

        # e.g /_file_store/download
        '_file_store++',

        # login
        "_admin/login",

        # _admin/api is safe too
        '_admin/api',

        # _admin/gist is safe too
        '_admin/gists++',

        # _admin/my_account is safe too
        '_admin/my_account++',

        # static files no detection
        '_static++',

        # debug-toolbar
        '_debug_toolbar++',

        # skip ops ping, status
        '_admin/ops/ping',
        '_admin/ops/status',

        # full channelstream connect should be VCS skipped
        '_admin/channelstream/connect',

        '++/repo_creating_check'
    ]

    path_info = get_path_info(environ)
    path_url = path_info.lstrip('/')
    req_method = environ.get('REQUEST_METHOD')

    for pattern in skip_paths:
        whitelisted = (
            (pattern.endswith('++') and path_url.startswith(pattern[:-2]))
            or (pattern.startswith('++') and path_url.endswith(pattern[2:]))
            or pattern == path_url
        )
        if whitelisted:
            log.debug('path `%s` in whitelist (match:%s), skipping...', path_url, pattern)
            return None

    handler = None
    if VCS_TYPE_KEY in environ:
        forced_type = environ[VCS_TYPE_KEY]
        if forced_type == VCS_TYPE_SKIP:
            log.debug('got `skip` marker for vcs detection, skipping...')
            return None

        # unknown marker values fall through to regular detection below
        _check, handler = detectors.get(forced_type) or [None, None]
        if handler:
            log.debug('got handler:%s from environ', handler)

    if handler:
        return handler

    log.debug('request start: checking if request for `%s:%s` is of VCS type in order: %s',
              req_method, path_url, backends)
    for backend_name in backends:
        matches_backend, backend_handler = detectors[backend_name]
        if matches_backend(environ):
            log.debug('vcs handler found %s', backend_handler)
            return backend_handler

    return handler
226
227
227
228
class VCSMiddleware(object):
    """
    WSGI middleware that hands VCS protocol requests over to the matching
    handler (as picked by ``detect_vcs_request``) and passes every other
    request on to the wrapped application.
    """

    def __init__(self, app, registry, config, appenlight_client):
        """
        :param app: wrapped WSGI application, used for non-VCS requests
        :param registry: passed on to the VCS handler constructor
        :param config: configuration mapping; must contain ``vcs.backends``
        :param appenlight_client: passed to ``wrap_in_appenlight_if_enabled``
        """
        self.application = app
        self.registry = registry
        self.config = config
        self.appenlight_client = appenlight_client
        self.use_gzip = True
        # order in which we check the middlewares, based on vcs.backends config
        self.check_middlewares = config['vcs.backends']

    def vcs_config(self, repo_name=None):
        """
        returns serialized VcsSettings

        :param repo_name: name of the repository to load the settings for
        :return: the settings config object, or ``None`` when loading fails
        """
        try:
            return VcsSettingsModel(
                repo=repo_name).get_ui_settings_as_config_obj()
        except Exception:
            # Best effort on purpose: the caller continues without a config
            # so that repository validation can answer with a proper HTTP
            # code. Leave a trace instead of swallowing the error silently.
            log.debug(
                'failed to load vcs settings for repo `%s`', repo_name,
                exc_info=True)
            return None

    def wrap_in_gzip_if_enabled(self, app, config):
        """
        Wrap ``app`` in ``GunzipMiddleware`` when ``self.use_gzip`` is set.

        :param app: WSGI application to wrap
        :param config: not used by this method
        :return: the wrapped (or original) application
        """
        if self.use_gzip:
            app = GunzipMiddleware(app)
        return app

    def _get_handler_app(self, environ):
        """
        Instantiate the VCS handler detected for this request.

        :param environ: WSGI environ of the incoming request
        :return: handler instance, or ``None`` for non-VCS requests
        """
        app = None
        log.debug('VCSMiddleware: detecting vcs type.')
        handler = detect_vcs_request(environ, self.check_middlewares)
        if handler:
            app = handler(self.config, self.registry)

        return app

    def __call__(self, environ, start_response):
        """
        :param environ: WSGI environ of the incoming request
        :param start_response: WSGI start_response callable
        :return: response of the VCS handler, a 404 response when the
            repository is not valid, or the wrapped application's response
        """
        # check if we handle one of interesting protocols, optionally extract
        # specific vcsSettings and allow changes of how things are wrapped
        vcs_handler = self._get_handler_app(environ)
        if vcs_handler:
            # translate the _REPO_ID into real repo NAME for usage
            # in middleware

            path_info = get_path_info(environ)
            environ['PATH_INFO'] = vcs_handler._get_by_id(path_info)

            # Set acl, url and vcs repo names.
            vcs_handler.set_repo_names(environ)

            # register repo config back to the handler
            vcs_conf = self.vcs_config(vcs_handler.acl_repo_name)
            # maybe damaged/non existent settings. We still want to
            # pass that point to validate on is_valid_and_existing_repo
            # and return proper HTTP Code back to client
            if vcs_conf:
                vcs_handler.repo_vcs_config = vcs_conf

            # check for type, presence in database and on filesystem
            if not vcs_handler.is_valid_and_existing_repo(
                    vcs_handler.acl_repo_name,
                    vcs_handler.base_path,
                    vcs_handler.SCM):
                return HTTPNotFound()(environ, start_response)

            environ['REPO_NAME'] = vcs_handler.url_repo_name

            # Wrap handler in middlewares if they are enabled.
            vcs_handler = self.wrap_in_gzip_if_enabled(
                vcs_handler, self.config)
            vcs_handler, _ = wrap_in_appenlight_if_enabled(
                vcs_handler, self.config, self.appenlight_client)

            return vcs_handler(environ, start_response)

        return self.application(environ, start_response)
General Comments 0
You need to be logged in to leave comments. Login now