##// END OF EJS Templates
fix(vcs): fixed logic bug on skip-vcs detector where it didn't skip properly defined urls
super-admin -
r5533:ec540a35 default
parent child Browse files
Show More
@@ -1,308 +1,312 b''
1
1
2 # Copyright (C) 2010-2023 RhodeCode GmbH
2 # Copyright (C) 2010-2023 RhodeCode GmbH
3 #
3 #
4 # This program is free software: you can redistribute it and/or modify
4 # This program is free software: you can redistribute it and/or modify
5 # it under the terms of the GNU Affero General Public License, version 3
5 # it under the terms of the GNU Affero General Public License, version 3
6 # (only), as published by the Free Software Foundation.
6 # (only), as published by the Free Software Foundation.
7 #
7 #
8 # This program is distributed in the hope that it will be useful,
8 # This program is distributed in the hope that it will be useful,
9 # but WITHOUT ANY WARRANTY; without even the implied warranty of
9 # but WITHOUT ANY WARRANTY; without even the implied warranty of
10 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 # GNU General Public License for more details.
11 # GNU General Public License for more details.
12 #
12 #
13 # You should have received a copy of the GNU Affero General Public License
13 # You should have received a copy of the GNU Affero General Public License
14 # along with this program. If not, see <http://www.gnu.org/licenses/>.
14 # along with this program. If not, see <http://www.gnu.org/licenses/>.
15 #
15 #
16 # This program is dual-licensed. If you wish to learn more about the
16 # This program is dual-licensed. If you wish to learn more about the
17 # RhodeCode Enterprise Edition, including its added features, Support services,
17 # RhodeCode Enterprise Edition, including its added features, Support services,
18 # and proprietary license terms, please see https://rhodecode.com/licenses/
18 # and proprietary license terms, please see https://rhodecode.com/licenses/
19
19
20 import gzip
20 import gzip
21 import shutil
21 import shutil
22 import logging
22 import logging
23 import tempfile
23 import tempfile
24 import urllib.parse
24 import urllib.parse
25
25
26 from webob.exc import HTTPNotFound
26 from webob.exc import HTTPNotFound
27
27
28 import rhodecode
28 import rhodecode
29 from rhodecode.apps._base import ADMIN_PREFIX
29 from rhodecode.apps._base import ADMIN_PREFIX
30 from rhodecode.lib.middleware.utils import get_path_info
30 from rhodecode.lib.middleware.utils import get_path_info
31 from rhodecode.lib.middleware.appenlight import wrap_in_appenlight_if_enabled
31 from rhodecode.lib.middleware.appenlight import wrap_in_appenlight_if_enabled
32 from rhodecode.lib.middleware.simplegit import SimpleGit, GIT_PROTO_PAT
32 from rhodecode.lib.middleware.simplegit import SimpleGit, GIT_PROTO_PAT
33 from rhodecode.lib.middleware.simplehg import SimpleHg
33 from rhodecode.lib.middleware.simplehg import SimpleHg
34 from rhodecode.lib.middleware.simplesvn import SimpleSvn
34 from rhodecode.lib.middleware.simplesvn import SimpleSvn
35 from rhodecode.lib.str_utils import safe_str
35 from rhodecode.lib.str_utils import safe_str
36 from rhodecode.model.settings import VcsSettingsModel
36 from rhodecode.model.settings import VcsSettingsModel
37
37
38
38
39 log = logging.getLogger(__name__)
39 log = logging.getLogger(__name__)
40
40
41 VCS_TYPE_KEY = '_rc_vcs_type'
41 VCS_TYPE_KEY = '_rc_vcs_type'
42 VCS_TYPE_SKIP = '_rc_vcs_skip'
42 VCS_TYPE_SKIP = '_rc_vcs_skip'
43
43
44
44
def is_git(environ):
    """
    Returns True if requests should be handled by GIT wsgi middleware

    The decision is made by matching the request path (as returned by
    ``get_path_info``) against ``GIT_PROTO_PAT``.

    :param environ: WSGI environ of the current request
    :return: ``True`` when the path matches the GIT protocol pattern,
        ``False`` otherwise
    """
    path_info = get_path_info(environ)
    # normalize to a real bool, so callers get what the docstring promises
    # instead of a match object / None
    is_git_path = GIT_PROTO_PAT.match(path_info) is not None
    log.debug(
        'request path: `%s` detected as GIT PROTOCOL %s', path_info,
        is_git_path)

    return is_git_path
56
56
57
57
def is_hg(environ):
    """
    Returns True if requests target is mercurial server - header
    ``HTTP_ACCEPT`` of such request would start with ``application/mercurial``.

    In addition to the header, the query string has to carry a ``cmd``
    parameter for the request to be treated as a Mercurial call.

    :param environ: WSGI environ of the current request
    :return: ``True`` when both conditions hold, ``False`` otherwise
    """
    is_hg_path = False

    http_accept = environ.get('HTTP_ACCEPT')

    if http_accept and http_accept.startswith('application/mercurial'):
        # QUERY_STRING may legitimately be absent from the environ, treat
        # that the same as an empty query instead of raising KeyError
        query = urllib.parse.parse_qs(environ.get('QUERY_STRING', ''))
        if 'cmd' in query:
            is_hg_path = True

    path_info = get_path_info(environ)
    log.debug(
        'request path: `%s` detected as HG PROTOCOL %s', path_info,
        is_hg_path)

    return is_hg_path
78
78
79
79
def is_svn(environ):
    """
    Returns True if requests target is Subversion server

    A request counts as Subversion traffic when any of these holds:

    * the ``HTTP_DAV`` header contains ``subversion``
    * the request path contains the configured magic path segment
      (``rhodecode_subversion_magic_path``, ``/!svn`` when not configured)
    * the request method is one of ``PROPFIND``, ``PROPPATCH``, ``HEAD``

    :param environ: WSGI environ of the current request
    """
    svn_methods = ['PROPFIND', 'PROPPATCH', 'HEAD']

    # gather all inputs up-front, the decision itself happens below
    dav_header = environ.get('HTTP_DAV', '')
    magic_segment = rhodecode.CONFIG.get(
        'rhodecode_subversion_magic_path', '/!svn')
    request_path = get_path_info(environ)
    method = environ['REQUEST_METHOD']

    if 'subversion' in dav_header:
        is_svn_path = True
    elif magic_segment in request_path:
        is_svn_path = True
    else:
        is_svn_path = method in svn_methods

    log.debug(
        'request path: `%s` detected as SVN PROTOCOL %s', request_path,
        is_svn_path)

    return is_svn_path
101
101
102
102
class GunzipMiddleware(object):
    """
    WSGI middleware which transparently decompresses request bodies sent
    with a gzip ``Content-Encoding`` before handing the request over to the
    wrapped application.
    """

    def __init__(self, application):
        self.app = application

    def __call__(self, environ, start_response):
        content_encoding = safe_str(environ.get('HTTP_CONTENT_ENCODING', ''))

        if 'gzip' not in content_encoding:
            log.debug('content not gzipped, gzipMiddleware passing '
                      'request further')
            return self.app(environ, start_response)

        log.debug('gzip detected, now running gunzip wrapper')
        request_body = environ['wsgi.input']

        if hasattr(request_body, 'seek'):
            seekable_body = request_body
        else:
            # The input stream offers no `.seek()`, so copy it into a
            # spooled temporary file (kept in memory up to 64MB) to give
            # the gzip reader a seekable file object.
            seekable_body = tempfile.SpooledTemporaryFile(64 * 1024 * 1024)
            shutil.copyfileobj(request_body, seekable_body)
            seekable_body.seek(0)

        environ['wsgi.input'] = gzip.GzipFile(fileobj=seekable_body, mode='r')
        # the body is served decompressed from now on, so the request must
        # no longer advertise gzip content encoding
        del environ['HTTP_CONTENT_ENCODING']

        # the announced length described the compressed payload, drop it
        environ.pop('CONTENT_LENGTH', None)

        return self.app(environ, start_response)
141
141
142
142
def is_vcs_call(environ):
    """
    Tell whether the request was already marked as a VCS call.

    Looks at the ``VCS_TYPE_KEY`` entry of the environ; a missing or empty
    value, as well as the ``VCS_TYPE_SKIP`` marker, mean it is not a VCS call.

    :param environ: WSGI environ of the current request
    :return: always a ``bool``, never the raw marker value
    """
    raw_type = environ.get(VCS_TYPE_KEY)
    # explicit bool() so empty/None markers yield False instead of leaking
    # the raw falsy value to the caller
    return bool(raw_type) and raw_type != VCS_TYPE_SKIP
148
148
149
149
def detect_vcs_request(environ, backends):
    """
    Pick the VCS middleware class which should handle the request, if any.

    Resolution order:

    1. paths matching the skip list below are never treated as VCS calls
    2. a ``VCS_TYPE_KEY`` entry already present in the environ is honoured,
       the ``VCS_TYPE_SKIP`` marker short-circuits detection
    3. otherwise the protocol checks run in the order given by `backends`

    :param environ: WSGI environ of the current request
    :param backends: iterable of backend names (keys: ``hg``, ``git``,
        ``svn``) defining the order of the protocol checks
    :return: the matching handler class or ``None``
    """
    protocol_checks = {
        'hg': (is_hg, SimpleHg),
        'git': (is_git, SimpleGit),
        'svn': (is_svn, SimpleSvn),
    }

    # Paths for which no detection is done at all. A trailing `++` means
    # "path starts with", a leading `++` means "path ends with", anything
    # else has to match the path exactly.
    skip_rules = (
        # favicon often requested by browsers
        'favicon.ico',

        # static files no detection
        '_static++',

        # debug-toolbar
        '_debug_toolbar++',

        # e.g /_file_store/download
        '_file_store++',

        # login
        f"{ADMIN_PREFIX}/login",
        f"{ADMIN_PREFIX}/logout",

        # 2fa
        f"{ADMIN_PREFIX}/check_2fa",
        f"{ADMIN_PREFIX}/setup_2fa",

        # _admin/api is safe too
        f'{ADMIN_PREFIX}/api',

        # _admin/gist is safe too
        f'{ADMIN_PREFIX}/gists++',

        # _admin/my_account is safe too
        f'{ADMIN_PREFIX}/my_account++',

        # skip ops ping, status
        f'{ADMIN_PREFIX}/ops/ping',
        f'{ADMIN_PREFIX}/ops/status',

        # full channelstream connect should be VCS skipped
        f'{ADMIN_PREFIX}/channelstream/connect',

        '++/repo_creating_check',
    )

    path_url = get_path_info(environ).lstrip('/')
    req_method = environ.get('REQUEST_METHOD')

    for rule in skip_rules:
        # the request path was stripped of leading slashes, rules need the
        # same treatment to be comparable
        rule = rule.lstrip('/')
        is_skipped = (
            (rule.endswith('++') and path_url.startswith(rule[:-2]))
            or (rule.startswith('++') and path_url.endswith(rule[2:]))
            or rule == path_url
        )
        if is_skipped:
            log.debug('path `%s` in whitelist (match:%s), skipping...', path_url, rule)
            return None

    if VCS_TYPE_KEY in environ:
        raw_type = environ[VCS_TYPE_KEY]
        if raw_type == VCS_TYPE_SKIP:
            log.debug('got `skip` marker for vcs detection, skipping...')
            return None

        known = protocol_checks.get(raw_type)
        preset_handler = known[1] if known else None
        if preset_handler:
            log.debug('got handler:%s from environ', preset_handler)
            return preset_handler

    log.debug('request start: checking if request for `%s:%s` is of VCS type in order: %s',
              req_method, path_url, backends)
    for vcs_type in backends:
        vcs_check, candidate = protocol_checks[vcs_type]
        if vcs_check(environ):
            log.debug('vcs handler found %s', candidate)
            return candidate

    return None
232
236
233
237
class VCSMiddleware(object):
    """
    WSGI middleware which hands VCS protocol requests (as detected by
    ``detect_vcs_request``) to the matching handler and passes every other
    request on to the wrapped application.
    """

    def __init__(self, app, registry, config, appenlight_client):
        self.application = app
        self.registry = registry
        self.config = config
        self.appenlight_client = appenlight_client
        self.use_gzip = True
        # order in which we check the middlewares, based on vcs.backends config
        self.check_middlewares = config['vcs.backends']

    def vcs_config(self, repo_name=None):
        """
        returns serialized VcsSettings

        This is best-effort: any failure while loading the settings is
        logged and ``None`` is returned, so the caller can continue and
        validate the repository itself.

        :param repo_name: name of the repository to load settings for
        """
        try:
            return VcsSettingsModel(
                repo=repo_name).get_ui_settings_as_config_obj()
        except Exception:
            # don't break the request on damaged/missing settings, but leave
            # a trace instead of silently swallowing the error
            log.debug('failed to load vcs settings for repo `%s`',
                      repo_name, exc_info=True)
            return None

    def wrap_in_gzip_if_enabled(self, app, config):
        """
        Wraps `app` in ``GunzipMiddleware`` when ``use_gzip`` is set,
        otherwise returns `app` unchanged. `config` is currently not used.
        """
        if self.use_gzip:
            app = GunzipMiddleware(app)
        return app

    def _get_handler_app(self, environ):
        """
        Returns an instantiated VCS handler for the request, or ``None``
        when the request is not detected as a VCS call.
        """
        app = None
        log.debug('VCSMiddleware: detecting vcs type.')
        handler = detect_vcs_request(environ, self.check_middlewares)
        if handler:
            app = handler(self.config, self.registry)

        return app

    def __call__(self, environ, start_response):
        # check if we handle one of interesting protocols, optionally extract
        # specific vcsSettings and allow changes of how things are wrapped
        vcs_handler = self._get_handler_app(environ)
        if vcs_handler:
            # translate the _REPO_ID into real repo NAME for usage
            # in middleware

            path_info = get_path_info(environ)
            environ['PATH_INFO'] = vcs_handler._get_by_id(path_info)

            # Set acl, url and vcs repo names.
            vcs_handler.set_repo_names(environ)

            # register repo config back to the handler
            vcs_conf = self.vcs_config(vcs_handler.acl_repo_name)
            # maybe damaged/non existent settings. We still want to
            # pass that point to validate on is_valid_and_existing_repo
            # and return proper HTTP Code back to client
            if vcs_conf:
                vcs_handler.repo_vcs_config = vcs_conf

            # check for type, presence in database and on filesystem
            if not vcs_handler.is_valid_and_existing_repo(
                    vcs_handler.acl_repo_name,
                    vcs_handler.base_path,
                    vcs_handler.SCM):
                return HTTPNotFound()(environ, start_response)

            environ['REPO_NAME'] = vcs_handler.url_repo_name

            # Wrap handler in middlewares if they are enabled.
            vcs_handler = self.wrap_in_gzip_if_enabled(
                vcs_handler, self.config)
            vcs_handler, _ = wrap_in_appenlight_if_enabled(
                vcs_handler, self.config, self.appenlight_client)

            return vcs_handler(environ, start_response)

        return self.application(environ, start_response)
General Comments 0
You need to be logged in to leave comments. Login now