##// END OF EJS Templates
fix(git): fixed issues with gzip detection for git protocol
super-admin -
r5229:d12cacc8 default
parent child Browse files
Show More
@@ -1,302 +1,303 b''
1 1
2 2 # Copyright (C) 2010-2023 RhodeCode GmbH
3 3 #
4 4 # This program is free software: you can redistribute it and/or modify
5 5 # it under the terms of the GNU Affero General Public License, version 3
6 6 # (only), as published by the Free Software Foundation.
7 7 #
8 8 # This program is distributed in the hope that it will be useful,
9 9 # but WITHOUT ANY WARRANTY; without even the implied warranty of
10 10 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 11 # GNU General Public License for more details.
12 12 #
13 13 # You should have received a copy of the GNU Affero General Public License
14 14 # along with this program. If not, see <http://www.gnu.org/licenses/>.
15 15 #
16 16 # This program is dual-licensed. If you wish to learn more about the
17 17 # RhodeCode Enterprise Edition, including its added features, Support services,
18 18 # and proprietary license terms, please see https://rhodecode.com/licenses/
19 19
20 20 import gzip
21 21 import shutil
22 22 import logging
23 23 import tempfile
24 24 import urllib.parse
25 25
26 26 from webob.exc import HTTPNotFound
27 27
28 28 import rhodecode
29 29 from rhodecode.lib.middleware.utils import get_path_info
30 30 from rhodecode.lib.middleware.appenlight import wrap_in_appenlight_if_enabled
31 31 from rhodecode.lib.middleware.simplegit import SimpleGit, GIT_PROTO_PAT
32 32 from rhodecode.lib.middleware.simplehg import SimpleHg
33 33 from rhodecode.lib.middleware.simplesvn import SimpleSvn
34 from rhodecode.lib.str_utils import safe_str
34 35 from rhodecode.model.settings import VcsSettingsModel
35 36
36 37
37 38 log = logging.getLogger(__name__)
38 39
39 40 VCS_TYPE_KEY = '_rc_vcs_type'
40 41 VCS_TYPE_SKIP = '_rc_vcs_skip'
41 42
42 43
def is_git(environ):
    """
    Tell whether the request should be handled by the GIT wsgi middleware.

    The request path is matched against ``GIT_PROTO_PAT``. The result of that
    match is returned as-is, so callers get a truthy value for GIT protocol
    paths and ``None`` otherwise.
    """
    request_path = get_path_info(environ)
    git_match = GIT_PROTO_PAT.match(request_path)
    detected = git_match is not None
    log.debug(
        'request path: `%s` detected as GIT PROTOCOL %s', request_path,
        detected)

    return git_match
54 55
55 56
def is_hg(environ):
    """
    Returns True if requests target is mercurial server - header
    ``HTTP_ACCEPT`` of such request would start with ``application/mercurial``
    and the query string has to carry a ``cmd`` parameter.

    :param environ: WSGI environ dictionary of the current request
    :return: ``True`` when the request looks like a Mercurial protocol call,
        ``False`` otherwise
    """
    is_hg_path = False

    http_accept = environ.get('HTTP_ACCEPT')

    if http_accept and http_accept.startswith('application/mercurial'):
        # QUERY_STRING is allowed to be missing from the WSGI environ, so
        # fall back to an empty string instead of raising KeyError.
        query = urllib.parse.parse_qs(environ.get('QUERY_STRING', ''))
        if 'cmd' in query:
            is_hg_path = True

    path_info = get_path_info(environ)
    log.debug(
        'request path: `%s` detected as HG PROTOCOL %s', path_info,
        is_hg_path)

    return is_hg_path
76 77
77 78
def is_svn(environ):
    """
    Tell whether the request targets the Subversion server.

    A request counts as SVN when the ``HTTP_DAV`` header mentions
    ``subversion``, when the path contains the magic path segment (config key
    ``rhodecode_subversion_magic_path``, ``/!svn`` when not configured), or
    when the request method is one of ``PROPFIND``, ``PROPPATCH``, ``HEAD``.
    """
    dav_header = environ.get('HTTP_DAV', '')
    svn_magic_segment = rhodecode.CONFIG.get(
        'rhodecode_subversion_magic_path', '/!svn')
    path_info = get_path_info(environ)
    method = environ['REQUEST_METHOD']

    # checks are evaluated in this order and stop at the first hit
    if 'subversion' in dav_header:
        is_svn_path = True
    elif svn_magic_segment in path_info:
        is_svn_path = True
    else:
        is_svn_path = method in ['PROPFIND', 'PROPPATCH', 'HEAD']

    log.debug(
        'request path: `%s` detected as SVN PROTOCOL %s', path_info,
        is_svn_path)

    return is_svn_path
99 100
100 101
class GunzipMiddleware(object):
    """
    WSGI middleware that unzips gzip-encoded requests before
    passing on to the underlying application.
    """

    def __init__(self, application):
        # the wrapped WSGI application, called for every request
        self.app = application

    def __call__(self, environ, start_response):
        """
        Replace ``wsgi.input`` with a decompressing reader when the request
        declares a gzip ``Content-Encoding``, then call the wrapped app.

        :param environ: WSGI environ dictionary; modified in place for
            gzip-encoded requests
        :param start_response: WSGI ``start_response`` callable, passed through
        :return: whatever the wrapped application returns
        """
        # The header is normalised through ``safe_str`` and compared as text;
        # comparing a bytes needle against a str header raises TypeError.
        # NOTE(review): assumes safe_str returns ``str`` - confirm.
        content_encoding = safe_str(environ.get('HTTP_CONTENT_ENCODING', ''))

        # content-coding tokens are case-insensitive, hence the lower()
        if 'gzip' in content_encoding.lower():
            log.debug('gzip detected, now running gunzip wrapper')
            wsgi_input = environ['wsgi.input']

            if not hasattr(environ['wsgi.input'], 'seek'):
                # The gzip implementation in the standard library of Python 2.x
                # requires the '.seek()' and '.tell()' methods to be available
                # on the input stream. Read the data into a temporary file to
                # work around this limitation.

                # data is kept in memory up to 64MB, then rolled over to disk
                wsgi_input = tempfile.SpooledTemporaryFile(64 * 1024 * 1024)
                shutil.copyfileobj(environ['wsgi.input'], wsgi_input)
                wsgi_input.seek(0)

            environ['wsgi.input'] = gzip.GzipFile(fileobj=wsgi_input, mode='r')
            # since we "Ungzipped" the content we say now it's no longer gzip
            # content encoding
            del environ['HTTP_CONTENT_ENCODING']

            # the declared length describes the compressed body, so it no
            # longer matches what the application is going to read
            if 'CONTENT_LENGTH' in environ:
                del environ['CONTENT_LENGTH']
        else:
            log.debug('content not gzipped, gzipMiddleware passing '
                      'request further')
        return self.app(environ, start_response)
139 140
140 141
def is_vcs_call(environ):
    """
    Check the VCS type marker stored in ``environ`` under ``VCS_TYPE_KEY``.

    Returns ``False`` when the marker is missing. Otherwise returns the marker
    itself when it is falsy, or whether it differs from ``VCS_TYPE_SKIP``.
    """
    if VCS_TYPE_KEY not in environ:
        return False

    vcs_type = environ[VCS_TYPE_KEY]
    return vcs_type and vcs_type != VCS_TYPE_SKIP
146 147
147 148
def detect_vcs_request(environ, backends):
    """
    Pick the VCS middleware class that should handle the request, if any.

    Whitelisted paths are never treated as VCS calls. Otherwise the type
    stored in ``environ`` under ``VCS_TYPE_KEY`` is used when present, and as
    a fallback the detection function of each backend is tried in the order
    given by ``backends``.

    :param environ: WSGI environ dictionary of the current request
    :param backends: iterable of backend names (``hg``, ``git``, ``svn``)
        defining the order of detection
    :return: handler class for the detected VCS type or ``None``
    """
    checks = {
        'hg': (is_hg, SimpleHg),
        'git': (is_git, SimpleGit),
        'svn': (is_svn, SimpleSvn),
    }

    # First chunk of path views for which we don't do any checks. A trailing
    # `++` marks a prefix match, a leading `++` marks a suffix match, anything
    # else has to match the path exactly.
    white_list = [
        # favicon often requested by browsers
        'favicon.ico',

        # e.g /_file_store/download
        '_file_store++',

        # login
        "_admin/login",

        # _admin/api is safe too
        '_admin/api',

        # _admin/gist is safe too
        '_admin/gists++',

        # _admin/my_account is safe too
        '_admin/my_account++',

        # static files no detection
        '_static++',

        # debug-toolbar
        '_debug_toolbar++',

        # skip ops ping, status
        '_admin/ops/ping',
        '_admin/ops/status',

        # full channelstream connect should be VCS skipped
        '_admin/channelstream/connect',

        '++/repo_creating_check'
    ]
    path_info = get_path_info(environ)
    path_url = path_info.lstrip('/')
    req_method = environ.get('REQUEST_METHOD')

    for item in white_list:
        prefix_hit = item.endswith('++') and path_url.startswith(item[:-2])
        suffix_hit = item.startswith('++') and path_url.endswith(item[2:])
        if prefix_hit or suffix_hit or item == path_url:
            log.debug('path `%s` in whitelist (match:%s), skipping...', path_url, item)
            return None

    # a type stored in environ takes precedence over detection
    if VCS_TYPE_KEY in environ:
        raw_type = environ[VCS_TYPE_KEY]
        if raw_type == VCS_TYPE_SKIP:
            log.debug('got `skip` marker for vcs detection, skipping...')
            return None

        known = checks.get(raw_type)
        if known:
            preset_handler = known[1]
            log.debug('got handler:%s from environ', preset_handler)
            return preset_handler

    log.debug('request start: checking if request for `%s:%s` is of VCS type in order: %s',
              req_method, path_url, backends)
    for vcs_type in backends:
        vcs_check, candidate = checks[vcs_type]
        if vcs_check(environ):
            log.debug('vcs handler found %s', candidate)
            return candidate

    return None
226 227
227 228
class VCSMiddleware(object):
    """
    WSGI middleware that routes VCS protocol requests to the matching VCS
    handler and passes every other request to the wrapped application.
    """

    def __init__(self, app, registry, config, appenlight_client):
        self.application = app
        self.registry = registry
        self.config = config
        self.appenlight_client = appenlight_client
        self.use_gzip = True
        # order in which we check the middlewares, based on vcs.backends config
        self.check_middlewares = config['vcs.backends']

    def vcs_config(self, repo_name=None):
        """
        returns serialized VcsSettings

        Best-effort: when the settings cannot be read, the failure is logged
        and ``None`` is returned, so the caller can still validate the
        repository and answer with a proper HTTP code.
        """
        try:
            return VcsSettingsModel(
                repo=repo_name).get_ui_settings_as_config_obj()
        except Exception:
            # keep going without settings, but leave a trace of the reason
            log.debug('failed to read vcs settings for repo `%s`',
                      repo_name, exc_info=True)
            return None

    def wrap_in_gzip_if_enabled(self, app, config):
        """
        Wrap ``app`` in ``GunzipMiddleware`` when ``self.use_gzip`` is set.

        ``config`` is accepted but not used by this method.
        """
        if self.use_gzip:
            app = GunzipMiddleware(app)
        return app

    def _get_handler_app(self, environ):
        """
        Return an instantiated VCS handler for the request, or ``None`` when
        the request is not detected as a VCS call.
        """
        app = None
        log.debug('VCSMiddleware: detecting vcs type.')
        handler = detect_vcs_request(environ, self.check_middlewares)
        if handler:
            app = handler(self.config, self.registry)

        return app

    def __call__(self, environ, start_response):
        # check if we handle one of interesting protocols, optionally extract
        # specific vcsSettings and allow changes of how things are wrapped
        vcs_handler = self._get_handler_app(environ)
        if vcs_handler:
            # translate the _REPO_ID into real repo NAME for usage
            # in middleware

            path_info = get_path_info(environ)
            environ['PATH_INFO'] = vcs_handler._get_by_id(path_info)

            # Set acl, url and vcs repo names.
            vcs_handler.set_repo_names(environ)

            # register repo config back to the handler
            vcs_conf = self.vcs_config(vcs_handler.acl_repo_name)
            # maybe damaged/non existent settings. We still want to
            # pass that point to validate on is_valid_and_existing_repo
            # and return proper HTTP Code back to client
            if vcs_conf:
                vcs_handler.repo_vcs_config = vcs_conf

            # check for type, presence in database and on filesystem
            if not vcs_handler.is_valid_and_existing_repo(
                    vcs_handler.acl_repo_name,
                    vcs_handler.base_path,
                    vcs_handler.SCM):
                return HTTPNotFound()(environ, start_response)

            environ['REPO_NAME'] = vcs_handler.url_repo_name

            # Wrap handler in middlewares if they are enabled.
            vcs_handler = self.wrap_in_gzip_if_enabled(
                vcs_handler, self.config)
            vcs_handler, _ = wrap_in_appenlight_if_enabled(
                vcs_handler, self.config, self.appenlight_client)

            return vcs_handler(environ, start_response)

        return self.application(environ, start_response)
General Comments 0
You need to be logged in to leave comments. Login now