##// END OF EJS Templates
PATH_INFO: use new method to consistently extract proper PATH_INFO data
super-admin -
r5032:f4682f64 default
parent child Browse files
Show More
@@ -1,161 +1,162 b''
1 # -*- coding: utf-8 -*-
1 # -*- coding: utf-8 -*-
2
2
3 # Copyright (C) 2010-2020 RhodeCode GmbH
3 # Copyright (C) 2010-2020 RhodeCode GmbH
4 #
4 #
5 # This program is free software: you can redistribute it and/or modify
5 # This program is free software: you can redistribute it and/or modify
6 # it under the terms of the GNU Affero General Public License, version 3
6 # it under the terms of the GNU Affero General Public License, version 3
7 # (only), as published by the Free Software Foundation.
7 # (only), as published by the Free Software Foundation.
8 #
8 #
9 # This program is distributed in the hope that it will be useful,
9 # This program is distributed in the hope that it will be useful,
10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 # GNU General Public License for more details.
12 # GNU General Public License for more details.
13 #
13 #
14 # You should have received a copy of the GNU Affero General Public License
14 # You should have received a copy of the GNU Affero General Public License
15 # along with this program. If not, see <http://www.gnu.org/licenses/>.
15 # along with this program. If not, see <http://www.gnu.org/licenses/>.
16 #
16 #
17 # This program is dual-licensed. If you wish to learn more about the
17 # This program is dual-licensed. If you wish to learn more about the
18 # RhodeCode Enterprise Edition, including its added features, Support services,
18 # RhodeCode Enterprise Edition, including its added features, Support services,
19 # and proprietary license terms, please see https://rhodecode.com/licenses/
19 # and proprietary license terms, please see https://rhodecode.com/licenses/
20
20
21 import logging
21 import logging
22 import textwrap
22 import textwrap
23
23
24 import routes.middleware
24 import routes.middleware
25 import urlobject
25 import urlobject
26 import webob
26 import webob
27 import webob.exc
27 import webob.exc
28
28
29 import rhodecode.lib.auth
29 import rhodecode.lib.auth
30
30 from rhodecode.lib.middleware.utils import get_path_info
31
31
32 log = logging.getLogger(__name__)
32 log = logging.getLogger(__name__)
33
33
34
34
35 class CSRFDetector(object):
35 class CSRFDetector(object):
36 """
36 """
37 Middleware for preventing CSRF.
37 Middleware for preventing CSRF.
38
38
39
39
40 It checks that all requests are either GET or POST.
40 It checks that all requests are either GET or POST.
41 For POST requests, it logs the requests that do not have a CSRF token.
41 For POST requests, it logs the requests that do not have a CSRF token.
42 Eventually it will raise an error.
42 Eventually it will raise an error.
43
43
44 It special cases some endpoints as they do not really require a token.
44 It special cases some endpoints as they do not really require a token.
45
45
46 Note: this middleware is only intended for testing.
46 Note: this middleware is only intended for testing.
47 """
47 """
48
48
49 _PUT_DELETE_MESSAGE = textwrap.dedent('''
49 _PUT_DELETE_MESSAGE = textwrap.dedent('''
50 Do not call in tests app.delete or app.put, use instead
50 Do not call in tests app.delete or app.put, use instead
51 app.post(..., params={'_method': 'delete'}.
51 app.post(..., params={'_method': 'delete'}.
52
52
53 The reason is twofold. The first is because that's how the browser is
53 The reason is twofold. The first is because that's how the browser is
54 calling rhodecode and the second is because it allow us to detect
54 calling rhodecode and the second is because it allow us to detect
55 potential CSRF.''').strip()
55 potential CSRF.''').strip()
56
56
57 _PATHS_WITHOUT_TOKEN = frozenset((
57 _PATHS_WITHOUT_TOKEN = frozenset((
58 # The password is the token.
58 # The password is the token.
59 '/_admin/login',
59 '/_admin/login',
60 # Captcha may be enabled.
60 # Captcha may be enabled.
61 '/_admin/password_reset',
61 '/_admin/password_reset',
62 # Captcha may be enabled.
62 # Captcha may be enabled.
63 '/_admin/password_reset_confirmation',
63 '/_admin/password_reset_confirmation',
64 # Captcha may be enabled.
64 # Captcha may be enabled.
65 '/_admin/register',
65 '/_admin/register',
66 # No change in state with this controller.
66 # No change in state with this controller.
67 '/error/document',
67 '/error/document',
68 ))
68 ))
69
69
70 _SKIP_PATTERN = frozenset((
70 _SKIP_PATTERN = frozenset((
71 '/_admin/gists/',
71 '/_admin/gists/',
72 ))
72 ))
73
73
74 def __init__(self, app):
74 def __init__(self, app):
75 self._app = app
75 self._app = app
76
76
77 def __call__(self, environ, start_response):
77 def __call__(self, environ, start_response):
78 if environ['REQUEST_METHOD'].upper() not in ('GET', 'POST'):
78 if environ['REQUEST_METHOD'].upper() not in ('GET', 'POST'):
79 raise Exception(self._PUT_DELETE_MESSAGE)
79 raise Exception(self._PUT_DELETE_MESSAGE)
80 token_expected = environ['PATH_INFO'] not in self._PATHS_WITHOUT_TOKEN
80 path_info = get_path_info(environ)
81 token_expected = path_info not in self._PATHS_WITHOUT_TOKEN
81 allowed = True
82 allowed = True
82 for pattern in self._SKIP_PATTERN:
83 for pattern in self._SKIP_PATTERN:
83 if environ['PATH_INFO'].startswith(pattern):
84 if path_info.startswith(pattern):
84 allowed = False
85 allowed = False
85 break
86 break
86
87
87 if (environ['REQUEST_METHOD'] == 'POST' and
88 if (environ['REQUEST_METHOD'] == 'POST' and
88 token_expected and allowed and
89 token_expected and allowed and
89 routes.middleware.is_form_post(environ)):
90 routes.middleware.is_form_post(environ)):
90 body = environ['wsgi.input']
91 body = environ['wsgi.input']
91 if body.seekable():
92 if body.seekable():
92 pos = body.tell()
93 pos = body.tell()
93 content = body.read()
94 content = body.read()
94 body.seek(pos)
95 body.seek(pos)
95 elif hasattr(body, 'peek'):
96 elif hasattr(body, 'peek'):
96 content = body.peek()
97 content = body.peek()
97 else:
98 else:
98 raise Exception("Cannot check if the request has a CSRF token")
99 raise Exception("Cannot check if the request has a CSRF token")
99 if rhodecode.lib.auth.csrf_token_key not in content:
100 if rhodecode.lib.auth.csrf_token_key not in content:
100 raise Exception(
101 raise Exception(
101 '%s to %s does not have a csrf_token %r' %
102 '%s to %s does not have a csrf_token %r' %
102 (environ['REQUEST_METHOD'], environ['PATH_INFO'], content))
103 (environ['REQUEST_METHOD'], path_info, content))
103
104
104 return self._app(environ, start_response)
105 return self._app(environ, start_response)
105
106
106
107
107 def _get_scheme_host_port(url):
108 def _get_scheme_host_port(url):
108 url = urlobject.URLObject(url)
109 url = urlobject.URLObject(url)
109 if '://' not in url:
110 if '://' not in url:
110 return None, url, None
111 return None, url, None
111
112
112 scheme = url.scheme or 'http'
113 scheme = url.scheme or 'http'
113 port = url.port
114 port = url.port
114 if not port:
115 if not port:
115 if scheme == 'http':
116 if scheme == 'http':
116 port = 80
117 port = 80
117 elif scheme == 'https':
118 elif scheme == 'https':
118 port = 443
119 port = 443
119 host = url.netloc.without_port()
120 host = url.netloc.without_port()
120
121
121 return scheme, host, port
122 return scheme, host, port
122
123
123
124
124 def _equivalent_urls(url1, url2):
125 def _equivalent_urls(url1, url2):
125 """Check if both urls are equivalent."""
126 """Check if both urls are equivalent."""
126 return _get_scheme_host_port(url1) == _get_scheme_host_port(url2)
127 return _get_scheme_host_port(url1) == _get_scheme_host_port(url2)
127
128
128
129
129 class OriginChecker(object):
130 class OriginChecker(object):
130 """
131 """
131 Check whether the request has a valid Origin header.
132 Check whether the request has a valid Origin header.
132
133
133 See https://wiki.mozilla.org/Security/Origin for details.
134 See https://wiki.mozilla.org/Security/Origin for details.
134 """
135 """
135
136
136 def __init__(self, app, expected_origin, skip_urls=None):
137 def __init__(self, app, expected_origin, skip_urls=None):
137 """
138 """
138 :param expected_origin: the value we expect to see for the Origin
139 :param expected_origin: the value we expect to see for the Origin
139 header.
140 header.
140 :param skip_urls: list of urls for which we do not need to check the
141 :param skip_urls: list of urls for which we do not need to check the
141 Origin header.
142 Origin header.
142 """
143 """
143 self._app = app
144 self._app = app
144 self._expected_origin = expected_origin
145 self._expected_origin = expected_origin
145 self._skip_urls = frozenset(skip_urls or [])
146 self._skip_urls = frozenset(skip_urls or [])
146
147
147 def __call__(self, environ, start_response):
148 def __call__(self, environ, start_response):
148 origin_header = environ.get('HTTP_ORIGIN', '')
149 origin_header = environ.get('HTTP_ORIGIN', '')
149 origin = origin_header.split(' ', 1)[0]
150 origin = origin_header.split(' ', 1)[0]
150 if origin == 'null':
151 if origin == 'null':
151 origin = None
152 origin = None
152
153 path_info = get_path_info(environ)
153 if (environ['PATH_INFO'] not in self._skip_urls and origin and
154 if (path_info not in self._skip_urls and origin and
154 not _equivalent_urls(origin, self._expected_origin)):
155 not _equivalent_urls(origin, self._expected_origin)):
155 log.warn(
156 log.warning(
156 'Invalid Origin header detected: got %s, expected %s',
157 'Invalid Origin header detected: got %s, expected %s',
157 origin_header, self._expected_origin)
158 origin_header, self._expected_origin)
158 return webob.exc.HTTPForbidden('Origin header mismatch')(
159 return webob.exc.HTTPForbidden('Origin header mismatch')(
159 environ, start_response)
160 environ, start_response)
160 else:
161 else:
161 return self._app(environ, start_response)
162 return self._app(environ, start_response)
@@ -1,93 +1,94 b''
1 # -*- coding: utf-8 -*-
1 # -*- coding: utf-8 -*-
2
2
3 # Copyright (C) 2016-2020 RhodeCode GmbH
3 # Copyright (C) 2016-2020 RhodeCode GmbH
4 #
4 #
5 # This program is free software: you can redistribute it and/or modify
5 # This program is free software: you can redistribute it and/or modify
6 # it under the terms of the GNU Affero General Public License, version 3
6 # it under the terms of the GNU Affero General Public License, version 3
7 # (only), as published by the Free Software Foundation.
7 # (only), as published by the Free Software Foundation.
8 #
8 #
9 # This program is distributed in the hope that it will be useful,
9 # This program is distributed in the hope that it will be useful,
10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 # GNU General Public License for more details.
12 # GNU General Public License for more details.
13 #
13 #
14 # You should have received a copy of the GNU Affero General Public License
14 # You should have received a copy of the GNU Affero General Public License
15 # along with this program. If not, see <http://www.gnu.org/licenses/>.
15 # along with this program. If not, see <http://www.gnu.org/licenses/>.
16 #
16 #
17 # This program is dual-licensed. If you wish to learn more about the
17 # This program is dual-licensed. If you wish to learn more about the
18 # RhodeCode Enterprise Edition, including its added features, Support services,
18 # RhodeCode Enterprise Edition, including its added features, Support services,
19 # and proprietary license terms, please see https://rhodecode.com/licenses/
19 # and proprietary license terms, please see https://rhodecode.com/licenses/
20
20
21 import time
21 import time
22 import logging
22 import logging
23
23
24 import rhodecode
24 import rhodecode
25 from rhodecode.lib.auth import AuthUser
25 from rhodecode.lib.auth import AuthUser
26 from rhodecode.lib.base import get_ip_addr, get_access_path, get_user_agent
26 from rhodecode.lib.base import get_ip_addr, get_user_agent
27 from rhodecode.lib.middleware.utils import get_path_info
27 from rhodecode.lib.utils2 import safe_str, get_current_rhodecode_user
28 from rhodecode.lib.utils2 import safe_str, get_current_rhodecode_user
28
29
29
30
30 log = logging.getLogger(__name__)
31 log = logging.getLogger(__name__)
31
32
32
33
33 class RequestWrapperTween(object):
34 class RequestWrapperTween(object):
34 def __init__(self, handler, registry):
35 def __init__(self, handler, registry):
35 self.handler = handler
36 self.handler = handler
36 self.registry = registry
37 self.registry = registry
37
38
38 # one-time configuration code goes here
39 # one-time configuration code goes here
39
40
40 def _get_user_info(self, request):
41 def _get_user_info(self, request):
41 user = get_current_rhodecode_user(request)
42 user = get_current_rhodecode_user(request)
42 if not user:
43 if not user:
43 user = AuthUser.repr_user(ip=get_ip_addr(request.environ))
44 user = AuthUser.repr_user(ip=get_ip_addr(request.environ))
44 return user
45 return user
45
46
46 def __call__(self, request):
47 def __call__(self, request):
47 start = time.time()
48 start = time.time()
48 log.debug('Starting request time measurement')
49 log.debug('Starting request time measurement')
49 response = None
50 response = None
50 try:
51 try:
51 response = self.handler(request)
52 response = self.handler(request)
52 finally:
53 finally:
53 count = request.request_count()
54 count = request.request_count()
54 _ver_ = rhodecode.__version__
55 _ver_ = rhodecode.__version__
55 _path = safe_str(get_access_path(request.environ))
56 _path = get_path_info(request.environ)
56 _auth_user = self._get_user_info(request)
57 _auth_user = self._get_user_info(request)
57 ip = get_ip_addr(request.environ)
58 ip = get_ip_addr(request.environ)
58 match_route = request.matched_route.name if request.matched_route else "NOT_FOUND"
59 match_route = request.matched_route.name if request.matched_route else "NOT_FOUND"
59 resp_code = getattr(response, 'status_code', 'UNDEFINED')
60 resp_code = getattr(response, 'status_code', 'UNDEFINED')
60
61
61 total = time.time() - start
62 total = time.time() - start
62 log.info(
63 log.info(
63 'Req[%4s] %s %s Request to %s time: %.4fs [%s], RhodeCode %s',
64 'Req[%4s] %s %s Request to %s time: %.4fs [%s], RhodeCode %s',
64 count, _auth_user, request.environ.get('REQUEST_METHOD'),
65 count, _auth_user, request.environ.get('REQUEST_METHOD'),
65 _path, total, get_user_agent(request. environ), _ver_,
66 _path, total, get_user_agent(request. environ), _ver_,
66 extra={"time": total, "ver": _ver_, "ip": ip,
67 extra={"time": total, "ver": _ver_, "ip": ip,
67 "path": _path, "view_name": match_route, "code": resp_code}
68 "path": _path, "view_name": match_route, "code": resp_code}
68 )
69 )
69
70
70 statsd = request.registry.statsd
71 statsd = request.registry.statsd
71 if statsd:
72 if statsd:
72 elapsed_time_ms = round(1000.0 * total) # use ms only
73 elapsed_time_ms = round(1000.0 * total) # use ms only
73 statsd.timing(
74 statsd.timing(
74 "rhodecode_req_timing.histogram", elapsed_time_ms,
75 "rhodecode_req_timing.histogram", elapsed_time_ms,
75 tags=[
76 tags=[
76 "view_name:{}".format(match_route),
77 "view_name:{}".format(match_route),
77 "code:{}".format(resp_code)
78 "code:{}".format(resp_code)
78 ],
79 ],
79 use_decimals=False
80 use_decimals=False
80 )
81 )
81 statsd.incr(
82 statsd.incr(
82 'rhodecode_req_total', tags=[
83 'rhodecode_req_total', tags=[
83 "view_name:{}".format(match_route),
84 "view_name:{}".format(match_route),
84 "code:{}".format(resp_code)
85 "code:{}".format(resp_code)
85 ])
86 ])
86
87
87 return response
88 return response
88
89
89
90
90 def includeme(config):
91 def includeme(config):
91 config.add_tween(
92 config.add_tween(
92 'rhodecode.lib.middleware.request_wrapper.RequestWrapperTween',
93 'rhodecode.lib.middleware.request_wrapper.RequestWrapperTween',
93 )
94 )
@@ -1,156 +1,157 b''
1 # -*- coding: utf-8 -*-
1 # -*- coding: utf-8 -*-
2
2
3 # Copyright (C) 2010-2020 RhodeCode GmbH
3 # Copyright (C) 2010-2020 RhodeCode GmbH
4 #
4 #
5 # This program is free software: you can redistribute it and/or modify
5 # This program is free software: you can redistribute it and/or modify
6 # it under the terms of the GNU Affero General Public License, version 3
6 # it under the terms of the GNU Affero General Public License, version 3
7 # (only), as published by the Free Software Foundation.
7 # (only), as published by the Free Software Foundation.
8 #
8 #
9 # This program is distributed in the hope that it will be useful,
9 # This program is distributed in the hope that it will be useful,
10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 # GNU General Public License for more details.
12 # GNU General Public License for more details.
13 #
13 #
14 # You should have received a copy of the GNU Affero General Public License
14 # You should have received a copy of the GNU Affero General Public License
15 # along with this program. If not, see <http://www.gnu.org/licenses/>.
15 # along with this program. If not, see <http://www.gnu.org/licenses/>.
16 #
16 #
17 # This program is dual-licensed. If you wish to learn more about the
17 # This program is dual-licensed. If you wish to learn more about the
18 # RhodeCode Enterprise Edition, including its added features, Support services,
18 # RhodeCode Enterprise Edition, including its added features, Support services,
19 # and proprietary license terms, please see https://rhodecode.com/licenses/
19 # and proprietary license terms, please see https://rhodecode.com/licenses/
20
20
21 """
21 """
22 SimpleGit middleware for handling git protocol request (push/clone etc.)
22 SimpleGit middleware for handling git protocol request (push/clone etc.)
23 It's implemented with basic auth function
23 It's implemented with basic auth function
24 """
24 """
25 import os
25 import os
26 import re
26 import re
27 import logging
27 import logging
28 import urllib.parse
28 import urllib.parse
29
29
30 import rhodecode
30 import rhodecode
31 from rhodecode.lib import utils
31 from rhodecode.lib import utils
32 from rhodecode.lib import utils2
32 from rhodecode.lib import utils2
33 from rhodecode.lib.middleware import simplevcs
33 from rhodecode.lib.middleware import simplevcs
34 from rhodecode.lib.middleware.utils import get_path_info
34
35
35 log = logging.getLogger(__name__)
36 log = logging.getLogger(__name__)
36
37
37
38
38 GIT_PROTO_PAT = re.compile(
39 GIT_PROTO_PAT = re.compile(
39 r'^/(.+)/(info/refs|info/lfs/(.+)|git-upload-pack|git-receive-pack)')
40 r'^/(.+)/(info/refs|info/lfs/(.+)|git-upload-pack|git-receive-pack)')
40 GIT_LFS_PROTO_PAT = re.compile(r'^/(.+)/(info/lfs/(.+))')
41 GIT_LFS_PROTO_PAT = re.compile(r'^/(.+)/(info/lfs/(.+))')
41
42
42
43
43 def default_lfs_store():
44 def default_lfs_store():
44 """
45 """
45 Default lfs store location, it's consistent with Mercurials large file
45 Default LFS store location; it is consistent with Mercurial's large file
46 Default LFS store location; it is consistent with Mercurial's large file
46 store, which is in .cache/largefiles
47 store, which is in .cache/largefiles
48 """
48 from rhodecode.lib.vcs.backends.git import lfs_store
49 from rhodecode.lib.vcs.backends.git import lfs_store
49 user_home = os.path.expanduser("~")
50 user_home = os.path.expanduser("~")
50 return lfs_store(user_home)
51 return lfs_store(user_home)
51
52
52
53
53 class SimpleGit(simplevcs.SimpleVCS):
54 class SimpleGit(simplevcs.SimpleVCS):
54
55
55 SCM = 'git'
56 SCM = 'git'
56
57
57 def _get_repository_name(self, environ):
58 def _get_repository_name(self, environ):
58 """
59 """
59 Gets repository name out of PATH_INFO header
60 Gets repository name out of PATH_INFO header
60
61
61 :param environ: environ where PATH_INFO is stored
62 :param environ: environ where PATH_INFO is stored
62 """
63 """
63 repo_name = GIT_PROTO_PAT.match(environ['PATH_INFO']).group(1)
64 path_info = get_path_info(environ)
65 repo_name = GIT_PROTO_PAT.match(path_info).group(1)
64 # for GIT LFS, and bare format strip .git suffix from names
66 # for GIT LFS, and bare format strip .git suffix from names
65 if repo_name.endswith('.git'):
67 if repo_name.endswith('.git'):
66 repo_name = repo_name[:-4]
68 repo_name = repo_name[:-4]
67 return repo_name
69 return repo_name
68
70
69 def _get_lfs_action(self, path, request_method):
71 def _get_lfs_action(self, path, request_method):
70 """
72 """
71 Return an action based on the LFS request type.
73 Return an action based on the LFS request type.
72 Those routes are handled inside vcsserver app.
74 Those routes are handled inside vcsserver app.
73
75
74 batch -> POST to /info/lfs/objects/batch => PUSH/PULL
76 batch -> POST to /info/lfs/objects/batch => PUSH/PULL
75 batch is based on the `operation`,
77 batch is based on the `operation`,
76 which could be download or upload, but those are only
78 which could be download or upload, but those are only
77 instructions to fetch, so we always return pull
79 instructions to fetch, so we always return pull
78
80
79 download -> GET to /info/lfs/{oid} => PULL
81 download -> GET to /info/lfs/{oid} => PULL
80 upload -> PUT to /info/lfs/{oid} => PUSH
82 upload -> PUT to /info/lfs/{oid} => PUSH
81
83
82 verification -> POST to /info/lfs/verify => PULL
84 verification -> POST to /info/lfs/verify => PULL
83
85
84 """
86 """
85
87
86 match_obj = GIT_LFS_PROTO_PAT.match(path)
88 match_obj = GIT_LFS_PROTO_PAT.match(path)
87 _parts = match_obj.groups()
89 _parts = match_obj.groups()
88 repo_name, path, operation = _parts
90 repo_name, path, operation = _parts
89 log.debug(
91 log.debug(
90 'LFS: detecting operation based on following '
92 'LFS: detecting operation based on following '
91 'data: %s, req_method:%s', _parts, request_method)
93 'data: %s, req_method:%s', _parts, request_method)
92
94
93 if operation == 'verify':
95 if operation == 'verify':
94 return 'pull'
96 return 'pull'
95 elif operation == 'objects/batch':
97 elif operation == 'objects/batch':
96 # batch sends back instructions for API to dl/upl we report it
98 # batch sends back instructions for API to dl/upl we report it
97 # as pull
99 # as pull
98 if request_method == 'POST':
100 if request_method == 'POST':
99 return 'pull'
101 return 'pull'
100
102
101 elif operation:
103 elif operation:
102 # probably an OID; upload is a PUT, download a GET
104 # probably an OID; upload is a PUT, download a GET
103 if request_method == 'GET':
105 if request_method == 'GET':
104 return 'pull'
106 return 'pull'
105 else:
107 else:
106 return 'push'
108 return 'push'
107
109
108 # if default not found require push, as action
110 # if default not found require push, as action
109 return 'push'
111 return 'push'
110
112
111 _ACTION_MAPPING = {
113 _ACTION_MAPPING = {
112 'git-receive-pack': 'push',
114 'git-receive-pack': 'push',
113 'git-upload-pack': 'pull',
115 'git-upload-pack': 'pull',
114 }
116 }
115
117
116 def _get_action(self, environ):
118 def _get_action(self, environ):
117 """
119 """
118 Maps git request commands into a pull or push command.
120 Maps git request commands into a pull or push command.
119 In case of unknown/unexpected data, it returns 'pull' to be safe.
121 In case of unknown/unexpected data, it returns 'pull' to be safe.
120
122
121 :param environ:
123 :param environ:
122 """
124 """
123 path = environ['PATH_INFO']
125 path = get_path_info(environ)
124
126
125 if path.endswith('/info/refs'):
127 if path.endswith('/info/refs'):
126 query = urllib.parse.parse_qs(environ['QUERY_STRING'])
128 query = urllib.parse.parse_qs(environ['QUERY_STRING'])
127 service_cmd = query.get('service', [''])[0]
129 service_cmd = query.get('service', [''])[0]
128 return self._ACTION_MAPPING.get(service_cmd, 'pull')
130 return self._ACTION_MAPPING.get(service_cmd, 'pull')
129
131
130 elif GIT_LFS_PROTO_PAT.match(environ['PATH_INFO']):
132 elif GIT_LFS_PROTO_PAT.match(path):
131 return self._get_lfs_action(
133 return self._get_lfs_action(path, environ['REQUEST_METHOD'])
132 environ['PATH_INFO'], environ['REQUEST_METHOD'])
133
134
134 elif path.endswith('/git-receive-pack'):
135 elif path.endswith('/git-receive-pack'):
135 return 'push'
136 return 'push'
136 elif path.endswith('/git-upload-pack'):
137 elif path.endswith('/git-upload-pack'):
137 return 'pull'
138 return 'pull'
138
139
139 return 'pull'
140 return 'pull'
140
141
141 def _create_wsgi_app(self, repo_path, repo_name, config):
142 def _create_wsgi_app(self, repo_path, repo_name, config):
142 return self.scm_app.create_git_wsgi_app(
143 return self.scm_app.create_git_wsgi_app(
143 repo_path, repo_name, config)
144 repo_path, repo_name, config)
144
145
145 def _create_config(self, extras, repo_name, scheme='http'):
146 def _create_config(self, extras, repo_name, scheme='http'):
146 extras['git_update_server_info'] = utils2.str2bool(
147 extras['git_update_server_info'] = utils2.str2bool(
147 rhodecode.CONFIG.get('git_update_server_info'))
148 rhodecode.CONFIG.get('git_update_server_info'))
148
149
149 config = utils.make_db_config(repo=repo_name)
150 config = utils.make_db_config(repo=repo_name)
150 custom_store = config.get('vcs_git_lfs', 'store_location')
151 custom_store = config.get('vcs_git_lfs', 'store_location')
151
152
152 extras['git_lfs_enabled'] = utils2.str2bool(
153 extras['git_lfs_enabled'] = utils2.str2bool(
153 config.get('vcs_git_lfs', 'enabled'))
154 config.get('vcs_git_lfs', 'enabled'))
154 extras['git_lfs_store_path'] = custom_store or default_lfs_store()
155 extras['git_lfs_store_path'] = custom_store or default_lfs_store()
155 extras['git_lfs_http_scheme'] = scheme
156 extras['git_lfs_http_scheme'] = scheme
156 return extras
157 return extras
@@ -1,159 +1,160 b''
1 # -*- coding: utf-8 -*-
1 # -*- coding: utf-8 -*-
2
2
3 # Copyright (C) 2010-2020 RhodeCode GmbH
3 # Copyright (C) 2010-2020 RhodeCode GmbH
4 #
4 #
5 # This program is free software: you can redistribute it and/or modify
5 # This program is free software: you can redistribute it and/or modify
6 # it under the terms of the GNU Affero General Public License, version 3
6 # it under the terms of the GNU Affero General Public License, version 3
7 # (only), as published by the Free Software Foundation.
7 # (only), as published by the Free Software Foundation.
8 #
8 #
9 # This program is distributed in the hope that it will be useful,
9 # This program is distributed in the hope that it will be useful,
10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 # GNU General Public License for more details.
12 # GNU General Public License for more details.
13 #
13 #
14 # You should have received a copy of the GNU Affero General Public License
14 # You should have received a copy of the GNU Affero General Public License
15 # along with this program. If not, see <http://www.gnu.org/licenses/>.
15 # along with this program. If not, see <http://www.gnu.org/licenses/>.
16 #
16 #
17 # This program is dual-licensed. If you wish to learn more about the
17 # This program is dual-licensed. If you wish to learn more about the
18 # RhodeCode Enterprise Edition, including its added features, Support services,
18 # RhodeCode Enterprise Edition, including its added features, Support services,
19 # and proprietary license terms, please see https://rhodecode.com/licenses/
19 # and proprietary license terms, please see https://rhodecode.com/licenses/
20
20
21 """
21 """
22 SimpleHG middleware for handling mercurial protocol request
22 SimpleHG middleware for handling mercurial protocol request
23 (push/clone etc.). It's implemented with basic auth function
23 (push/clone etc.). It's implemented with basic auth function
24 """
24 """
25
25
26 import logging
26 import logging
27 import urllib.parse
27 import urllib.parse
28 import urllib.request, urllib.parse, urllib.error
28 import urllib.request, urllib.parse, urllib.error
29
29
30 from rhodecode.lib import utils
30 from rhodecode.lib import utils
31 from rhodecode.lib.ext_json import json
31 from rhodecode.lib.ext_json import json
32 from rhodecode.lib.middleware import simplevcs
32 from rhodecode.lib.middleware import simplevcs
33 from rhodecode.lib.middleware.utils import get_path_info
33
34
34 log = logging.getLogger(__name__)
35 log = logging.getLogger(__name__)
35
36
36
37
37 class SimpleHg(simplevcs.SimpleVCS):
38 class SimpleHg(simplevcs.SimpleVCS):
38
39
39 SCM = 'hg'
40 SCM = 'hg'
40
41
41 def _get_repository_name(self, environ):
42 def _get_repository_name(self, environ):
42 """
43 """
43 Gets repository name out of PATH_INFO header
44 Gets repository name out of PATH_INFO header
44
45
45 :param environ: environ where PATH_INFO is stored
46 :param environ: environ where PATH_INFO is stored
46 """
47 """
47 repo_name = environ['PATH_INFO']
48 repo_name = get_path_info(environ)
48 if repo_name and repo_name.startswith('/'):
49 if repo_name and repo_name.startswith('/'):
49 # remove only the first leading /
50 # remove only the first leading /
50 repo_name = repo_name[1:]
51 repo_name = repo_name[1:]
51 return repo_name.rstrip('/')
52 return repo_name.rstrip('/')
52
53
53 _ACTION_MAPPING = {
54 _ACTION_MAPPING = {
54 'changegroup': 'pull',
55 'changegroup': 'pull',
55 'changegroupsubset': 'pull',
56 'changegroupsubset': 'pull',
56 'getbundle': 'pull',
57 'getbundle': 'pull',
57 'stream_out': 'pull',
58 'stream_out': 'pull',
58 'listkeys': 'pull',
59 'listkeys': 'pull',
59 'between': 'pull',
60 'between': 'pull',
60 'branchmap': 'pull',
61 'branchmap': 'pull',
61 'branches': 'pull',
62 'branches': 'pull',
62 'clonebundles': 'pull',
63 'clonebundles': 'pull',
63 'capabilities': 'pull',
64 'capabilities': 'pull',
64 'debugwireargs': 'pull',
65 'debugwireargs': 'pull',
65 'heads': 'pull',
66 'heads': 'pull',
66 'lookup': 'pull',
67 'lookup': 'pull',
67 'hello': 'pull',
68 'hello': 'pull',
68 'known': 'pull',
69 'known': 'pull',
69
70
70 # largefiles
71 # largefiles
71 'putlfile': 'push',
72 'putlfile': 'push',
72 'getlfile': 'pull',
73 'getlfile': 'pull',
73 'statlfile': 'pull',
74 'statlfile': 'pull',
74 'lheads': 'pull',
75 'lheads': 'pull',
75
76
76 # evolve
77 # evolve
77 'evoext_obshashrange_v1': 'pull',
78 'evoext_obshashrange_v1': 'pull',
78 'evoext_obshash': 'pull',
79 'evoext_obshash': 'pull',
79 'evoext_obshash1': 'pull',
80 'evoext_obshash1': 'pull',
80
81
81 'unbundle': 'push',
82 'unbundle': 'push',
82 'pushkey': 'push',
83 'pushkey': 'push',
83 }
84 }
84
85
85 @classmethod
86 @classmethod
86 def _get_xarg_headers(cls, environ):
87 def _get_xarg_headers(cls, environ):
87 i = 1
88 i = 1
88 chunks = [] # gather chunks stored in multiple 'hgarg_N'
89 chunks = [] # gather chunks stored in multiple 'hgarg_N'
89 while True:
90 while True:
90 head = environ.get('HTTP_X_HGARG_{}'.format(i))
91 head = environ.get('HTTP_X_HGARG_{}'.format(i))
91 if not head:
92 if not head:
92 break
93 break
93 i += 1
94 i += 1
94 chunks.append(urllib.parse.unquote_plus(head))
95 chunks.append(urllib.parse.unquote_plus(head))
95 full_arg = ''.join(chunks)
96 full_arg = ''.join(chunks)
96 pref = 'cmds='
97 pref = 'cmds='
97 if full_arg.startswith(pref):
98 if full_arg.startswith(pref):
98 # strip the cmds= header defining our batch commands
99 # strip the cmds= header defining our batch commands
99 full_arg = full_arg[len(pref):]
100 full_arg = full_arg[len(pref):]
100 cmds = full_arg.split(';')
101 cmds = full_arg.split(';')
101 return cmds
102 return cmds
102
103
103 @classmethod
104 @classmethod
104 def _get_batch_cmd(cls, environ):
105 def _get_batch_cmd(cls, environ):
105 """
106 """
106 Handle batch command send commands. Those are ';' separated commands
106 Handle commands sent via the batch command. Those are ';' separated commands
107 Handle commands sent via the batch command. Those are ';' separated commands
107 sent by the batch command that the server needs to execute. We need to extract
108 sent by the batch command that the server needs to execute. We need to extract
109 those, and map them to our ACTION_MAPPING to get all push/pull commands
109 specified in the batch
110 specified in the batch
110 """
111 """
111 default = 'push'
112 default = 'push'
112 batch_cmds = []
113 batch_cmds = []
113 try:
114 try:
114 cmds = cls._get_xarg_headers(environ)
115 cmds = cls._get_xarg_headers(environ)
115 for pair in cmds:
116 for pair in cmds:
116 parts = pair.split(' ', 1)
117 parts = pair.split(' ', 1)
117 if len(parts) != 2:
118 if len(parts) != 2:
118 continue
119 continue
119 # entry should be in a format `key ARGS`
120 # entry should be in a format `key ARGS`
120 cmd, args = parts
121 cmd, args = parts
121 action = cls._ACTION_MAPPING.get(cmd, default)
122 action = cls._ACTION_MAPPING.get(cmd, default)
122 batch_cmds.append(action)
123 batch_cmds.append(action)
123 except Exception:
124 except Exception:
124 log.exception('Failed to extract batch commands operations')
125 log.exception('Failed to extract batch commands operations')
125
126
126 # in case we failed, (e.g malformed data) assume it's PUSH sub-command
126 # in case we failed (e.g. malformed data), assume it's a PUSH sub-command
127 # in case we failed (e.g. malformed data), assume it's a PUSH sub-command
128 # for safety
128 return batch_cmds or [default]
129 return batch_cmds or [default]
129
130
130 def _get_action(self, environ):
131 def _get_action(self, environ):
131 """
132 """
132 Maps mercurial request commands into a pull or push command.
133 Maps mercurial request commands into a pull or push command.
133 In case of unknown/unexpected data, it returns 'push' to be safe.
134 In case of unknown/unexpected data, it returns 'push' to be safe.
134
135
135 :param environ:
136 :param environ:
136 """
137 """
137 default = 'push'
138 default = 'push'
138 query = urllib.parse.parse_qs(environ['QUERY_STRING'], keep_blank_values=True)
139 query = urllib.parse.parse_qs(environ['QUERY_STRING'], keep_blank_values=True)
139
140
140 if 'cmd' in query:
141 if 'cmd' in query:
141 cmd = query['cmd'][0]
142 cmd = query['cmd'][0]
142 if cmd == 'batch':
143 if cmd == 'batch':
143 cmds = self._get_batch_cmd(environ)
144 cmds = self._get_batch_cmd(environ)
144 if 'push' in cmds:
145 if 'push' in cmds:
145 return 'push'
146 return 'push'
146 else:
147 else:
147 return 'pull'
148 return 'pull'
148 return self._ACTION_MAPPING.get(cmd, default)
149 return self._ACTION_MAPPING.get(cmd, default)
149
150
150 return default
151 return default
151
152
152 def _create_wsgi_app(self, repo_path, repo_name, config):
153 def _create_wsgi_app(self, repo_path, repo_name, config):
153 return self.scm_app.create_hg_wsgi_app(repo_path, repo_name, config)
154 return self.scm_app.create_hg_wsgi_app(repo_path, repo_name, config)
154
155
155 def _create_config(self, extras, repo_name, scheme='http'):
156 def _create_config(self, extras, repo_name, scheme='http'):
156 config = utils.make_db_config(repo=repo_name)
157 config = utils.make_db_config(repo=repo_name)
157 config.set('rhodecode', 'RC_SCM_DATA', json.dumps(extras))
158 config.set('rhodecode', 'RC_SCM_DATA', json.dumps(extras))
158
159
159 return config.serialize()
160 return config.serialize()
@@ -1,229 +1,231 b''
1 # -*- coding: utf-8 -*-
1 # -*- coding: utf-8 -*-
2
2
3 # Copyright (C) 2010-2020 RhodeCode GmbH
3 # Copyright (C) 2010-2020 RhodeCode GmbH
4 #
4 #
5 # This program is free software: you can redistribute it and/or modify
5 # This program is free software: you can redistribute it and/or modify
6 # it under the terms of the GNU Affero General Public License, version 3
6 # it under the terms of the GNU Affero General Public License, version 3
7 # (only), as published by the Free Software Foundation.
7 # (only), as published by the Free Software Foundation.
8 #
8 #
9 # This program is distributed in the hope that it will be useful,
9 # This program is distributed in the hope that it will be useful,
10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 # GNU General Public License for more details.
12 # GNU General Public License for more details.
13 #
13 #
14 # You should have received a copy of the GNU Affero General Public License
14 # You should have received a copy of the GNU Affero General Public License
15 # along with this program. If not, see <http://www.gnu.org/licenses/>.
15 # along with this program. If not, see <http://www.gnu.org/licenses/>.
16 #
16 #
17 # This program is dual-licensed. If you wish to learn more about the
17 # This program is dual-licensed. If you wish to learn more about the
18 # RhodeCode Enterprise Edition, including its added features, Support services,
18 # RhodeCode Enterprise Edition, including its added features, Support services,
19 # and proprietary license terms, please see https://rhodecode.com/licenses/
19 # and proprietary license terms, please see https://rhodecode.com/licenses/
20
20
21 import base64
21 import base64
22 import logging
22 import logging
23 import urllib.request, urllib.parse, urllib.error
23 import urllib.request, urllib.parse, urllib.error
24 import urllib.parse
24 import urllib.parse
25
25
26 import requests
26 import requests
27 from pyramid.httpexceptions import HTTPNotAcceptable
27 from pyramid.httpexceptions import HTTPNotAcceptable
28
28
29 from rhodecode.lib import rc_cache
29 from rhodecode.lib import rc_cache
30 from rhodecode.lib.middleware import simplevcs
30 from rhodecode.lib.middleware import simplevcs
31 from rhodecode.lib.middleware.utils import get_path_info
31 from rhodecode.lib.utils import is_valid_repo
32 from rhodecode.lib.utils import is_valid_repo
32 from rhodecode.lib.utils2 import str2bool, safe_int, safe_str
33 from rhodecode.lib.utils2 import str2bool, safe_int, safe_str
33 from rhodecode.lib.ext_json import json
34 from rhodecode.lib.ext_json import json
34 from rhodecode.lib.hooks_daemon import store_txn_id_data
35 from rhodecode.lib.hooks_daemon import store_txn_id_data
35
36
36
37
37 log = logging.getLogger(__name__)
38 log = logging.getLogger(__name__)
38
39
39
40
40 class SimpleSvnApp(object):
41 class SimpleSvnApp(object):
41 IGNORED_HEADERS = [
42 IGNORED_HEADERS = [
42 'connection', 'keep-alive', 'content-encoding',
43 'connection', 'keep-alive', 'content-encoding',
43 'transfer-encoding', 'content-length']
44 'transfer-encoding', 'content-length']
44 rc_extras = {}
45 rc_extras = {}
45
46
46 def __init__(self, config):
47 def __init__(self, config):
47 self.config = config
48 self.config = config
48
49
49 def __call__(self, environ, start_response):
50 def __call__(self, environ, start_response):
50 request_headers = self._get_request_headers(environ)
51 request_headers = self._get_request_headers(environ)
51 data = environ['wsgi.input']
52 data = environ['wsgi.input']
52 req_method = environ['REQUEST_METHOD']
53 req_method = environ['REQUEST_METHOD']
53 has_content_length = 'CONTENT_LENGTH' in environ
54 has_content_length = 'CONTENT_LENGTH' in environ
55
54 path_info = self._get_url(
56 path_info = self._get_url(
55 self.config.get('subversion_http_server_url', ''), environ['PATH_INFO'])
57 self.config.get('subversion_http_server_url', ''), get_path_info(environ))
56 transfer_encoding = environ.get('HTTP_TRANSFER_ENCODING', '')
58 transfer_encoding = environ.get('HTTP_TRANSFER_ENCODING', '')
57 log.debug('Handling: %s method via `%s`', req_method, path_info)
59 log.debug('Handling: %s method via `%s`', req_method, path_info)
58
60
59 # stream control flag, based on request and content type...
61 # stream control flag, based on request and content type...
60 stream = False
62 stream = False
61
63
62 if req_method in ['MKCOL'] or has_content_length:
64 if req_method in ['MKCOL'] or has_content_length:
63 data_processed = False
65 data_processed = False
64 # read chunk to check if we have txn-with-props
66 # read chunk to check if we have txn-with-props
65 initial_data = data.read(1024)
67 initial_data = data.read(1024)
66 if initial_data.startswith('(create-txn-with-props'):
68 if initial_data.startswith('(create-txn-with-props'):
67 data = initial_data + data.read()
69 data = initial_data + data.read()
68 # store on-the-fly our rc_extra using svn revision properties
70 # store on-the-fly our rc_extra using svn revision properties
69 # those can be read later on in hooks executed so we have a way
71 # those can be read later on in hooks executed so we have a way
70 # to pass in the data into svn hooks
72 # to pass in the data into svn hooks
71 rc_data = base64.urlsafe_b64encode(json.dumps(self.rc_extras))
73 rc_data = base64.urlsafe_b64encode(json.dumps(self.rc_extras))
72 rc_data_len = len(rc_data)
74 rc_data_len = len(rc_data)
73 # header defines data length, and serialized data
75 # header defines data length, and serialized data
74 skel = ' rc-scm-extras {} {}'.format(rc_data_len, rc_data)
76 skel = ' rc-scm-extras {} {}'.format(rc_data_len, rc_data)
75 data = data[:-2] + skel + '))'
77 data = data[:-2] + skel + '))'
76 data_processed = True
78 data_processed = True
77
79
78 if not data_processed:
80 if not data_processed:
79 # NOTE(johbo): Avoid that we end up with sending the request in chunked
81 # NOTE(johbo): Avoid that we end up with sending the request in chunked
80 # transfer encoding (mainly on Gunicorn). If we know the content
82 # transfer encoding (mainly on Gunicorn). If we know the content
81 # length, then we should transfer the payload in one request.
83 # length, then we should transfer the payload in one request.
82 data = initial_data + data.read()
84 data = initial_data + data.read()
83
85
84 if req_method in ['GET', 'PUT'] or transfer_encoding == 'chunked':
86 if req_method in ['GET', 'PUT'] or transfer_encoding == 'chunked':
85 # NOTE(marcink): when getting/uploading files we want to STREAM content
87 # NOTE(marcink): when getting/uploading files we want to STREAM content
86 # back to the client/proxy instead of buffering it here...
88 # back to the client/proxy instead of buffering it here...
87 stream = True
89 stream = True
88
90
89 stream = stream
91 stream = stream
90 log.debug('Calling SVN PROXY at `%s`, using method:%s. Stream: %s',
92 log.debug('Calling SVN PROXY at `%s`, using method:%s. Stream: %s',
91 path_info, req_method, stream)
93 path_info, req_method, stream)
92 try:
94 try:
93 response = requests.request(
95 response = requests.request(
94 req_method, path_info,
96 req_method, path_info,
95 data=data, headers=request_headers, stream=stream)
97 data=data, headers=request_headers, stream=stream)
96 except requests.ConnectionError:
98 except requests.ConnectionError:
97 log.exception('ConnectionError occurred for endpoint %s', path_info)
99 log.exception('ConnectionError occurred for endpoint %s', path_info)
98 raise
100 raise
99
101
100 if response.status_code not in [200, 401]:
102 if response.status_code not in [200, 401]:
101 from rhodecode.lib.utils2 import safe_str
103 from rhodecode.lib.utils2 import safe_str
102 text = '\n{}'.format(safe_str(response.text)) if response.text else ''
104 text = '\n{}'.format(safe_str(response.text)) if response.text else ''
103 if response.status_code >= 500:
105 if response.status_code >= 500:
104 log.error('Got SVN response:%s with text:`%s`', response, text)
106 log.error('Got SVN response:%s with text:`%s`', response, text)
105 else:
107 else:
106 log.debug('Got SVN response:%s with text:`%s`', response, text)
108 log.debug('Got SVN response:%s with text:`%s`', response, text)
107 else:
109 else:
108 log.debug('got response code: %s', response.status_code)
110 log.debug('got response code: %s', response.status_code)
109
111
110 response_headers = self._get_response_headers(response.headers)
112 response_headers = self._get_response_headers(response.headers)
111
113
112 if response.headers.get('SVN-Txn-name'):
114 if response.headers.get('SVN-Txn-name'):
113 svn_tx_id = response.headers.get('SVN-Txn-name')
115 svn_tx_id = response.headers.get('SVN-Txn-name')
114 txn_id = rc_cache.utils.compute_key_from_params(
116 txn_id = rc_cache.utils.compute_key_from_params(
115 self.config['repository'], svn_tx_id)
117 self.config['repository'], svn_tx_id)
116 port = safe_int(self.rc_extras['hooks_uri'].split(':')[-1])
118 port = safe_int(self.rc_extras['hooks_uri'].split(':')[-1])
117 store_txn_id_data(txn_id, {'port': port})
119 store_txn_id_data(txn_id, {'port': port})
118
120
119 start_response(
121 start_response(
120 '{} {}'.format(response.status_code, response.reason),
122 '{} {}'.format(response.status_code, response.reason),
121 response_headers)
123 response_headers)
122 return response.iter_content(chunk_size=1024)
124 return response.iter_content(chunk_size=1024)
123
125
124 def _get_url(self, svn_http_server, path):
126 def _get_url(self, svn_http_server, path):
125 svn_http_server_url = (svn_http_server or '').rstrip('/')
127 svn_http_server_url = (svn_http_server or '').rstrip('/')
126 url_path = urllib.parse.urljoin(svn_http_server_url + '/', (path or '').lstrip('/'))
128 url_path = urllib.parse.urljoin(svn_http_server_url + '/', (path or '').lstrip('/'))
127 url_path = urllib.parse.quote(url_path, safe="/:=~+!$,;'")
129 url_path = urllib.parse.quote(url_path, safe="/:=~+!$,;'")
128 return url_path
130 return url_path
129
131
130 def _get_request_headers(self, environ):
132 def _get_request_headers(self, environ):
131 headers = {}
133 headers = {}
132
134
133 for key in environ:
135 for key in environ:
134 if not key.startswith('HTTP_'):
136 if not key.startswith('HTTP_'):
135 continue
137 continue
136 new_key = key.split('_')
138 new_key = key.split('_')
137 new_key = [k.capitalize() for k in new_key[1:]]
139 new_key = [k.capitalize() for k in new_key[1:]]
138 new_key = '-'.join(new_key)
140 new_key = '-'.join(new_key)
139 headers[new_key] = environ[key]
141 headers[new_key] = environ[key]
140
142
141 if 'CONTENT_TYPE' in environ:
143 if 'CONTENT_TYPE' in environ:
142 headers['Content-Type'] = environ['CONTENT_TYPE']
144 headers['Content-Type'] = environ['CONTENT_TYPE']
143
145
144 if 'CONTENT_LENGTH' in environ:
146 if 'CONTENT_LENGTH' in environ:
145 headers['Content-Length'] = environ['CONTENT_LENGTH']
147 headers['Content-Length'] = environ['CONTENT_LENGTH']
146
148
147 return headers
149 return headers
148
150
149 def _get_response_headers(self, headers):
151 def _get_response_headers(self, headers):
150 headers = [
152 headers = [
151 (h, headers[h])
153 (h, headers[h])
152 for h in headers
154 for h in headers
153 if h.lower() not in self.IGNORED_HEADERS
155 if h.lower() not in self.IGNORED_HEADERS
154 ]
156 ]
155
157
156 return headers
158 return headers
157
159
158
160
159 class DisabledSimpleSvnApp(object):
161 class DisabledSimpleSvnApp(object):
160 def __init__(self, config):
162 def __init__(self, config):
161 self.config = config
163 self.config = config
162
164
163 def __call__(self, environ, start_response):
165 def __call__(self, environ, start_response):
164 reason = 'Cannot handle SVN call because: SVN HTTP Proxy is not enabled'
166 reason = 'Cannot handle SVN call because: SVN HTTP Proxy is not enabled'
165 log.warning(reason)
167 log.warning(reason)
166 return HTTPNotAcceptable(reason)(environ, start_response)
168 return HTTPNotAcceptable(reason)(environ, start_response)
167
169
168
170
169 class SimpleSvn(simplevcs.SimpleVCS):
171 class SimpleSvn(simplevcs.SimpleVCS):
170
172
171 SCM = 'svn'
173 SCM = 'svn'
172 READ_ONLY_COMMANDS = ('OPTIONS', 'PROPFIND', 'GET', 'REPORT')
174 READ_ONLY_COMMANDS = ('OPTIONS', 'PROPFIND', 'GET', 'REPORT')
173 DEFAULT_HTTP_SERVER = 'http://localhost:8090'
175 DEFAULT_HTTP_SERVER = 'http://localhost:8090'
174
176
175 def _get_repository_name(self, environ):
177 def _get_repository_name(self, environ):
176 """
178 """
177 Gets repository name out of PATH_INFO header
179 Gets repository name out of PATH_INFO header
178
180
179 :param environ: environ where PATH_INFO is stored
181 :param environ: environ where PATH_INFO is stored
180 """
182 """
181 path = environ['PATH_INFO'].split('!')
183 path = get_path_info(environ).split('!')
182 repo_name = path[0].strip('/')
184 repo_name = path[0].strip('/')
183
185
184 # SVN includes the whole path in it's requests, including
186 # SVN includes the whole path in it's requests, including
185 # subdirectories inside the repo. Therefore we have to search for
187 # subdirectories inside the repo. Therefore we have to search for
186 # the repo root directory.
188 # the repo root directory.
187 if not is_valid_repo(
189 if not is_valid_repo(
188 repo_name, self.base_path, explicit_scm=self.SCM):
190 repo_name, self.base_path, explicit_scm=self.SCM):
189 current_path = ''
191 current_path = ''
190 for component in repo_name.split('/'):
192 for component in repo_name.split('/'):
191 current_path += component
193 current_path += component
192 if is_valid_repo(
194 if is_valid_repo(
193 current_path, self.base_path, explicit_scm=self.SCM):
195 current_path, self.base_path, explicit_scm=self.SCM):
194 return current_path
196 return current_path
195 current_path += '/'
197 current_path += '/'
196
198
197 return repo_name
199 return repo_name
198
200
199 def _get_action(self, environ):
201 def _get_action(self, environ):
200 return (
202 return (
201 'pull'
203 'pull'
202 if environ['REQUEST_METHOD'] in self.READ_ONLY_COMMANDS
204 if environ['REQUEST_METHOD'] in self.READ_ONLY_COMMANDS
203 else 'push')
205 else 'push')
204
206
205 def _should_use_callback_daemon(self, extras, environ, action):
207 def _should_use_callback_daemon(self, extras, environ, action):
206 # only MERGE command triggers hooks, so we don't want to start
208 # only MERGE command triggers hooks, so we don't want to start
207 # hooks server too many times. POST however starts the svn transaction
209 # hooks server too many times. POST however starts the svn transaction
208 # so we also need to run the init of callback daemon of POST
210 # so we also need to run the init of callback daemon of POST
209 if environ['REQUEST_METHOD'] in ['MERGE', 'POST']:
211 if environ['REQUEST_METHOD'] in ['MERGE', 'POST']:
210 return True
212 return True
211 return False
213 return False
212
214
213 def _create_wsgi_app(self, repo_path, repo_name, config):
215 def _create_wsgi_app(self, repo_path, repo_name, config):
214 if self._is_svn_enabled():
216 if self._is_svn_enabled():
215 return SimpleSvnApp(config)
217 return SimpleSvnApp(config)
216 # we don't have http proxy enabled return dummy request handler
218 # we don't have http proxy enabled return dummy request handler
217 return DisabledSimpleSvnApp(config)
219 return DisabledSimpleSvnApp(config)
218
220
219 def _is_svn_enabled(self):
221 def _is_svn_enabled(self):
220 conf = self.repo_vcs_config
222 conf = self.repo_vcs_config
221 return str2bool(conf.get('vcs_svn_proxy', 'http_requests_enabled'))
223 return str2bool(conf.get('vcs_svn_proxy', 'http_requests_enabled'))
222
224
223 def _create_config(self, extras, repo_name, scheme='http'):
225 def _create_config(self, extras, repo_name, scheme='http'):
224 conf = self.repo_vcs_config
226 conf = self.repo_vcs_config
225 server_url = conf.get('vcs_svn_proxy', 'http_server_url')
227 server_url = conf.get('vcs_svn_proxy', 'http_server_url')
226 server_url = server_url or self.DEFAULT_HTTP_SERVER
228 server_url = server_url or self.DEFAULT_HTTP_SERVER
227
229
228 extras['subversion_http_server_url'] = server_url
230 extras['subversion_http_server_url'] = server_url
229 return extras
231 return extras
@@ -1,189 +1,189 b''
1 # -*- coding: utf-8 -*-
1 # -*- coding: utf-8 -*-
2
2
3 # Copyright (C) 2014-2020 RhodeCode GmbH
3 # Copyright (C) 2014-2020 RhodeCode GmbH
4 #
4 #
5 # This program is free software: you can redistribute it and/or modify
5 # This program is free software: you can redistribute it and/or modify
6 # it under the terms of the GNU Affero General Public License, version 3
6 # it under the terms of the GNU Affero General Public License, version 3
7 # (only), as published by the Free Software Foundation.
7 # (only), as published by the Free Software Foundation.
8 #
8 #
9 # This program is distributed in the hope that it will be useful,
9 # This program is distributed in the hope that it will be useful,
10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 # GNU General Public License for more details.
12 # GNU General Public License for more details.
13 #
13 #
14 # You should have received a copy of the GNU Affero General Public License
14 # You should have received a copy of the GNU Affero General Public License
15 # along with this program. If not, see <http://www.gnu.org/licenses/>.
15 # along with this program. If not, see <http://www.gnu.org/licenses/>.
16 #
16 #
17 # This program is dual-licensed. If you wish to learn more about the
17 # This program is dual-licensed. If you wish to learn more about the
18 # RhodeCode Enterprise Edition, including its added features, Support services,
18 # RhodeCode Enterprise Edition, including its added features, Support services,
19 # and proprietary license terms, please see https://rhodecode.com/licenses/
19 # and proprietary license terms, please see https://rhodecode.com/licenses/
20
20
21 """
21 """
22 Implementation of the scm_app interface using raw HTTP communication.
22 Implementation of the scm_app interface using raw HTTP communication.
23 """
23 """
24
24
25 import base64
25 import base64
26 import logging
26 import logging
27 import urllib.parse
27 import urllib.parse
28 import wsgiref.util
28 import wsgiref.util
29
29
30 import msgpack
30 import msgpack
31 import requests
31 import requests
32 import webob.request
32 import webob.request
33
33
34 import rhodecode
34 import rhodecode
35
35 from rhodecode.lib.middleware.utils import get_path_info
36
36
37 log = logging.getLogger(__name__)
37 log = logging.getLogger(__name__)
38
38
39
39
40 def create_git_wsgi_app(repo_path, repo_name, config):
40 def create_git_wsgi_app(repo_path, repo_name, config):
41 url = _vcs_streaming_url() + 'git/'
41 url = _vcs_streaming_url() + 'git/'
42 return VcsHttpProxy(url, repo_path, repo_name, config)
42 return VcsHttpProxy(url, repo_path, repo_name, config)
43
43
44
44
45 def create_hg_wsgi_app(repo_path, repo_name, config):
45 def create_hg_wsgi_app(repo_path, repo_name, config):
46 url = _vcs_streaming_url() + 'hg/'
46 url = _vcs_streaming_url() + 'hg/'
47 return VcsHttpProxy(url, repo_path, repo_name, config)
47 return VcsHttpProxy(url, repo_path, repo_name, config)
48
48
49
49
50 def _vcs_streaming_url():
50 def _vcs_streaming_url():
51 template = 'http://{}/stream/'
51 template = 'http://{}/stream/'
52 return template.format(rhodecode.CONFIG['vcs.server'])
52 return template.format(rhodecode.CONFIG['vcs.server'])
53
53
54
54
55 # TODO: johbo: Avoid the global.
55 # TODO: johbo: Avoid the global.
56 session = requests.Session()
56 session = requests.Session()
57 # Requests speedup, avoid reading .netrc and similar
57 # Requests speedup, avoid reading .netrc and similar
58 session.trust_env = False
58 session.trust_env = False
59
59
60 # prevent urllib3 spawning our logs.
60 # prevent urllib3 spawning our logs.
61 logging.getLogger("requests.packages.urllib3.connectionpool").setLevel(
61 logging.getLogger("requests.packages.urllib3.connectionpool").setLevel(
62 logging.WARNING)
62 logging.WARNING)
63
63
64
64
65 class VcsHttpProxy(object):
65 class VcsHttpProxy(object):
66 """
66 """
67 A WSGI application which proxies vcs requests.
67 A WSGI application which proxies vcs requests.
68
68
69 The goal is to shuffle the data around without touching it. The only
69 The goal is to shuffle the data around without touching it. The only
70 exception is the extra data from the config object which we send to the
70 exception is the extra data from the config object which we send to the
71 server as well.
71 server as well.
72 """
72 """
73
73
74 def __init__(self, url, repo_path, repo_name, config):
74 def __init__(self, url, repo_path, repo_name, config):
75 """
75 """
76 :param str url: The URL of the VCSServer to call.
76 :param str url: The URL of the VCSServer to call.
77 """
77 """
78 self._url = url
78 self._url = url
79 self._repo_name = repo_name
79 self._repo_name = repo_name
80 self._repo_path = repo_path
80 self._repo_path = repo_path
81 self._config = config
81 self._config = config
82 self.rc_extras = {}
82 self.rc_extras = {}
83 log.debug(
83 log.debug(
84 "Creating VcsHttpProxy for repo %s, url %s",
84 "Creating VcsHttpProxy for repo %s, url %s",
85 repo_name, url)
85 repo_name, url)
86
86
87 def __call__(self, environ, start_response):
87 def __call__(self, environ, start_response):
88 config = msgpack.packb(self._config)
88 config = msgpack.packb(self._config)
89 request = webob.request.Request(environ)
89 request = webob.request.Request(environ)
90 request_headers = request.headers
90 request_headers = request.headers
91
91
92 request_headers.update({
92 request_headers.update({
93 # TODO: johbo: Remove this, rely on URL path only
93 # TODO: johbo: Remove this, rely on URL path only
94 'X-RC-Repo-Name': self._repo_name,
94 'X-RC-Repo-Name': self._repo_name,
95 'X-RC-Repo-Path': self._repo_path,
95 'X-RC-Repo-Path': self._repo_path,
96 'X-RC-Path-Info': environ['PATH_INFO'],
96 'X-RC-Path-Info': environ['PATH_INFO'],
97
97
98 'X-RC-Repo-Store': self.rc_extras.get('repo_store'),
98 'X-RC-Repo-Store': self.rc_extras.get('repo_store'),
99 'X-RC-Server-Config-File': self.rc_extras.get('config'),
99 'X-RC-Server-Config-File': self.rc_extras.get('config'),
100
100
101 'X-RC-Auth-User': self.rc_extras.get('username'),
101 'X-RC-Auth-User': self.rc_extras.get('username'),
102 'X-RC-Auth-User-Id': str(self.rc_extras.get('user_id')),
102 'X-RC-Auth-User-Id': str(self.rc_extras.get('user_id')),
103 'X-RC-Auth-User-Ip': self.rc_extras.get('ip'),
103 'X-RC-Auth-User-Ip': self.rc_extras.get('ip'),
104
104
105 # TODO: johbo: Avoid encoding and put this into payload?
105 # TODO: johbo: Avoid encoding and put this into payload?
106 'X-RC-Repo-Config': base64.b64encode(config),
106 'X-RC-Repo-Config': base64.b64encode(config),
107 'X-RC-Locked-Status-Code': rhodecode.CONFIG.get('lock_ret_code'),
107 'X-RC-Locked-Status-Code': rhodecode.CONFIG.get('lock_ret_code'),
108 })
108 })
109
109
110 method = environ['REQUEST_METHOD']
110 method = environ['REQUEST_METHOD']
111
111
112 # Preserve the query string
112 # Preserve the query string
113 url = self._url
113 url = self._url
114 url = urllib.parse.urljoin(url, self._repo_name)
114 url = urllib.parse.urljoin(url, self._repo_name)
115 if environ.get('QUERY_STRING'):
115 if environ.get('QUERY_STRING'):
116 url += '?' + environ['QUERY_STRING']
116 url += '?' + environ['QUERY_STRING']
117
117
118 log.debug('http-app: preparing request to: %s', url)
118 log.debug('http-app: preparing request to: %s', url)
119 response = session.request(
119 response = session.request(
120 method,
120 method,
121 url,
121 url,
122 data=_maybe_stream_request(environ),
122 data=_maybe_stream_request(environ),
123 headers=request_headers,
123 headers=request_headers,
124 stream=True)
124 stream=True)
125
125
126 log.debug('http-app: got vcsserver response: %s', response)
126 log.debug('http-app: got vcsserver response: %s', response)
127 if response.status_code >= 500:
127 if response.status_code >= 500:
128 log.error('Exception returned by vcsserver at: %s %s, %s',
128 log.error('Exception returned by vcsserver at: %s %s, %s',
129 url, response.status_code, response.content)
129 url, response.status_code, response.content)
130
130
131 # Preserve the headers of the response, except hop_by_hop ones
131 # Preserve the headers of the response, except hop_by_hop ones
132 response_headers = [
132 response_headers = [
133 (h, v) for h, v in response.headers.items()
133 (h, v) for h, v in response.headers.items()
134 if not wsgiref.util.is_hop_by_hop(h)
134 if not wsgiref.util.is_hop_by_hop(h)
135 ]
135 ]
136
136
137 # Build status argument for start_response callable.
137 # Build status argument for start_response callable.
138 status = '{status_code} {reason_phrase}'.format(
138 status = '{status_code} {reason_phrase}'.format(
139 status_code=response.status_code,
139 status_code=response.status_code,
140 reason_phrase=response.reason)
140 reason_phrase=response.reason)
141
141
142 start_response(status, response_headers)
142 start_response(status, response_headers)
143 return _maybe_stream_response(response)
143 return _maybe_stream_response(response)
144
144
145
145
146 def read_in_chunks(stream_obj, block_size=1024, chunks=-1):
146 def read_in_chunks(stream_obj, block_size=1024, chunks=-1):
147 """
147 """
148 Read Stream in chunks, default chunk size: 1k.
148 Read Stream in chunks, default chunk size: 1k.
149 """
149 """
150 while chunks:
150 while chunks:
151 data = stream_obj.read(block_size)
151 data = stream_obj.read(block_size)
152 if not data:
152 if not data:
153 break
153 break
154 yield data
154 yield data
155 chunks -= 1
155 chunks -= 1
156
156
157
157
158 def _is_request_chunked(environ):
158 def _is_request_chunked(environ):
159 stream = environ.get('HTTP_TRANSFER_ENCODING', '') == 'chunked'
159 stream = environ.get('HTTP_TRANSFER_ENCODING', '') == 'chunked'
160 return stream
160 return stream
161
161
162
162
163 def _maybe_stream_request(environ):
163 def _maybe_stream_request(environ):
164 path = environ['PATH_INFO']
164 path = get_path_info(environ)
165 stream = _is_request_chunked(environ)
165 stream = _is_request_chunked(environ)
166 log.debug('handling request `%s` with stream support: %s', path, stream)
166 log.debug('handling request `%s` with stream support: %s', path, stream)
167
167
168 if stream:
168 if stream:
169 # set stream by 256k
169 # set stream by 256k
170 return read_in_chunks(environ['wsgi.input'], block_size=1024 * 256)
170 return read_in_chunks(environ['wsgi.input'], block_size=1024 * 256)
171 else:
171 else:
172 return environ['wsgi.input'].read()
172 return environ['wsgi.input'].read()
173
173
174
174
175 def _maybe_stream_response(response):
175 def _maybe_stream_response(response):
176 """
176 """
177 Try to generate chunks from the response if it is chunked.
177 Try to generate chunks from the response if it is chunked.
178 """
178 """
179 stream = _is_chunked(response)
179 stream = _is_chunked(response)
180 log.debug('returning response with stream: %s', stream)
180 log.debug('returning response with stream: %s', stream)
181 if stream:
181 if stream:
182 # read in 256k Chunks
182 # read in 256k Chunks
183 return response.raw.read_chunked(amt=1024 * 256)
183 return response.raw.read_chunked(amt=1024 * 256)
184 else:
184 else:
185 return [response.content]
185 return [response.content]
186
186
187
187
188 def _is_chunked(response):
188 def _is_chunked(response):
189 return response.headers.get('Transfer-Encoding', '') == 'chunked'
189 return response.headers.get('Transfer-Encoding', '') == 'chunked'
@@ -1,284 +1,289 b''
1 # -*- coding: utf-8 -*-
1 # -*- coding: utf-8 -*-
2
2
3 # Copyright (C) 2010-2020 RhodeCode GmbH
3 # Copyright (C) 2010-2020 RhodeCode GmbH
4 #
4 #
5 # This program is free software: you can redistribute it and/or modify
5 # This program is free software: you can redistribute it and/or modify
6 # it under the terms of the GNU Affero General Public License, version 3
6 # it under the terms of the GNU Affero General Public License, version 3
7 # (only), as published by the Free Software Foundation.
7 # (only), as published by the Free Software Foundation.
8 #
8 #
9 # This program is distributed in the hope that it will be useful,
9 # This program is distributed in the hope that it will be useful,
10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 # GNU General Public License for more details.
12 # GNU General Public License for more details.
13 #
13 #
14 # You should have received a copy of the GNU Affero General Public License
14 # You should have received a copy of the GNU Affero General Public License
15 # along with this program. If not, see <http://www.gnu.org/licenses/>.
15 # along with this program. If not, see <http://www.gnu.org/licenses/>.
16 #
16 #
17 # This program is dual-licensed. If you wish to learn more about the
17 # This program is dual-licensed. If you wish to learn more about the
18 # RhodeCode Enterprise Edition, including its added features, Support services,
18 # RhodeCode Enterprise Edition, including its added features, Support services,
19 # and proprietary license terms, please see https://rhodecode.com/licenses/
19 # and proprietary license terms, please see https://rhodecode.com/licenses/
20
20
21 import gzip
21 import gzip
22 import shutil
22 import shutil
23 import logging
23 import logging
24 import tempfile
24 import tempfile
25 import urllib.parse
25 import urllib.parse
26
26
27 from webob.exc import HTTPNotFound
27 from webob.exc import HTTPNotFound
28
28
29 import rhodecode
29 import rhodecode
30 from rhodecode.lib.middleware.utils import get_path_info
30 from rhodecode.lib.middleware.appenlight import wrap_in_appenlight_if_enabled
31 from rhodecode.lib.middleware.appenlight import wrap_in_appenlight_if_enabled
31 from rhodecode.lib.middleware.simplegit import SimpleGit, GIT_PROTO_PAT
32 from rhodecode.lib.middleware.simplegit import SimpleGit, GIT_PROTO_PAT
32 from rhodecode.lib.middleware.simplehg import SimpleHg
33 from rhodecode.lib.middleware.simplehg import SimpleHg
33 from rhodecode.lib.middleware.simplesvn import SimpleSvn
34 from rhodecode.lib.middleware.simplesvn import SimpleSvn
34 from rhodecode.model.settings import VcsSettingsModel
35 from rhodecode.model.settings import VcsSettingsModel
35
36
37
36 log = logging.getLogger(__name__)
38 log = logging.getLogger(__name__)
37
39
38 VCS_TYPE_KEY = '_rc_vcs_type'
40 VCS_TYPE_KEY = '_rc_vcs_type'
39 VCS_TYPE_SKIP = '_rc_vcs_skip'
41 VCS_TYPE_SKIP = '_rc_vcs_skip'
40
42
41
43
42 def is_git(environ):
44 def is_git(environ):
43 """
45 """
44 Returns True if requests should be handled by GIT wsgi middleware
46 Returns True if requests should be handled by GIT wsgi middleware
45 """
47 """
46 is_git_path = GIT_PROTO_PAT.match(environ['PATH_INFO'])
48 path_info = get_path_info(environ)
49 is_git_path = GIT_PROTO_PAT.match(path_info)
47 log.debug(
50 log.debug(
48 'request path: `%s` detected as GIT PROTOCOL %s', environ['PATH_INFO'],
51 'request path: `%s` detected as GIT PROTOCOL %s', path_info,
49 is_git_path is not None)
52 is_git_path is not None)
50
53
51 return is_git_path
54 return is_git_path
52
55
53
56
54 def is_hg(environ):
57 def is_hg(environ):
55 """
58 """
56 Returns True if requests target is mercurial server - header
59 Returns True if requests target is mercurial server - header
57 ``HTTP_ACCEPT`` of such request would start with ``application/mercurial``.
60 ``HTTP_ACCEPT`` of such request would start with ``application/mercurial``.
58 """
61 """
59 is_hg_path = False
62 is_hg_path = False
60
63
61 http_accept = environ.get('HTTP_ACCEPT')
64 http_accept = environ.get('HTTP_ACCEPT')
62
65
63 if http_accept and http_accept.startswith('application/mercurial'):
66 if http_accept and http_accept.startswith('application/mercurial'):
64 query = urllib.parse.parse_qs(environ['QUERY_STRING'])
67 query = urllib.parse.parse_qs(environ['QUERY_STRING'])
65 if 'cmd' in query:
68 if 'cmd' in query:
66 is_hg_path = True
69 is_hg_path = True
67
70
71 path_info = get_path_info(environ)
68 log.debug(
72 log.debug(
69 'request path: `%s` detected as HG PROTOCOL %s', environ['PATH_INFO'],
73 'request path: `%s` detected as HG PROTOCOL %s', path_info,
70 is_hg_path)
74 is_hg_path)
71
75
72 return is_hg_path
76 return is_hg_path
73
77
74
78
75 def is_svn(environ):
79 def is_svn(environ):
76 """
80 """
77 Returns True if requests target is Subversion server
81 Returns True if requests target is Subversion server
78 """
82 """
79
83
80 http_dav = environ.get('HTTP_DAV', '')
84 http_dav = environ.get('HTTP_DAV', '')
81 magic_path_segment = rhodecode.CONFIG.get(
85 magic_path_segment = rhodecode.CONFIG.get(
82 'rhodecode_subversion_magic_path', '/!svn')
86 'rhodecode_subversion_magic_path', '/!svn')
87 path_info = get_path_info(environ)
83 is_svn_path = (
88 is_svn_path = (
84 'subversion' in http_dav or
89 'subversion' in http_dav or
85 magic_path_segment in environ['PATH_INFO']
90 magic_path_segment in path_info
86 or environ['REQUEST_METHOD'] in ['PROPFIND', 'PROPPATCH']
91 or environ['REQUEST_METHOD'] in ['PROPFIND', 'PROPPATCH']
87 )
92 )
88 log.debug(
93 log.debug(
89 'request path: `%s` detected as SVN PROTOCOL %s', environ['PATH_INFO'],
94 'request path: `%s` detected as SVN PROTOCOL %s', path_info,
90 is_svn_path)
95 is_svn_path)
91
96
92 return is_svn_path
97 return is_svn_path
93
98
94
99
95 class GunzipMiddleware(object):
100 class GunzipMiddleware(object):
96 """
101 """
97 WSGI middleware that unzips gzip-encoded requests before
102 WSGI middleware that unzips gzip-encoded requests before
98 passing on to the underlying application.
103 passing on to the underlying application.
99 """
104 """
100
105
101 def __init__(self, application):
106 def __init__(self, application):
102 self.app = application
107 self.app = application
103
108
104 def __call__(self, environ, start_response):
109 def __call__(self, environ, start_response):
105 accepts_encoding_header = environ.get('HTTP_CONTENT_ENCODING', b'')
110 accepts_encoding_header = environ.get('HTTP_CONTENT_ENCODING', b'')
106
111
107 if b'gzip' in accepts_encoding_header:
112 if b'gzip' in accepts_encoding_header:
108 log.debug('gzip detected, now running gunzip wrapper')
113 log.debug('gzip detected, now running gunzip wrapper')
109 wsgi_input = environ['wsgi.input']
114 wsgi_input = environ['wsgi.input']
110
115
111 if not hasattr(environ['wsgi.input'], 'seek'):
116 if not hasattr(environ['wsgi.input'], 'seek'):
112 # The gzip implementation in the standard library of Python 2.x
117 # The gzip implementation in the standard library of Python 2.x
113 # requires the '.seek()' and '.tell()' methods to be available
118 # requires the '.seek()' and '.tell()' methods to be available
114 # on the input stream. Read the data into a temporary file to
119 # on the input stream. Read the data into a temporary file to
115 # work around this limitation.
120 # work around this limitation.
116
121
117 wsgi_input = tempfile.SpooledTemporaryFile(64 * 1024 * 1024)
122 wsgi_input = tempfile.SpooledTemporaryFile(64 * 1024 * 1024)
118 shutil.copyfileobj(environ['wsgi.input'], wsgi_input)
123 shutil.copyfileobj(environ['wsgi.input'], wsgi_input)
119 wsgi_input.seek(0)
124 wsgi_input.seek(0)
120
125
121 environ['wsgi.input'] = gzip.GzipFile(fileobj=wsgi_input, mode='r')
126 environ['wsgi.input'] = gzip.GzipFile(fileobj=wsgi_input, mode='r')
122 # since we "Ungzipped" the content we say now it's no longer gzip
127 # since we "Ungzipped" the content we say now it's no longer gzip
123 # content encoding
128 # content encoding
124 del environ['HTTP_CONTENT_ENCODING']
129 del environ['HTTP_CONTENT_ENCODING']
125
130
126 # content length has changes ? or i'm not sure
131 # content length has changes ? or i'm not sure
127 if 'CONTENT_LENGTH' in environ:
132 if 'CONTENT_LENGTH' in environ:
128 del environ['CONTENT_LENGTH']
133 del environ['CONTENT_LENGTH']
129 else:
134 else:
130 log.debug('content not gzipped, gzipMiddleware passing '
135 log.debug('content not gzipped, gzipMiddleware passing '
131 'request further')
136 'request further')
132 return self.app(environ, start_response)
137 return self.app(environ, start_response)
133
138
134
139
135 def is_vcs_call(environ):
140 def is_vcs_call(environ):
136 if VCS_TYPE_KEY in environ:
141 if VCS_TYPE_KEY in environ:
137 raw_type = environ[VCS_TYPE_KEY]
142 raw_type = environ[VCS_TYPE_KEY]
138 return raw_type and raw_type != VCS_TYPE_SKIP
143 return raw_type and raw_type != VCS_TYPE_SKIP
139 return False
144 return False
140
145
141
146
142 def get_path_elem(route_path):
147 def get_path_elem(route_path):
143 if not route_path:
148 if not route_path:
144 return None
149 return None
145
150
146 cleaned_route_path = route_path.lstrip('/')
151 cleaned_route_path = route_path.lstrip('/')
147 if cleaned_route_path:
152 if cleaned_route_path:
148 cleaned_route_path_elems = cleaned_route_path.split('/')
153 cleaned_route_path_elems = cleaned_route_path.split('/')
149 if cleaned_route_path_elems:
154 if cleaned_route_path_elems:
150 return cleaned_route_path_elems[0]
155 return cleaned_route_path_elems[0]
151 return None
156 return None
152
157
153
158
154 def detect_vcs_request(environ, backends):
159 def detect_vcs_request(environ, backends):
155 checks = {
160 checks = {
156 'hg': (is_hg, SimpleHg),
161 'hg': (is_hg, SimpleHg),
157 'git': (is_git, SimpleGit),
162 'git': (is_git, SimpleGit),
158 'svn': (is_svn, SimpleSvn),
163 'svn': (is_svn, SimpleSvn),
159 }
164 }
160 handler = None
165 handler = None
161 # List of path views first chunk we don't do any checks
166 # List of path views first chunk we don't do any checks
162 white_list = [
167 white_list = [
163 # e.g /_file_store/download
168 # e.g /_file_store/download
164 '_file_store',
169 '_file_store',
165
170
166 # static files no detection
171 # static files no detection
167 '_static',
172 '_static',
168
173
169 # skip ops ping, status
174 # skip ops ping, status
170 '_admin/ops/ping',
175 '_admin/ops/ping',
171 '_admin/ops/status',
176 '_admin/ops/status',
172
177
173 # full channelstream connect should be VCS skipped
178 # full channelstream connect should be VCS skipped
174 '_admin/channelstream/connect',
179 '_admin/channelstream/connect',
175 ]
180 ]
176
181 path_info = get_path_info(environ)
177 path_info = environ['PATH_INFO']
182 path_url = path_info.lstrip('/')
178
179 path_elem = get_path_elem(path_info)
180
183
181 if path_elem in white_list:
184 if path_elem in white_list:
182 log.debug('path `%s` in whitelist, skipping...', path_info)
185 log.debug('path `%s` in whitelist, skipping...', path_info)
183 return handler
186 return handler
184
187
185 path_url = path_info.lstrip('/')
188 path_url = path_info.lstrip('/')
186 if path_url in white_list:
189 if path_url in white_list:
187 log.debug('full url path `%s` in whitelist, skipping...', path_url)
190 log.debug('full url path `%s` in whitelist, skipping...', path_url)
188 return handler
191 return handler
189
192
190 if VCS_TYPE_KEY in environ:
193 if VCS_TYPE_KEY in environ:
191 raw_type = environ[VCS_TYPE_KEY]
194 raw_type = environ[VCS_TYPE_KEY]
192 if raw_type == VCS_TYPE_SKIP:
195 if raw_type == VCS_TYPE_SKIP:
193 log.debug('got `skip` marker for vcs detection, skipping...')
196 log.debug('got `skip` marker for vcs detection, skipping...')
194 return handler
197 return handler
195
198
196 _check, handler = checks.get(raw_type) or [None, None]
199 _check, handler = checks.get(raw_type) or [None, None]
197 if handler:
200 if handler:
198 log.debug('got handler:%s from environ', handler)
201 log.debug('got handler:%s from environ', handler)
199
202
200 if not handler:
203 if not handler:
201 log.debug('request start: checking if request for `%s` is of VCS type in order: %s', path_elem, backends)
204 log.debug('request start: checking if request for `%s` is of VCS type in order: %s', path_elem, backends)
202 for vcs_type in backends:
205 for vcs_type in backends:
203 vcs_check, _handler = checks[vcs_type]
206 vcs_check, _handler = checks[vcs_type]
204 if vcs_check(environ):
207 if vcs_check(environ):
205 log.debug('vcs handler found %s', _handler)
208 log.debug('vcs handler found %s', _handler)
206 handler = _handler
209 handler = _handler
207 break
210 break
208
211
209 return handler
212 return handler
210
213
211
214
212 class VCSMiddleware(object):
215 class VCSMiddleware(object):
213
216
214 def __init__(self, app, registry, config, appenlight_client):
217 def __init__(self, app, registry, config, appenlight_client):
215 self.application = app
218 self.application = app
216 self.registry = registry
219 self.registry = registry
217 self.config = config
220 self.config = config
218 self.appenlight_client = appenlight_client
221 self.appenlight_client = appenlight_client
219 self.use_gzip = True
222 self.use_gzip = True
220 # order in which we check the middlewares, based on vcs.backends config
223 # order in which we check the middlewares, based on vcs.backends config
221 self.check_middlewares = config['vcs.backends']
224 self.check_middlewares = config['vcs.backends']
222
225
223 def vcs_config(self, repo_name=None):
226 def vcs_config(self, repo_name=None):
224 """
227 """
225 returns serialized VcsSettings
228 returns serialized VcsSettings
226 """
229 """
227 try:
230 try:
228 return VcsSettingsModel(
231 return VcsSettingsModel(
229 repo=repo_name).get_ui_settings_as_config_obj()
232 repo=repo_name).get_ui_settings_as_config_obj()
230 except Exception:
233 except Exception:
231 pass
234 pass
232
235
233 def wrap_in_gzip_if_enabled(self, app, config):
236 def wrap_in_gzip_if_enabled(self, app, config):
234 if self.use_gzip:
237 if self.use_gzip:
235 app = GunzipMiddleware(app)
238 app = GunzipMiddleware(app)
236 return app
239 return app
237
240
238 def _get_handler_app(self, environ):
241 def _get_handler_app(self, environ):
239 app = None
242 app = None
240 log.debug('VCSMiddleware: detecting vcs type.')
243 log.debug('VCSMiddleware: detecting vcs type.')
241 handler = detect_vcs_request(environ, self.check_middlewares)
244 handler = detect_vcs_request(environ, self.check_middlewares)
242 if handler:
245 if handler:
243 app = handler(self.config, self.registry)
246 app = handler(self.config, self.registry)
244
247
245 return app
248 return app
246
249
247 def __call__(self, environ, start_response):
250 def __call__(self, environ, start_response):
248 # check if we handle one of interesting protocols, optionally extract
251 # check if we handle one of interesting protocols, optionally extract
249 # specific vcsSettings and allow changes of how things are wrapped
252 # specific vcsSettings and allow changes of how things are wrapped
250 vcs_handler = self._get_handler_app(environ)
253 vcs_handler = self._get_handler_app(environ)
251 if vcs_handler:
254 if vcs_handler:
252 # translate the _REPO_ID into real repo NAME for usage
255 # translate the _REPO_ID into real repo NAME for usage
253 # in middleware
256 # in middleware
254 environ['PATH_INFO'] = vcs_handler._get_by_id(environ['PATH_INFO'])
257
258 path_info = get_path_info(environ)
259 environ['PATH_INFO'] = vcs_handler._get_by_id(path_info)
255
260
256 # Set acl, url and vcs repo names.
261 # Set acl, url and vcs repo names.
257 vcs_handler.set_repo_names(environ)
262 vcs_handler.set_repo_names(environ)
258
263
259 # register repo config back to the handler
264 # register repo config back to the handler
260 vcs_conf = self.vcs_config(vcs_handler.acl_repo_name)
265 vcs_conf = self.vcs_config(vcs_handler.acl_repo_name)
261 # maybe damaged/non existent settings. We still want to
266 # maybe damaged/non existent settings. We still want to
262 # pass that point to validate on is_valid_and_existing_repo
267 # pass that point to validate on is_valid_and_existing_repo
263 # and return proper HTTP Code back to client
268 # and return proper HTTP Code back to client
264 if vcs_conf:
269 if vcs_conf:
265 vcs_handler.repo_vcs_config = vcs_conf
270 vcs_handler.repo_vcs_config = vcs_conf
266
271
267 # check for type, presence in database and on filesystem
272 # check for type, presence in database and on filesystem
268 if not vcs_handler.is_valid_and_existing_repo(
273 if not vcs_handler.is_valid_and_existing_repo(
269 vcs_handler.acl_repo_name,
274 vcs_handler.acl_repo_name,
270 vcs_handler.base_path,
275 vcs_handler.base_path,
271 vcs_handler.SCM):
276 vcs_handler.SCM):
272 return HTTPNotFound()(environ, start_response)
277 return HTTPNotFound()(environ, start_response)
273
278
274 environ['REPO_NAME'] = vcs_handler.url_repo_name
279 environ['REPO_NAME'] = vcs_handler.url_repo_name
275
280
276 # Wrap handler in middlewares if they are enabled.
281 # Wrap handler in middlewares if they are enabled.
277 vcs_handler = self.wrap_in_gzip_if_enabled(
282 vcs_handler = self.wrap_in_gzip_if_enabled(
278 vcs_handler, self.config)
283 vcs_handler, self.config)
279 vcs_handler, _ = wrap_in_appenlight_if_enabled(
284 vcs_handler, _ = wrap_in_appenlight_if_enabled(
280 vcs_handler, self.config, self.appenlight_client)
285 vcs_handler, self.config, self.appenlight_client)
281
286
282 return vcs_handler(environ, start_response)
287 return vcs_handler(environ, start_response)
283
288
284 return self.application(environ, start_response)
289 return self.application(environ, start_response)
General Comments 0
You need to be logged in to leave comments. Login now