fix(svn): fixed problems with svn hooks binary dir not being propagated in mod_dav_svn
super-admin
r1229:fe30068d v5.0.1 stable
@@ -0,0 +1,40 b''
1 # Copyright (C) 2010-2023 RhodeCode GmbH
2 #
3 # This program is free software: you can redistribute it and/or modify
4 # it under the terms of the GNU Affero General Public License, version 3
5 # (only), as published by the Free Software Foundation.
6 #
7 # This program is distributed in the hope that it will be useful,
8 # but WITHOUT ANY WARRANTY; without even the implied warranty of
9 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10 # GNU General Public License for more details.
11 #
12 # You should have received a copy of the GNU Affero General Public License
13 # along with this program. If not, see <http://www.gnu.org/licenses/>.
14 #
15 # This program is dual-licensed. If you wish to learn more about the
16 # RhodeCode Enterprise Edition, including its added features, Support services,
17 # and proprietary license terms, please see https://rhodecode.com/licenses/
18 import os
19
20
21 def get_config(ini_path, **kwargs):
22 import configparser
23 parser = configparser.ConfigParser(**kwargs)
24 parser.read(ini_path)
25 return parser
26
27
28 def get_app_config_lightweight(ini_path):
29 parser = get_config(ini_path)
30 parser.set('app:main', 'here', os.getcwd())
31 parser.set('app:main', '__file__', ini_path)
32 return dict(parser.items('app:main'))
33
34
35 def get_app_config(ini_path):
36 """
37 This loads the app context and provides a heavyweight initialization of the config
38 """
39 from paste.deploy.loadwsgi import appconfig
40 return appconfig(f'config:{ini_path}', relative_to=os.getcwd())
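
A minimal usage sketch of the lightweight loader added above; the ini path is a placeholder and the [app:main] section name is assumed from standard Pyramid-style ini files:

# Hypothetical usage of get_app_config_lightweight(); the path below is illustrative only.
from vcsserver.lib.config_utils import get_app_config_lightweight

ini_path = '/etc/rhodecode/vcsserver.ini'  # placeholder path, not taken from this change
app_settings = get_app_config_lightweight(ini_path)

# 'here' and '__file__' are injected by the helper; the rest comes from [app:main]
core_binary_dir = app_settings.get('core.binary_dir', '')
print(core_binary_dir)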
@@ -1,168 +1,185 b''
1 1 # Copyright (C) 2010-2023 RhodeCode GmbH
2 2 #
3 3 # This program is free software: you can redistribute it and/or modify
4 4 # it under the terms of the GNU Affero General Public License, version 3
5 5 # (only), as published by the Free Software Foundation.
6 6 #
7 7 # This program is distributed in the hope that it will be useful,
8 8 # but WITHOUT ANY WARRANTY; without even the implied warranty of
9 9 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10 10 # GNU General Public License for more details.
11 11 #
12 12 # You should have received a copy of the GNU Affero General Public License
13 13 # along with this program. If not, see <http://www.gnu.org/licenses/>.
14 14 #
15 15 # This program is dual-licensed. If you wish to learn more about the
16 16 # RhodeCode Enterprise Edition, including its added features, Support services,
17 17 # and proprietary license terms, please see https://rhodecode.com/licenses/
18 18
19 19 import os
20 20 import textwrap
21 21 import string
22 22 import functools
23 23 import logging
24 24 import tempfile
25 25 import logging.config
26 26
27 27 from vcsserver.type_utils import str2bool, aslist
28 28
29 29 log = logging.getLogger(__name__)
30 30
31
31 32 # skip keys that are set here, so we don't double-process those
32 33 set_keys = {
33 34 '__file__': ''
34 35 }
35 36
36 37
37 38 class SettingsMaker:
38 39
39 40 def __init__(self, app_settings):
40 41 self.settings = app_settings
41 42
42 43 @classmethod
43 44 def _bool_func(cls, input_val):
44 45 if isinstance(input_val, bytes):
45 46 # decode to str
46 47 input_val = input_val.decode('utf8')
47 48 return str2bool(input_val)
48 49
49 50 @classmethod
50 51 def _int_func(cls, input_val):
51 52 return int(input_val)
52 53
53 54 @classmethod
55 def _float_func(cls, input_val):
56 return float(input_val)
57
58 @classmethod
54 59 def _list_func(cls, input_val, sep=','):
55 60 return aslist(input_val, sep=sep)
56 61
57 62 @classmethod
58 63 def _string_func(cls, input_val, lower=True):
59 64 if lower:
60 65 input_val = input_val.lower()
61 66 return input_val
62 67
63 68 @classmethod
64 def _float_func(cls, input_val):
65 return float(input_val)
69 def _string_no_quote_func(cls, input_val, lower=True):
70 """
71 Special-case string function that detects if a value is set to an empty quoted string
72 e.g.
73
74 core.binary_dir = ""
75 """
76
77 input_val = cls._string_func(input_val, lower=lower)
78 if input_val in ['""', "''"]:
79 return ''
80 return input_val
66 81
67 82 @classmethod
68 83 def _dir_func(cls, input_val, ensure_dir=False, mode=0o755):
69 84
70 85 # ensure we have our dir created
71 86 if not os.path.isdir(input_val) and ensure_dir:
72 87 os.makedirs(input_val, mode=mode, exist_ok=True)
73 88
74 89 if not os.path.isdir(input_val):
75 90 raise Exception(f'Dir at {input_val} does not exist')
76 91 return input_val
77 92
78 93 @classmethod
79 94 def _file_path_func(cls, input_val, ensure_dir=False, mode=0o755):
80 95 dirname = os.path.dirname(input_val)
81 96 cls._dir_func(dirname, ensure_dir=ensure_dir)
82 97 return input_val
83 98
84 99 @classmethod
85 100 def _key_transformator(cls, key):
86 101 return "{}_{}".format('RC'.upper(), key.upper().replace('.', '_').replace('-', '_'))
87 102
88 103 def maybe_env_key(self, key):
89 104 # maybe we have this KEY in the env; if so, use that value with higher priority.
90 105 transformed_key = self._key_transformator(key)
91 106 envvar_value = os.environ.get(transformed_key)
92 107 if envvar_value:
93 108 log.debug('using `%s` key instead of `%s` key for config', transformed_key, key)
94 109
95 110 return envvar_value
96 111
97 112 def env_expand(self):
98 113 replaced = {}
99 114 for k, v in self.settings.items():
100 115 if k not in set_keys:
101 116 envvar_value = self.maybe_env_key(k)
102 117 if envvar_value:
103 118 replaced[k] = envvar_value
104 119 set_keys[k] = envvar_value
105 120
106 121 # replace ALL keys updated
107 122 self.settings.update(replaced)
108 123
109 124 def enable_logging(self, logging_conf=None, level='INFO', formatter='generic'):
110 125 """
111 126 Helper to enable debug logging on a running instance
112 127 :return:
113 128 """
114 129
115 130 if not str2bool(self.settings.get('logging.autoconfigure')):
116 131 log.info('logging configuration based on main .ini file')
117 132 return
118 133
119 134 if logging_conf is None:
120 135 logging_conf = self.settings.get('logging.logging_conf_file') or ''
121 136
122 137 if not os.path.isfile(logging_conf):
123 138 log.error('Unable to set up logging based on %s: '
124 139 'file does not exist. Specify the path using the logging.logging_conf_file= config setting.', logging_conf)
125 140 return
126 141
127 142 with open(logging_conf, 'rt') as f:
128 143 ini_template = textwrap.dedent(f.read())
129 144 ini_template = string.Template(ini_template).safe_substitute(
130 145 RC_LOGGING_LEVEL=os.environ.get('RC_LOGGING_LEVEL', '') or level,
131 146 RC_LOGGING_FORMATTER=os.environ.get('RC_LOGGING_FORMATTER', '') or formatter
132 147 )
133 148
134 149 with tempfile.NamedTemporaryFile(prefix='rc_logging_', suffix='.ini', delete=False) as f:
135 150 log.info('Saved Temporary LOGGING config at %s', f.name)
136 151 f.write(ini_template)
137 152
138 153 logging.config.fileConfig(f.name)
139 154 os.remove(f.name)
140 155
141 156 def make_setting(self, key, default, lower=False, default_when_empty=False, parser=None):
142 157 input_val = self.settings.get(key, default)
143 158
144 159 if default_when_empty and not input_val:
145 160 # use default value when value is set in the config but it is empty
146 161 input_val = default
147 162
148 163 parser_func = {
149 164 'bool': self._bool_func,
150 165 'int': self._int_func,
166 'float': self._float_func,
151 167 'list': self._list_func,
152 168 'list:newline': functools.partial(self._list_func, sep='\n'),
153 169 'list:spacesep': functools.partial(self._list_func, sep=' '),
154 170 'string': functools.partial(self._string_func, lower=lower),
171 'string:noquote': functools.partial(self._string_no_quote_func, lower=lower),
155 172 'dir': self._dir_func,
156 173 'dir:ensured': functools.partial(self._dir_func, ensure_dir=True),
157 174 'file': self._file_path_func,
158 175 'file:ensured': functools.partial(self._file_path_func, ensure_dir=True),
159 176 None: lambda i: i
160 177 }[parser]
161 178
162 179 envvar_value = self.maybe_env_key(key)
163 180 if envvar_value:
164 181 input_val = envvar_value
165 182 set_keys[key] = input_val
166 183
167 184 self.settings[key] = parser_func(input_val)
168 185 return self.settings[key]
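
A short sketch of exercising the parsers added above ('float' and 'string:noquote'); the keys and values are made up for illustration:

# Illustrative use of SettingsMaker with the new parsers; keys/values are examples only.
from vcsserver.config.settings_maker import SettingsMaker

app_settings = {
    'core.binary_dir': '""',      # empty-quoted value as it can appear in the ini
    'some.float.option': '1.5',   # hypothetical key, used only for this example
}
settings_maker = SettingsMaker(app_settings)

# '""' collapses to an empty string instead of staying a literal pair of quotes
settings_maker.make_setting('core.binary_dir', default='', parser='string:noquote')
settings_maker.make_setting('some.float.option', default=1.0, parser='float')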
@@ -1,230 +1,230 b''
1 1 # RhodeCode VCSServer provides access to different vcs backends via network.
2 2 # Copyright (C) 2014-2023 RhodeCode GmbH
3 3 #
4 4 # This program is free software; you can redistribute it and/or modify
5 5 # it under the terms of the GNU General Public License as published by
6 6 # the Free Software Foundation; either version 3 of the License, or
7 7 # (at your option) any later version.
8 8 #
9 9 # This program is distributed in the hope that it will be useful,
10 10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 12 # GNU General Public License for more details.
13 13 #
14 14 # You should have received a copy of the GNU General Public License
15 15 # along with this program; if not, write to the Free Software Foundation,
16 16 # Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
17 17
18 18
19 19 import re
20 20 import os
21 21 import sys
22 22 import datetime
23 23 import logging
24 24 import pkg_resources
25 25
26 26 import vcsserver
27 27 import vcsserver.settings
28 28 from vcsserver.str_utils import safe_bytes
29 29
30 30 log = logging.getLogger(__name__)
31 31
32 32 HOOKS_DIR_MODE = 0o755
33 33 HOOKS_FILE_MODE = 0o755
34 34
35 35
36 36 def set_permissions_if_needed(path_to_check, perms: oct):
37 37 # Get current permissions
38 38 current_permissions = os.stat(path_to_check).st_mode & 0o777 # Extract permission bits
39 39
40 40 # Check if current permissions are lower than required
41 41 if current_permissions < int(perms):
42 42 # Change the permissions if they are lower than required
43 43 os.chmod(path_to_check, perms)
44 44
45 45
46 46 def get_git_hooks_path(repo_path, bare):
47 47 hooks_path = os.path.join(repo_path, 'hooks')
48 48 if not bare:
49 49 hooks_path = os.path.join(repo_path, '.git', 'hooks')
50 50
51 51 return hooks_path
52 52
53 53
54 54 def install_git_hooks(repo_path, bare, executable=None, force_create=False):
55 55 """
56 56 Creates a RhodeCode hook inside a git repository
57 57
58 58 :param repo_path: path to repository
59 59 :param bare: defines if repository is considered a bare git repo
60 60 :param executable: binary executable to put in the hooks
61 61 :param force_create: Creates even if the same name hook exists
62 62 """
63 63 executable = executable or sys.executable
64 64 hooks_path = get_git_hooks_path(repo_path, bare)
65 65
66 66 # we always call it to ensure dir exists and it has a proper mode
67 67 if not os.path.exists(hooks_path):
68 68 # If it doesn't exist, create a new directory with the specified mode
69 69 os.makedirs(hooks_path, mode=HOOKS_DIR_MODE, exist_ok=True)
70 70 # If it exists, change the directory's mode to the specified mode
71 71 set_permissions_if_needed(hooks_path, perms=HOOKS_DIR_MODE)
72 72
73 73 tmpl_post = pkg_resources.resource_string(
74 74 'vcsserver', '/'.join(
75 75 ('hook_utils', 'hook_templates', 'git_post_receive.py.tmpl')))
76 76 tmpl_pre = pkg_resources.resource_string(
77 77 'vcsserver', '/'.join(
78 78 ('hook_utils', 'hook_templates', 'git_pre_receive.py.tmpl')))
79 79
80 80 path = '' # not used for now
81 81 timestamp = datetime.datetime.utcnow().isoformat()
82 82
83 83 for h_type, template in [('pre', tmpl_pre), ('post', tmpl_post)]:
84 84 log.debug('Installing git hook in repo %s', repo_path)
85 85 _hook_file = os.path.join(hooks_path, f'{h_type}-receive')
86 86 _rhodecode_hook = check_rhodecode_hook(_hook_file)
87 87
88 88 if _rhodecode_hook or force_create:
89 89 log.debug('writing git %s hook file at %s !', h_type, _hook_file)
90 90 try:
91 91 with open(_hook_file, 'wb') as f:
92 92 template = template.replace(b'_TMPL_', safe_bytes(vcsserver.get_version()))
93 93 template = template.replace(b'_DATE_', safe_bytes(timestamp))
94 94 template = template.replace(b'_ENV_', safe_bytes(executable))
95 95 template = template.replace(b'_PATH_', safe_bytes(path))
96 96 f.write(template)
97 97 set_permissions_if_needed(_hook_file, perms=HOOKS_FILE_MODE)
98 98 except OSError:
99 99 log.exception('error writing hook file %s', _hook_file)
100 100 else:
101 101 log.debug('skipping writing hook file')
102 102
103 103 return True
104 104
105 105
106 106 def get_svn_hooks_path(repo_path):
107 107 hooks_path = os.path.join(repo_path, 'hooks')
108 108
109 109 return hooks_path
110 110
111 111
112 112 def install_svn_hooks(repo_path, executable=None, force_create=False):
113 113 """
114 114 Creates RhodeCode hooks inside a svn repository
115 115
116 116 :param repo_path: path to repository
117 117 :param executable: binary executable to put in the hooks
118 118 :param force_create: Create even if same name hook exists
119 119 """
120 120 executable = executable or sys.executable
121 121 hooks_path = get_svn_hooks_path(repo_path)
122 122 if not os.path.isdir(hooks_path):
123 123 os.makedirs(hooks_path, mode=0o777, exist_ok=True)
124 124
125 125 tmpl_post = pkg_resources.resource_string(
126 126 'vcsserver', '/'.join(
127 127 ('hook_utils', 'hook_templates', 'svn_post_commit_hook.py.tmpl')))
128 128 tmpl_pre = pkg_resources.resource_string(
129 129 'vcsserver', '/'.join(
130 130 ('hook_utils', 'hook_templates', 'svn_pre_commit_hook.py.tmpl')))
131 131
132 132 path = '' # not used for now
133 133 timestamp = datetime.datetime.utcnow().isoformat()
134 134
135 135 for h_type, template in [('pre', tmpl_pre), ('post', tmpl_post)]:
136 136 log.debug('Installing svn hook in repo %s', repo_path)
137 137 _hook_file = os.path.join(hooks_path, f'{h_type}-commit')
138 138 _rhodecode_hook = check_rhodecode_hook(_hook_file)
139 139
140 140 if _rhodecode_hook or force_create:
141 141 log.debug('writing svn %s hook file at %s !', h_type, _hook_file)
142 142
143 143 env_expand = str([
144 144 ('RC_CORE_BINARY_DIR', vcsserver.settings.BINARY_DIR),
145 ('RC_GIT_EXECUTABLE', vcsserver.settings.GIT_EXECUTABLE),
146 ('RC_SVN_EXECUTABLE', vcsserver.settings.SVN_EXECUTABLE),
147 ('RC_SVNLOOK_EXECUTABLE', vcsserver.settings.SVNLOOK_EXECUTABLE),
145 ('RC_GIT_EXECUTABLE', vcsserver.settings.GIT_EXECUTABLE()),
146 ('RC_SVN_EXECUTABLE', vcsserver.settings.SVN_EXECUTABLE()),
147 ('RC_SVNLOOK_EXECUTABLE', vcsserver.settings.SVNLOOK_EXECUTABLE()),
148 148
149 149 ])
150 150 try:
151 151 with open(_hook_file, 'wb') as f:
152 152 template = template.replace(b'_TMPL_', safe_bytes(vcsserver.get_version()))
153 153 template = template.replace(b'_DATE_', safe_bytes(timestamp))
154 154 template = template.replace(b'_OS_EXPAND_', safe_bytes(env_expand))
155 155 template = template.replace(b'_ENV_', safe_bytes(executable))
156 156 template = template.replace(b'_PATH_', safe_bytes(path))
157 157
158 158 f.write(template)
159 159 os.chmod(_hook_file, 0o755)
160 160 except OSError:
161 161 log.exception('error writing hook file %s', _hook_file)
162 162 else:
163 163 log.debug('skipping writing hook file')
164 164
165 165 return True
166 166
167 167
168 168 def get_version_from_hook(hook_path):
169 169 version = b''
170 170 hook_content = read_hook_content(hook_path)
171 171 matches = re.search(rb'RC_HOOK_VER\s*=\s*(.*)', hook_content)
172 172 if matches:
173 173 try:
174 174 version = matches.groups()[0]
175 175 log.debug('got version %s from hooks.', version)
176 176 except Exception:
177 177 log.exception("Exception while reading the hook version.")
178 178 return version.replace(b"'", b"")
179 179
180 180
181 181 def check_rhodecode_hook(hook_path):
182 182 """
183 183 Check if the hook was created by RhodeCode
184 184 """
185 185 if not os.path.exists(hook_path):
186 186 return True
187 187
188 188 log.debug('hook exists, checking if it is from RhodeCode')
189 189
190 190 version = get_version_from_hook(hook_path)
191 191 if version:
192 192 return True
193 193
194 194 return False
195 195
196 196
197 197 def read_hook_content(hook_path) -> bytes:
198 198 content = b''
199 199 if os.path.isfile(hook_path):
200 200 with open(hook_path, 'rb') as f:
201 201 content = f.read()
202 202 return content
203 203
204 204
205 205 def get_git_pre_hook_version(repo_path, bare):
206 206 hooks_path = get_git_hooks_path(repo_path, bare)
207 207 _hook_file = os.path.join(hooks_path, 'pre-receive')
208 208 version = get_version_from_hook(_hook_file)
209 209 return version
210 210
211 211
212 212 def get_git_post_hook_version(repo_path, bare):
213 213 hooks_path = get_git_hooks_path(repo_path, bare)
214 214 _hook_file = os.path.join(hooks_path, 'post-receive')
215 215 version = get_version_from_hook(_hook_file)
216 216 return version
217 217
218 218
219 219 def get_svn_pre_hook_version(repo_path):
220 220 hooks_path = get_svn_hooks_path(repo_path)
221 221 _hook_file = os.path.join(hooks_path, 'pre-commit')
222 222 version = get_version_from_hook(_hook_file)
223 223 return version
224 224
225 225
226 226 def get_svn_post_hook_version(repo_path):
227 227 hooks_path = get_svn_hooks_path(repo_path)
228 228 _hook_file = os.path.join(hooks_path, 'post-commit')
229 229 version = get_version_from_hook(_hook_file)
230 230 return version
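
A minimal sketch of driving the svn hook installer defined above; the repository path and executable are placeholders, and the import assumes the module is importable as vcsserver.hook_utils:

# Hypothetical invocation of install_svn_hooks(); paths are illustrative only.
from vcsserver.hook_utils import install_svn_hooks, get_svn_pre_hook_version

repo_path = '/var/opt/rhodecode_repo_store/my-svn-repo'  # placeholder path
install_svn_hooks(repo_path, executable='/usr/bin/python3', force_create=True)

# the generated hooks embed an RC_HOOK_VER marker that can be read back later
print(get_svn_pre_hook_version(repo_path))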
@@ -1,818 +1,826 b''
1 1 # RhodeCode VCSServer provides access to different vcs backends via network.
2 2 # Copyright (C) 2014-2023 RhodeCode GmbH
3 3 #
4 4 # This program is free software; you can redistribute it and/or modify
5 5 # it under the terms of the GNU General Public License as published by
6 6 # the Free Software Foundation; either version 3 of the License, or
7 7 # (at your option) any later version.
8 8 #
9 9 # This program is distributed in the hope that it will be useful,
10 10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 12 # GNU General Public License for more details.
13 13 #
14 14 # You should have received a copy of the GNU General Public License
15 15 # along with this program; if not, write to the Free Software Foundation,
16 16 # Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
17 17
18 18 import io
19 19 import os
20 20 import sys
21 21 import logging
22 22 import collections
23 23 import base64
24 24 import msgpack
25 25 import dataclasses
26 26 import pygit2
27 27
28 28 import http.client
29 29 from celery import Celery
30 30
31 31 import mercurial.scmutil
32 32 import mercurial.node
33 33
34 import vcsserver.settings
35 34 from vcsserver.lib.rc_json import json
36 35 from vcsserver import exceptions, subprocessio, settings
37 36 from vcsserver.str_utils import ascii_str, safe_str
38 37 from vcsserver.remote.git_remote import Repository
39 38
40 39 celery_app = Celery('__vcsserver__')
41 40 log = logging.getLogger(__name__)
42 41
43 42
44 43 class HooksHttpClient:
45 44 proto = 'msgpack.v1'
46 45 connection = None
47 46
48 47 def __init__(self, hooks_uri):
49 48 self.hooks_uri = hooks_uri
50 49
51 50 def __repr__(self):
52 51 return f'{self.__class__}(hook_uri={self.hooks_uri}, proto={self.proto})'
53 52
54 53 def __call__(self, method, extras):
55 54 connection = http.client.HTTPConnection(self.hooks_uri)
56 55 # binary msgpack body
57 56 headers, body = self._serialize(method, extras)
58 57 log.debug('Doing a new hooks call using HTTPConnection to %s', self.hooks_uri)
59 58
60 59 try:
61 60 try:
62 61 connection.request('POST', '/', body, headers)
63 62 except Exception as error:
64 63 log.error('Hooks calling Connection failed on %s, org error: %s', connection.__dict__, error)
65 64 raise
66 65
67 66 response = connection.getresponse()
68 67 try:
69 68 return msgpack.load(response)
70 69 except Exception:
71 70 response_data = response.read()
72 71 log.exception('Failed to decode hook response json data. '
73 72 'response_code:%s, raw_data:%s',
74 73 response.status, response_data)
75 74 raise
76 75 finally:
77 76 connection.close()
78 77
79 78 @classmethod
80 79 def _serialize(cls, hook_name, extras):
81 80 data = {
82 81 'method': hook_name,
83 82 'extras': extras
84 83 }
85 84 headers = {
86 85 "rc-hooks-protocol": cls.proto,
87 86 "Connection": "keep-alive"
88 87 }
89 88 return headers, msgpack.packb(data)
90 89
91 90
92 91 class HooksCeleryClient:
93 92 TASK_TIMEOUT = 60 # time in seconds
94 93
95 94 def __init__(self, queue, backend):
96 95 celery_app.config_from_object({
97 96 'broker_url': queue, 'result_backend': backend,
98 97 'broker_connection_retry_on_startup': True,
99 98 'task_serializer': 'msgpack',
100 99 'accept_content': ['json', 'msgpack'],
101 100 'result_serializer': 'msgpack',
102 101 'result_accept_content': ['json', 'msgpack']
103 102 })
104 103 self.celery_app = celery_app
105 104
106 105 def __call__(self, method, extras):
107 106 inquired_task = self.celery_app.signature(
108 107 f'rhodecode.lib.celerylib.tasks.{method}'
109 108 )
110 109 return inquired_task.delay(extras).get(timeout=self.TASK_TIMEOUT)
111 110
112 111
113 112 class HooksShadowRepoClient:
114 113
115 114 def __call__(self, hook_name, extras):
116 115 return {'output': '', 'status': 0}
117 116
118 117
119 118 class RemoteMessageWriter:
120 119 """Writer base class."""
121 120 def write(self, message):
122 121 raise NotImplementedError()
123 122
124 123
125 124 class HgMessageWriter(RemoteMessageWriter):
126 125 """Writer that knows how to send messages to mercurial clients."""
127 126
128 127 def __init__(self, ui):
129 128 self.ui = ui
130 129
131 130 def write(self, message: str):
132 131 # TODO: Check why the quiet flag is set by default.
133 132 old = self.ui.quiet
134 133 self.ui.quiet = False
135 134 self.ui.status(message.encode('utf-8'))
136 135 self.ui.quiet = old
137 136
138 137
139 138 class GitMessageWriter(RemoteMessageWriter):
140 139 """Writer that knows how to send messages to git clients."""
141 140
142 141 def __init__(self, stdout=None):
143 142 self.stdout = stdout or sys.stdout
144 143
145 144 def write(self, message: str):
146 145 self.stdout.write(message)
147 146
148 147
149 148 class SvnMessageWriter(RemoteMessageWriter):
150 149 """Writer that knows how to send messages to svn clients."""
151 150
152 151 def __init__(self, stderr=None):
153 152 # SVN needs data sent to stderr for back-to-client messaging
154 153 self.stderr = stderr or sys.stderr
155 154
156 155 def write(self, message):
157 156 self.stderr.write(message)
158 157
159 158
160 159 def _handle_exception(result):
161 160 exception_class = result.get('exception')
162 161 exception_traceback = result.get('exception_traceback')
163 162 log.debug('Handling hook-call exception: %s', exception_class)
164 163
165 164 if exception_traceback:
166 165 log.error('Got traceback from remote call:%s', exception_traceback)
167 166
168 167 if exception_class == 'HTTPLockedRC':
169 168 raise exceptions.RepositoryLockedException()(*result['exception_args'])
170 169 elif exception_class == 'HTTPBranchProtected':
171 170 raise exceptions.RepositoryBranchProtectedException()(*result['exception_args'])
172 171 elif exception_class == 'RepositoryError':
173 172 raise exceptions.VcsException()(*result['exception_args'])
174 173 elif exception_class:
175 174 raise Exception(
176 175 f"""Got remote exception "{exception_class}" with args "{result['exception_args']}" """
177 176 )
178 177
179 178
180 179 def _get_hooks_client(extras):
181 180 hooks_uri = extras.get('hooks_uri')
182 181 task_queue = extras.get('task_queue')
183 182 task_backend = extras.get('task_backend')
184 183 is_shadow_repo = extras.get('is_shadow_repo')
185 184
186 185 if hooks_uri:
187 186 return HooksHttpClient(hooks_uri)
188 187 elif task_queue and task_backend:
189 188 return HooksCeleryClient(task_queue, task_backend)
190 189 elif is_shadow_repo:
191 190 return HooksShadowRepoClient()
192 191 else:
193 192 raise Exception("Hooks client not found!")
194 193
195 194
196 195 def _call_hook(hook_name, extras, writer):
197 196 hooks_client = _get_hooks_client(extras)
198 197 log.debug('Hooks, using client:%s', hooks_client)
199 198 result = hooks_client(hook_name, extras)
200 199 log.debug('Hooks got result: %s', result)
201 200 _handle_exception(result)
202 201 writer.write(result['output'])
203 202
204 203 return result['status']
205 204
206 205
207 206 def _extras_from_ui(ui):
208 207 hook_data = ui.config(b'rhodecode', b'RC_SCM_DATA')
209 208 if not hook_data:
210 209 # maybe it's inside environ ?
211 210 env_hook_data = os.environ.get('RC_SCM_DATA')
212 211 if env_hook_data:
213 212 hook_data = env_hook_data
214 213
215 214 extras = {}
216 215 if hook_data:
217 216 extras = json.loads(hook_data)
218 217 return extras
219 218
220 219
221 220 def _rev_range_hash(repo, node, check_heads=False):
222 221 from vcsserver.hgcompat import get_ctx
223 222
224 223 commits = []
225 224 revs = []
226 225 start = get_ctx(repo, node).rev()
227 226 end = len(repo)
228 227 for rev in range(start, end):
229 228 revs.append(rev)
230 229 ctx = get_ctx(repo, rev)
231 230 commit_id = ascii_str(mercurial.node.hex(ctx.node()))
232 231 branch = safe_str(ctx.branch())
233 232 commits.append((commit_id, branch))
234 233
235 234 parent_heads = []
236 235 if check_heads:
237 236 parent_heads = _check_heads(repo, start, end, revs)
238 237 return commits, parent_heads
239 238
240 239
241 240 def _check_heads(repo, start, end, commits):
242 241 from vcsserver.hgcompat import get_ctx
243 242 changelog = repo.changelog
244 243 parents = set()
245 244
246 245 for new_rev in commits:
247 246 for p in changelog.parentrevs(new_rev):
248 247 if p == mercurial.node.nullrev:
249 248 continue
250 249 if p < start:
251 250 parents.add(p)
252 251
253 252 for p in parents:
254 253 branch = get_ctx(repo, p).branch()
255 254 # The heads descending from that parent, on the same branch
256 255 parent_heads = {p}
257 256 reachable = {p}
258 257 for x in range(p + 1, end):
259 258 if get_ctx(repo, x).branch() != branch:
260 259 continue
261 260 for pp in changelog.parentrevs(x):
262 261 if pp in reachable:
263 262 reachable.add(x)
264 263 parent_heads.discard(pp)
265 264 parent_heads.add(x)
266 265 # More than one head? Suggest merging
267 266 if len(parent_heads) > 1:
268 267 return list(parent_heads)
269 268
270 269 return []
271 270
272 271
273 272 def _get_git_env():
274 273 env = {}
275 274 for k, v in os.environ.items():
276 275 if k.startswith('GIT'):
277 276 env[k] = v
278 277
279 278 # serialized version
280 279 return [(k, v) for k, v in env.items()]
281 280
282 281
283 282 def _get_hg_env(old_rev, new_rev, txnid, repo_path):
284 283 env = {}
285 284 for k, v in os.environ.items():
286 285 if k.startswith('HG'):
287 286 env[k] = v
288 287
289 288 env['HG_NODE'] = old_rev
290 289 env['HG_NODE_LAST'] = new_rev
291 290 env['HG_TXNID'] = txnid
292 291 env['HG_PENDING'] = repo_path
293 292
294 293 return [(k, v) for k, v in env.items()]
295 294
296 295
297 def _fix_hooks_executables():
296 def _fix_hooks_executables(ini_path=''):
298 297 """
299 298 This is a trick to set proper settings.EXECUTABLE paths for certain execution patterns,
300 299 especially for subversion, where hooks strip the entire env and calling just the 'svn' command
301 300 will most likely fail because svn is not on PATH
302 301 """
303 vcsserver.settings.BINARY_DIR = (
304 os.environ.get('RC_BINARY_DIR') or vcsserver.settings.BINARY_DIR)
305 vcsserver.settings.GIT_EXECUTABLE = (
306 os.environ.get('RC_GIT_EXECUTABLE') or vcsserver.settings.GIT_EXECUTABLE)
307 vcsserver.settings.SVN_EXECUTABLE = (
308 os.environ.get('RC_SVN_EXECUTABLE') or vcsserver.settings.SVN_EXECUTABLE)
309 vcsserver.settings.SVNLOOK_EXECUTABLE = (
310 os.environ.get('RC_SVNLOOK_EXECUTABLE') or vcsserver.settings.SVNLOOK_EXECUTABLE)
302 from vcsserver.http_main import sanitize_settings_and_apply_defaults
303 from vcsserver.lib.config_utils import get_app_config_lightweight
304
305 core_binary_dir = settings.BINARY_DIR or '/usr/local/bin/rhodecode_bin/vcs_bin'
306 if ini_path:
307
308 ini_settings = get_app_config_lightweight(ini_path)
309 ini_settings = sanitize_settings_and_apply_defaults({'__file__': ini_path}, ini_settings)
310 core_binary_dir = ini_settings['core.binary_dir']
311
312 settings.BINARY_DIR = core_binary_dir
311 313
312 314
313 315 def repo_size(ui, repo, **kwargs):
314 316 extras = _extras_from_ui(ui)
315 317 return _call_hook('repo_size', extras, HgMessageWriter(ui))
316 318
317 319
318 320 def pre_pull(ui, repo, **kwargs):
319 321 extras = _extras_from_ui(ui)
320 322 return _call_hook('pre_pull', extras, HgMessageWriter(ui))
321 323
322 324
323 325 def pre_pull_ssh(ui, repo, **kwargs):
324 326 extras = _extras_from_ui(ui)
325 327 if extras and extras.get('SSH'):
326 328 return pre_pull(ui, repo, **kwargs)
327 329 return 0
328 330
329 331
330 332 def post_pull(ui, repo, **kwargs):
331 333 extras = _extras_from_ui(ui)
332 334 return _call_hook('post_pull', extras, HgMessageWriter(ui))
333 335
334 336
335 337 def post_pull_ssh(ui, repo, **kwargs):
336 338 extras = _extras_from_ui(ui)
337 339 if extras and extras.get('SSH'):
338 340 return post_pull(ui, repo, **kwargs)
339 341 return 0
340 342
341 343
342 344 def pre_push(ui, repo, node=None, **kwargs):
343 345 """
344 346 Mercurial pre_push hook
345 347 """
346 348 extras = _extras_from_ui(ui)
347 349 detect_force_push = extras.get('detect_force_push')
348 350
349 351 rev_data = []
350 352 hook_type: str = safe_str(kwargs.get('hooktype'))
351 353
352 354 if node and hook_type == 'pretxnchangegroup':
353 355 branches = collections.defaultdict(list)
354 356 commits, _heads = _rev_range_hash(repo, node, check_heads=detect_force_push)
355 357 for commit_id, branch in commits:
356 358 branches[branch].append(commit_id)
357 359
358 360 for branch, commits in branches.items():
359 361 old_rev = ascii_str(kwargs.get('node_last')) or commits[0]
360 362 rev_data.append({
361 363 'total_commits': len(commits),
362 364 'old_rev': old_rev,
363 365 'new_rev': commits[-1],
364 366 'ref': '',
365 367 'type': 'branch',
366 368 'name': branch,
367 369 })
368 370
369 371 for push_ref in rev_data:
370 372 push_ref['multiple_heads'] = _heads
371 373
372 374 repo_path = os.path.join(
373 375 extras.get('repo_store', ''), extras.get('repository', ''))
374 376 push_ref['hg_env'] = _get_hg_env(
375 377 old_rev=push_ref['old_rev'],
376 378 new_rev=push_ref['new_rev'], txnid=ascii_str(kwargs.get('txnid')),
377 379 repo_path=repo_path)
378 380
379 381 extras['hook_type'] = hook_type or 'pre_push'
380 382 extras['commit_ids'] = rev_data
381 383
382 384 return _call_hook('pre_push', extras, HgMessageWriter(ui))
383 385
384 386
385 387 def pre_push_ssh(ui, repo, node=None, **kwargs):
386 388 extras = _extras_from_ui(ui)
387 389 if extras.get('SSH'):
388 390 return pre_push(ui, repo, node, **kwargs)
389 391
390 392 return 0
391 393
392 394
393 395 def pre_push_ssh_auth(ui, repo, node=None, **kwargs):
394 396 """
395 397 Mercurial pre_push hook for SSH
396 398 """
397 399 extras = _extras_from_ui(ui)
398 400 if extras.get('SSH'):
399 401 permission = extras['SSH_PERMISSIONS']
400 402
401 403 if 'repository.write' == permission or 'repository.admin' == permission:
402 404 return 0
403 405
404 406 # non-zero ret code
405 407 return 1
406 408
407 409 return 0
408 410
409 411
410 412 def post_push(ui, repo, node, **kwargs):
411 413 """
412 414 Mercurial post_push hook
413 415 """
414 416 extras = _extras_from_ui(ui)
415 417
416 418 commit_ids = []
417 419 branches = []
418 420 bookmarks = []
419 421 tags = []
420 422 hook_type: str = safe_str(kwargs.get('hooktype'))
421 423
422 424 commits, _heads = _rev_range_hash(repo, node)
423 425 for commit_id, branch in commits:
424 426 commit_ids.append(commit_id)
425 427 if branch not in branches:
426 428 branches.append(branch)
427 429
428 430 if hasattr(ui, '_rc_pushkey_bookmarks'):
429 431 bookmarks = ui._rc_pushkey_bookmarks
430 432
431 433 extras['hook_type'] = hook_type or 'post_push'
432 434 extras['commit_ids'] = commit_ids
433 435
434 436 extras['new_refs'] = {
435 437 'branches': branches,
436 438 'bookmarks': bookmarks,
437 439 'tags': tags
438 440 }
439 441
440 442 return _call_hook('post_push', extras, HgMessageWriter(ui))
441 443
442 444
443 445 def post_push_ssh(ui, repo, node, **kwargs):
444 446 """
445 447 Mercurial post_push hook for SSH
446 448 """
447 449 if _extras_from_ui(ui).get('SSH'):
448 450 return post_push(ui, repo, node, **kwargs)
449 451 return 0
450 452
451 453
452 454 def key_push(ui, repo, **kwargs):
453 455 from vcsserver.hgcompat import get_ctx
454 456
455 457 if kwargs['new'] != b'0' and kwargs['namespace'] == b'bookmarks':
456 458 # store new bookmarks in our UI object propagated later to post_push
457 459 ui._rc_pushkey_bookmarks = get_ctx(repo, kwargs['key']).bookmarks()
458 460 return
459 461
460 462
461 463 # backward compat
462 464 log_pull_action = post_pull
463 465
464 466 # backward compat
465 467 log_push_action = post_push
466 468
467 469
468 470 def handle_git_pre_receive(unused_repo_path, unused_revs, unused_env):
469 471 """
470 472 Old hook name: keep here for backward compatibility.
471 473
472 474 This is only required when the installed git hooks are not upgraded.
473 475 """
474 476 pass
475 477
476 478
477 479 def handle_git_post_receive(unused_repo_path, unused_revs, unused_env):
478 480 """
479 481 Old hook name: keep here for backward compatibility.
480 482
481 483 This is only required when the installed git hooks are not upgraded.
482 484 """
483 485 pass
484 486
485 487
486 488 @dataclasses.dataclass
487 489 class HookResponse:
488 490 status: int
489 491 output: str
490 492
491 493
492 494 def git_pre_pull(extras) -> HookResponse:
493 495 """
494 496 Pre pull hook.
495 497
496 498 :param extras: dictionary containing the keys defined in simplevcs
497 499 :type extras: dict
498 500
499 501 :return: status code of the hook. 0 for success.
500 502 :rtype: int
501 503 """
502 504
503 505 if 'pull' not in extras['hooks']:
504 506 return HookResponse(0, '')
505 507
506 508 stdout = io.StringIO()
507 509 try:
508 510 status_code = _call_hook('pre_pull', extras, GitMessageWriter(stdout))
509 511
510 512 except Exception as error:
511 513 log.exception('Failed to call pre_pull hook')
512 514 status_code = 128
513 515 stdout.write(f'ERROR: {error}\n')
514 516
515 517 return HookResponse(status_code, stdout.getvalue())
516 518
517 519
518 520 def git_post_pull(extras) -> HookResponse:
519 521 """
520 522 Post pull hook.
521 523
522 524 :param extras: dictionary containing the keys defined in simplevcs
523 525 :type extras: dict
524 526
525 527 :return: status code of the hook. 0 for success.
526 528 :rtype: int
527 529 """
528 530 if 'pull' not in extras['hooks']:
529 531 return HookResponse(0, '')
530 532
531 533 stdout = io.StringIO()
532 534 try:
533 535 status = _call_hook('post_pull', extras, GitMessageWriter(stdout))
534 536 except Exception as error:
535 537 status = 128
536 538 stdout.write(f'ERROR: {error}\n')
537 539
538 540 return HookResponse(status, stdout.getvalue())
539 541
540 542
541 543 def _parse_git_ref_lines(revision_lines):
542 544 rev_data = []
543 545 for revision_line in revision_lines or []:
544 546 old_rev, new_rev, ref = revision_line.strip().split(' ')
545 547 ref_data = ref.split('/', 2)
546 548 if ref_data[1] in ('tags', 'heads'):
547 549 rev_data.append({
548 550 # NOTE(marcink):
549 551 # we're unable to tell total_commits for git at this point
550 552 # but we set the variable for consistency with GIT
551 553 'total_commits': -1,
552 554 'old_rev': old_rev,
553 555 'new_rev': new_rev,
554 556 'ref': ref,
555 557 'type': ref_data[1],
556 558 'name': ref_data[2],
557 559 })
558 560 return rev_data
559 561
560 562
561 563 def git_pre_receive(unused_repo_path, revision_lines, env) -> int:
562 564 """
563 565 Pre push hook.
564 566
565 567 :return: status code of the hook. 0 for success.
566 568 """
567 569 extras = json.loads(env['RC_SCM_DATA'])
568 570 rev_data = _parse_git_ref_lines(revision_lines)
569 571 if 'push' not in extras['hooks']:
570 572 return 0
573 _fix_hooks_executables()
574
571 575 empty_commit_id = '0' * 40
572 576
573 577 detect_force_push = extras.get('detect_force_push')
574 _fix_hooks_executables()
578
575 579 for push_ref in rev_data:
576 580 # store our git-env which holds the temp store
577 581 push_ref['git_env'] = _get_git_env()
578 582 push_ref['pruned_sha'] = ''
579 583 if not detect_force_push:
580 584 # don't check for forced-push when we don't need to
581 585 continue
582 586
583 587 type_ = push_ref['type']
584 588 new_branch = push_ref['old_rev'] == empty_commit_id
585 589 delete_branch = push_ref['new_rev'] == empty_commit_id
586 590 if type_ == 'heads' and not (new_branch or delete_branch):
587 591 old_rev = push_ref['old_rev']
588 592 new_rev = push_ref['new_rev']
589 cmd = [settings.GIT_EXECUTABLE, 'rev-list', old_rev, f'^{new_rev}']
593 cmd = [settings.GIT_EXECUTABLE(), 'rev-list', old_rev, f'^{new_rev}']
590 594 stdout, stderr = subprocessio.run_command(
591 595 cmd, env=os.environ.copy())
592 596 # non-empty output means there are unreachable objects, so a forced push was used
593 597 if stdout:
594 598 push_ref['pruned_sha'] = stdout.splitlines()
595 599
596 600 extras['hook_type'] = 'pre_receive'
597 601 extras['commit_ids'] = rev_data
598 602
599 603 stdout = sys.stdout
600 604 status_code = _call_hook('pre_push', extras, GitMessageWriter(stdout))
601 605
602 606 return status_code
603 607
604 608
605 609 def git_post_receive(unused_repo_path, revision_lines, env) -> int:
606 610 """
607 611 Post push hook.
608 612
609 613 :return: status code of the hook. 0 for success.
610 614 """
611 615 extras = json.loads(env['RC_SCM_DATA'])
612 616 if 'push' not in extras['hooks']:
613 617 return 0
618
614 619 _fix_hooks_executables()
615 620
616 621 rev_data = _parse_git_ref_lines(revision_lines)
617 622
618 623 git_revs = []
619 624
620 625 # N.B.(skreft): it is ok to just call git, as git before calling a
621 626 # subcommand sets the PATH environment variable so that it points to the
622 627 # correct version of the git executable.
623 628 empty_commit_id = '0' * 40
624 629 branches = []
625 630 tags = []
626 631 for push_ref in rev_data:
627 632 type_ = push_ref['type']
628 633
629 634 if type_ == 'heads':
630 635 # starting new branch case
631 636 if push_ref['old_rev'] == empty_commit_id:
632 637 push_ref_name = push_ref['name']
633 638
634 639 if push_ref_name not in branches:
635 640 branches.append(push_ref_name)
636 641
637 642 need_head_set = ''
638 643 with Repository(os.getcwd()) as repo:
639 644 try:
640 645 repo.head
641 646 except pygit2.GitError:
642 647 need_head_set = f'refs/heads/{push_ref_name}'
643 648
644 649 if need_head_set:
645 650 repo.set_head(need_head_set)
646 651 print(f"Setting default branch to {push_ref_name}")
647 652
648 cmd = [settings.GIT_EXECUTABLE, 'for-each-ref', '--format=%(refname)', 'refs/heads/*']
653 cmd = [settings.GIT_EXECUTABLE(), 'for-each-ref', '--format=%(refname)', 'refs/heads/*']
649 654 stdout, stderr = subprocessio.run_command(
650 655 cmd, env=os.environ.copy())
651 656 heads = safe_str(stdout)
652 657 heads = heads.replace(push_ref['ref'], '')
653 658 heads = ' '.join(head for head
654 659 in heads.splitlines() if head) or '.'
655 cmd = [settings.GIT_EXECUTABLE, 'log', '--reverse',
660 cmd = [settings.GIT_EXECUTABLE(), 'log', '--reverse',
656 661 '--pretty=format:%H', '--', push_ref['new_rev'],
657 662 '--not', heads]
658 663 stdout, stderr = subprocessio.run_command(
659 664 cmd, env=os.environ.copy())
660 665 git_revs.extend(list(map(ascii_str, stdout.splitlines())))
661 666
662 667 # delete branch case
663 668 elif push_ref['new_rev'] == empty_commit_id:
664 669 git_revs.append(f'delete_branch=>{push_ref["name"]}')
665 670 else:
666 671 if push_ref['name'] not in branches:
667 672 branches.append(push_ref['name'])
668 673
669 cmd = [settings.GIT_EXECUTABLE, 'log',
674 cmd = [settings.GIT_EXECUTABLE(), 'log',
670 675 f'{push_ref["old_rev"]}..{push_ref["new_rev"]}',
671 676 '--reverse', '--pretty=format:%H']
672 677 stdout, stderr = subprocessio.run_command(
673 678 cmd, env=os.environ.copy())
674 679 # we get bytes from stdout, we need str to be consistent
675 680 log_revs = list(map(ascii_str, stdout.splitlines()))
676 681 git_revs.extend(log_revs)
677 682
678 683 # Pure pygit2 impl. but still 2-3x slower :/
679 684 # results = []
680 685 #
681 686 # with Repository(os.getcwd()) as repo:
682 687 # repo_new_rev = repo[push_ref['new_rev']]
683 688 # repo_old_rev = repo[push_ref['old_rev']]
684 689 # walker = repo.walk(repo_new_rev.id, pygit2.GIT_SORT_TOPOLOGICAL)
685 690 #
686 691 # for commit in walker:
687 692 # if commit.id == repo_old_rev.id:
688 693 # break
689 694 # results.append(commit.id.hex)
690 695 # # reverse the order, can't use GIT_SORT_REVERSE
691 696 # log_revs = results[::-1]
692 697
693 698 elif type_ == 'tags':
694 699 if push_ref['name'] not in tags:
695 700 tags.append(push_ref['name'])
696 701 git_revs.append(f'tag=>{push_ref["name"]}')
697 702
698 703 extras['hook_type'] = 'post_receive'
699 704 extras['commit_ids'] = git_revs
700 705 extras['new_refs'] = {
701 706 'branches': branches,
702 707 'bookmarks': [],
703 708 'tags': tags,
704 709 }
705 710
706 711 stdout = sys.stdout
707 712
708 713 if 'repo_size' in extras['hooks']:
709 714 try:
710 715 _call_hook('repo_size', extras, GitMessageWriter(stdout))
711 716 except Exception:
712 717 pass
713 718
714 719 status_code = _call_hook('post_push', extras, GitMessageWriter(stdout))
715 720 return status_code
716 721
717 722
718 723 def _get_extras_from_txn_id(path, txn_id):
724 _fix_hooks_executables()
725
719 726 extras = {}
720 727 try:
721 cmd = [settings.SVNLOOK_EXECUTABLE, 'pget',
728 cmd = [settings.SVNLOOK_EXECUTABLE(), 'pget',
722 729 '-t', txn_id,
723 730 '--revprop', path, 'rc-scm-extras']
724 731 stdout, stderr = subprocessio.run_command(
725 732 cmd, env=os.environ.copy())
726 733 extras = json.loads(base64.urlsafe_b64decode(stdout))
727 734 except Exception:
728 735 log.exception('Failed to extract extras info from txn_id')
729 736
730 737 return extras
731 738
732 739
733 740 def _get_extras_from_commit_id(commit_id, path):
741 _fix_hooks_executables()
742
734 743 extras = {}
735 744 try:
736 cmd = [settings.SVNLOOK_EXECUTABLE, 'pget',
745 cmd = [settings.SVNLOOK_EXECUTABLE(), 'pget',
737 746 '-r', commit_id,
738 747 '--revprop', path, 'rc-scm-extras']
739 748 stdout, stderr = subprocessio.run_command(
740 749 cmd, env=os.environ.copy())
741 750 extras = json.loads(base64.urlsafe_b64decode(stdout))
742 751 except Exception:
743 752 log.exception('Failed to extract extras info from commit_id')
744 753
745 754 return extras
746 755
747 756
748 757 def svn_pre_commit(repo_path, commit_data, env):
758
749 759 path, txn_id = commit_data
750 760 branches = []
751 761 tags = []
752 762
753 _fix_hooks_executables()
754 763 if env.get('RC_SCM_DATA'):
755 764 extras = json.loads(env['RC_SCM_DATA'])
756 765 else:
757 766 # fallback method to read from TXN-ID stored data
758 767 extras = _get_extras_from_txn_id(path, txn_id)
759 768 if not extras:
760 769 return 0
761 770
762 771 extras['hook_type'] = 'pre_commit'
763 772 extras['commit_ids'] = [txn_id]
764 773 extras['txn_id'] = txn_id
765 774 extras['new_refs'] = {
766 775 'total_commits': 1,
767 776 'branches': branches,
768 777 'bookmarks': [],
769 778 'tags': tags,
770 779 }
771 780
772 781 return _call_hook('pre_push', extras, SvnMessageWriter())
773 782
774 783
775 784 def svn_post_commit(repo_path, commit_data, env):
776 785 """
777 786 commit_data is path, rev, txn_id
778 787 """
779 788
780 789 if len(commit_data) == 3:
781 790 path, commit_id, txn_id = commit_data
782 791 elif len(commit_data) == 2:
783 792 log.error('Failed to extract txn_id from commit_data using legacy method. '
784 793 'Some functionality might be limited')
785 794 path, commit_id = commit_data
786 795 txn_id = None
787 796 else:
788 797 return 0
789 798
790 799 branches = []
791 800 tags = []
792 801
793 _fix_hooks_executables()
794 802 if env.get('RC_SCM_DATA'):
795 803 extras = json.loads(env['RC_SCM_DATA'])
796 804 else:
797 805 # fallback method to read from TXN-ID stored data
798 806 extras = _get_extras_from_commit_id(commit_id, path)
799 807 if not extras:
800 808 return 0
801 809
802 810 extras['hook_type'] = 'post_commit'
803 811 extras['commit_ids'] = [commit_id]
804 812 extras['txn_id'] = txn_id
805 813 extras['new_refs'] = {
806 814 'branches': branches,
807 815 'bookmarks': [],
808 816 'tags': tags,
809 817 'total_commits': 1,
810 818 }
811 819
812 820 if 'repo_size' in extras['hooks']:
813 821 try:
814 822 _call_hook('repo_size', extras, SvnMessageWriter())
815 823 except Exception:
816 824 pass
817 825
818 826 return _call_hook('post_push', extras, SvnMessageWriter())
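
The hook code above now resolves executables by calling settings.GIT_EXECUTABLE(), settings.SVN_EXECUTABLE() and settings.SVNLOOK_EXECUTABLE() instead of reading pre-joined strings, and _fix_hooks_executables(ini_path) can recover core.binary_dir from the ini when mod_dav_svn strips the environment. A rough sketch of that callable pattern, not the actual vcsserver.settings implementation:

# Sketch only: shows why resolving the binary path at call time helps when
# the hook runs with a stripped environment (e.g. under mod_dav_svn).
# The real vcsserver.settings module may differ.
import os

BINARY_DIR = ''  # filled in later, e.g. by _fix_hooks_executables() from core.binary_dir

def SVN_EXECUTABLE() -> str:
    # joined lazily, so a BINARY_DIR fixed up after import is still honoured
    return os.path.join(BINARY_DIR, 'svn') if BINARY_DIR else 'svn'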
@@ -1,777 +1,774 b''
1 1 # RhodeCode VCSServer provides access to different vcs backends via network.
2 2 # Copyright (C) 2014-2023 RhodeCode GmbH
3 3 #
4 4 # This program is free software; you can redistribute it and/or modify
5 5 # it under the terms of the GNU General Public License as published by
6 6 # the Free Software Foundation; either version 3 of the License, or
7 7 # (at your option) any later version.
8 8 #
9 9 # This program is distributed in the hope that it will be useful,
10 10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 12 # GNU General Public License for more details.
13 13 #
14 14 # You should have received a copy of the GNU General Public License
15 15 # along with this program; if not, write to the Free Software Foundation,
16 16 # Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
17 17
18 18 import io
19 19 import os
20 20 import platform
21 21 import sys
22 22 import locale
23 23 import logging
24 24 import uuid
25 25 import time
26 26 import wsgiref.util
27 27 import tempfile
28 28 import psutil
29 29
30 30 from itertools import chain
31 31
32 32 import msgpack
33 33 import configparser
34 34
35 35 from pyramid.config import Configurator
36 36 from pyramid.wsgi import wsgiapp
37 37 from pyramid.response import Response
38 38
39 39 from vcsserver.base import BytesEnvelope, BinaryEnvelope
40 40 from vcsserver.lib.rc_json import json
41 41 from vcsserver.config.settings_maker import SettingsMaker
42 42 from vcsserver.str_utils import safe_int
43 43 from vcsserver.lib.statsd_client import StatsdClient
44 44 from vcsserver.tweens.request_wrapper import get_headers_call_context
45 45
46 46 import vcsserver
47 47 from vcsserver import remote_wsgi, scm_app, settings, hgpatches
48 48 from vcsserver.git_lfs.app import GIT_LFS_CONTENT_TYPE, GIT_LFS_PROTO_PAT
49 49 from vcsserver.echo_stub import remote_wsgi as remote_wsgi_stub
50 50 from vcsserver.echo_stub.echo_app import EchoApp
51 51 from vcsserver.exceptions import HTTPRepoLocked, HTTPRepoBranchProtected
52 52 from vcsserver.lib.exc_tracking import store_exception, format_exc
53 53 from vcsserver.server import VcsServer
54 54
55 55 strict_vcs = True
56 56
57 57 git_import_err = None
58 58 try:
59 59 from vcsserver.remote.git_remote import GitFactory, GitRemote
60 60 except ImportError as e:
61 61 GitFactory = None
62 62 GitRemote = None
63 63 git_import_err = e
64 64 if strict_vcs:
65 65 raise
66 66
67 67
68 68 hg_import_err = None
69 69 try:
70 70 from vcsserver.remote.hg_remote import MercurialFactory, HgRemote
71 71 except ImportError as e:
72 72 MercurialFactory = None
73 73 HgRemote = None
74 74 hg_import_err = e
75 75 if strict_vcs:
76 76 raise
77 77
78 78
79 79 svn_import_err = None
80 80 try:
81 81 from vcsserver.remote.svn_remote import SubversionFactory, SvnRemote
82 82 except ImportError as e:
83 83 SubversionFactory = None
84 84 SvnRemote = None
85 85 svn_import_err = e
86 86 if strict_vcs:
87 87 raise
88 88
89 89 log = logging.getLogger(__name__)
90 90
91 91 # due to Mercurial/glibc2.27 problems we need to detect if locale settings are
92 92 # causing problems and "fix" them if they do, falling back to LC_ALL = C
93 93
94 94 try:
95 95 locale.setlocale(locale.LC_ALL, '')
96 96 except locale.Error as e:
97 97 log.error(
98 98 'LOCALE ERROR: failed to set LC_ALL, fallback to LC_ALL=C, org error: %s', e)
99 99 os.environ['LC_ALL'] = 'C'
100 100
101 101
102 102 def _is_request_chunked(environ):
103 103 stream = environ.get('HTTP_TRANSFER_ENCODING', '') == 'chunked'
104 104 return stream
105 105
106 106
107 107 def log_max_fd():
108 108 try:
109 109 maxfd = psutil.Process().rlimit(psutil.RLIMIT_NOFILE)[1]
110 110 log.info('Max file descriptors value: %s', maxfd)
111 111 except Exception:
112 112 pass
113 113
114 114
115 115 class VCS:
116 116 def __init__(self, locale_conf=None, cache_config=None):
117 117 self.locale = locale_conf
118 118 self.cache_config = cache_config
119 119 self._configure_locale()
120 120
121 121 log_max_fd()
122 122
123 123 if GitFactory and GitRemote:
124 124 git_factory = GitFactory()
125 125 self._git_remote = GitRemote(git_factory)
126 126 else:
127 127 log.error("Git client import failed: %s", git_import_err)
128 128
129 129 if MercurialFactory and HgRemote:
130 130 hg_factory = MercurialFactory()
131 131 self._hg_remote = HgRemote(hg_factory)
132 132 else:
133 133 log.error("Mercurial client import failed: %s", hg_import_err)
134 134
135 135 if SubversionFactory and SvnRemote:
136 136 svn_factory = SubversionFactory()
137 137
138 138 # hg factory is used for svn url validation
139 139 hg_factory = MercurialFactory()
140 140 self._svn_remote = SvnRemote(svn_factory, hg_factory=hg_factory)
141 141 else:
142 142 log.error("Subversion client import failed: %s", svn_import_err)
143 143
144 144 self._vcsserver = VcsServer()
145 145
146 146 def _configure_locale(self):
147 147 if self.locale:
148 148 log.info('Setting locale: `LC_ALL` to %s', self.locale)
149 149 else:
150 150 log.info('Configuring locale subsystem based on environment variables')
151 151 try:
152 152 # If self.locale is the empty string, then the locale
153 153 # module will use the environment variables. See the
154 154 # documentation of the package `locale`.
155 155 locale.setlocale(locale.LC_ALL, self.locale)
156 156
157 157 language_code, encoding = locale.getlocale()
158 158 log.info(
159 159 'Locale set to language code "%s" with encoding "%s".',
160 160 language_code, encoding)
161 161 except locale.Error:
162 162 log.exception('Cannot set locale, not configuring the locale system')
163 163
164 164
165 165 class WsgiProxy:
166 166 def __init__(self, wsgi):
167 167 self.wsgi = wsgi
168 168
169 169 def __call__(self, environ, start_response):
170 170 input_data = environ['wsgi.input'].read()
171 171 input_data = msgpack.unpackb(input_data)
172 172
173 173 error = None
174 174 try:
175 175 data, status, headers = self.wsgi.handle(
176 176 input_data['environment'], input_data['input_data'],
177 177 *input_data['args'], **input_data['kwargs'])
178 178 except Exception as e:
179 179 data, status, headers = [], None, None
180 180 error = {
181 181 'message': str(e),
182 182 '_vcs_kind': getattr(e, '_vcs_kind', None)
183 183 }
184 184
185 185 start_response(200, {})
186 186 return self._iterator(error, status, headers, data)
187 187
188 188 def _iterator(self, error, status, headers, data):
189 189 initial_data = [
190 190 error,
191 191 status,
192 192 headers,
193 193 ]
194 194
195 195 for d in chain(initial_data, data):
196 196 yield msgpack.packb(d)
197 197
198 198
199 199 def not_found(request):
200 200 return {'status': '404 NOT FOUND'}
201 201
202 202
203 203 class VCSViewPredicate:
204 204 def __init__(self, val, config):
205 205 self.remotes = val
206 206
207 207 def text(self):
208 208 return f'vcs view method = {list(self.remotes.keys())}'
209 209
210 210 phash = text
211 211
212 212 def __call__(self, context, request):
213 213 """
214 214 View predicate that returns true if given backend is supported by
215 215 defined remotes.
216 216 """
217 217 backend = request.matchdict.get('backend')
218 218 return backend in self.remotes
219 219
220 220
221 221 class HTTPApplication:
222 222 ALLOWED_EXCEPTIONS = ('KeyError', 'URLError')
223 223
224 224 remote_wsgi = remote_wsgi
225 225 _use_echo_app = False
226 226
227 227 def __init__(self, settings=None, global_config=None):
228 228
229 229 self.config = Configurator(settings=settings)
230 230 # Init our statsd at very start
231 231 self.config.registry.statsd = StatsdClient.statsd
232 232 self.config.registry.vcs_call_context = {}
233 233
234 234 self.global_config = global_config
235 235 self.config.include('vcsserver.lib.rc_cache')
236 236 self.config.include('vcsserver.lib.rc_cache.archive_cache')
237 237
238 238 settings_locale = settings.get('locale', '') or 'en_US.UTF-8'
239 239 vcs = VCS(locale_conf=settings_locale, cache_config=settings)
240 240 self._remotes = {
241 241 'hg': vcs._hg_remote,
242 242 'git': vcs._git_remote,
243 243 'svn': vcs._svn_remote,
244 244 'server': vcs._vcsserver,
245 245 }
246 246 if settings.get('dev.use_echo_app', 'false').lower() == 'true':
247 247 self._use_echo_app = True
248 248 log.warning("Using EchoApp for VCS operations.")
249 249 self.remote_wsgi = remote_wsgi_stub
250 250
251 251 self._configure_settings(global_config, settings)
252 252
253 253 self._configure()
254 254
255 255 def _configure_settings(self, global_config, app_settings):
256 256 """
257 257 Configure the settings module.
258 258 """
259 259 settings_merged = global_config.copy()
260 260 settings_merged.update(app_settings)
261 261
262 262 binary_dir = app_settings['core.binary_dir']
263 263
264 264 settings.BINARY_DIR = binary_dir
265 265
266 # from core.binary dir we set executable paths
267 settings.GIT_EXECUTABLE = os.path.join(binary_dir, settings.GIT_EXECUTABLE)
268 settings.SVN_EXECUTABLE = os.path.join(binary_dir, settings.SVN_EXECUTABLE)
269 settings.SVNLOOK_EXECUTABLE = os.path.join(binary_dir, settings.SVNLOOK_EXECUTABLE)
270
271 266 # Store the settings to make them available to other modules.
272 267 vcsserver.PYRAMID_SETTINGS = settings_merged
273 268 vcsserver.CONFIG = settings_merged
274 269
275 270 def _configure(self):
276 271 self.config.add_renderer(name='msgpack', factory=self._msgpack_renderer_factory)
277 272
278 273 self.config.add_route('service', '/_service')
279 274 self.config.add_route('status', '/status')
280 275 self.config.add_route('hg_proxy', '/proxy/hg')
281 276 self.config.add_route('git_proxy', '/proxy/git')
282 277
283 278 # rpc methods
284 279 self.config.add_route('vcs', '/{backend}')
285 280
286 281 # streaming rpc remote methods
287 282 self.config.add_route('vcs_stream', '/{backend}/stream')
288 283
289 284 # vcs operations clone/push as streaming
290 285 self.config.add_route('stream_git', '/stream/git/*repo_name')
291 286 self.config.add_route('stream_hg', '/stream/hg/*repo_name')
292 287
293 288 self.config.add_view(self.status_view, route_name='status', renderer='json')
294 289 self.config.add_view(self.service_view, route_name='service', renderer='msgpack')
295 290
296 291 self.config.add_view(self.hg_proxy(), route_name='hg_proxy')
297 292 self.config.add_view(self.git_proxy(), route_name='git_proxy')
298 293 self.config.add_view(self.vcs_view, route_name='vcs', renderer='msgpack',
299 294 vcs_view=self._remotes)
300 295 self.config.add_view(self.vcs_stream_view, route_name='vcs_stream',
301 296 vcs_view=self._remotes)
302 297
303 298 self.config.add_view(self.hg_stream(), route_name='stream_hg')
304 299 self.config.add_view(self.git_stream(), route_name='stream_git')
305 300
306 301 self.config.add_view_predicate('vcs_view', VCSViewPredicate)
307 302
308 303 self.config.add_notfound_view(not_found, renderer='json')
309 304
310 305 self.config.add_view(self.handle_vcs_exception, context=Exception)
311 306
312 307 self.config.add_tween(
313 308 'vcsserver.tweens.request_wrapper.RequestWrapperTween',
314 309 )
315 310 self.config.add_request_method(
316 311 'vcsserver.lib.request_counter.get_request_counter',
317 312 'request_count')
318 313
319 314 def wsgi_app(self):
320 315 return self.config.make_wsgi_app()
321 316
322 317 def _vcs_view_params(self, request):
323 318 remote = self._remotes[request.matchdict['backend']]
324 319 payload = msgpack.unpackb(request.body, use_list=True)
325 320
326 321 method = payload.get('method')
327 322 params = payload['params']
328 323 wire = params.get('wire')
329 324 args = params.get('args')
330 325 kwargs = params.get('kwargs')
331 326 context_uid = None
332 327
333 328 request.registry.vcs_call_context = {
334 329 'method': method,
335 330 'repo_name': payload.get('_repo_name'),
336 331 }
337 332
338 333 if wire:
339 334 try:
340 335 wire['context'] = context_uid = uuid.UUID(wire['context'])
341 336 except KeyError:
342 337 pass
343 338 args.insert(0, wire)
344 339 repo_state_uid = wire.get('repo_state_uid') if wire else None
345 340
346 341 # NOTE(marcink): trading complexity for slight performance
347 342 if log.isEnabledFor(logging.DEBUG):
348 343 # also we SKIP printing out any of those methods' args since they may be excessive
349 344 just_args_methods = {
350 345 'commitctx': ('content', 'removed', 'updated'),
351 346 'commit': ('content', 'removed', 'updated')
352 347 }
353 348 if method in just_args_methods:
354 349 skip_args = just_args_methods[method]
355 350 call_args = ''
356 351 call_kwargs = {}
357 352 for k in kwargs:
358 353 if k in skip_args:
359 354 # replace our skip key with dummy
360 355 call_kwargs[k] = f'RemovedParam({k})'
361 356 else:
362 357 call_kwargs[k] = kwargs[k]
363 358 else:
364 359 call_args = args[1:]
365 360 call_kwargs = kwargs
366 361
367 362 log.debug('Method requested:`%s` with args:%s kwargs:%s context_uid: %s, repo_state_uid:%s',
368 363 method, call_args, call_kwargs, context_uid, repo_state_uid)
369 364
370 365 statsd = request.registry.statsd
371 366 if statsd:
372 367 statsd.incr(
373 368 'vcsserver_method_total', tags=[
374 369 f"method:{method}",
375 370 ])
376 371 return payload, remote, method, args, kwargs
377 372
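# Annotation (not part of the original diff): the msgpack payload unpacked by
# _vcs_view_params is expected to look roughly like:
#   {'id': <payload_id>, 'method': 'name_or_stream:name', '_repo_name': '...',
#    'params': {'wire': {...}, 'args': [...], 'kwargs': {...}}}
# where 'wire', if present, is prepended to args before dispatching to the remote.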
378 373 def vcs_view(self, request):
379 374
380 375 payload, remote, method, args, kwargs = self._vcs_view_params(request)
381 376 payload_id = payload.get('id')
382 377
383 378 try:
384 379 resp = getattr(remote, method)(*args, **kwargs)
385 380 except Exception as e:
386 381 exc_info = list(sys.exc_info())
387 382 exc_type, exc_value, exc_traceback = exc_info
388 383
389 384 org_exc = getattr(e, '_org_exc', None)
390 385 org_exc_name = None
391 386 org_exc_tb = ''
392 387 if org_exc:
393 388 org_exc_name = org_exc.__class__.__name__
394 389 org_exc_tb = getattr(e, '_org_exc_tb', '')
395 390 # replace our "faked" exception with the original one
396 391 exc_info[0] = org_exc.__class__
397 392 exc_info[1] = org_exc
398 393
399 394 should_store_exc = True
400 395 if org_exc:
401 396 def get_exc_fqn(_exc_obj):
402 397 module_name = getattr(org_exc.__class__, '__module__', 'UNKNOWN')
403 398 return module_name + '.' + org_exc_name
404 399
405 400 exc_fqn = get_exc_fqn(org_exc)
406 401
407 402 if exc_fqn in ['mercurial.error.RepoLookupError',
408 403 'vcsserver.exceptions.RefNotFoundException']:
409 404 should_store_exc = False
410 405
411 406 if should_store_exc:
412 407 store_exception(id(exc_info), exc_info, request_path=request.path)
413 408
414 409 tb_info = format_exc(exc_info)
415 410
416 411 type_ = e.__class__.__name__
417 412 if type_ not in self.ALLOWED_EXCEPTIONS:
418 413 type_ = None
419 414
420 415 resp = {
421 416 'id': payload_id,
422 417 'error': {
423 418 'message': str(e),
424 419 'traceback': tb_info,
425 420 'org_exc': org_exc_name,
426 421 'org_exc_tb': org_exc_tb,
427 422 'type': type_
428 423 }
429 424 }
430 425
431 426 try:
432 427 resp['error']['_vcs_kind'] = getattr(e, '_vcs_kind', None)
433 428 except AttributeError:
434 429 pass
435 430 else:
436 431 resp = {
437 432 'id': payload_id,
438 433 'result': resp
439 434 }
440 435 log.debug('Serving data for method %s', method)
441 436 return resp
442 437
443 438 def vcs_stream_view(self, request):
444 439 payload, remote, method, args, kwargs = self._vcs_view_params(request)
445 440 # this method name carries a 'stream:' marker; we strip it here
446 441 method = method.split('stream:')[-1]
447 442 chunk_size = safe_int(payload.get('chunk_size')) or 4096
448 443
449 444 resp = getattr(remote, method)(*args, **kwargs)
450 445
451 446 def get_chunked_data(method_resp):
452 447 stream = io.BytesIO(method_resp)
453 448 while 1:
454 449 chunk = stream.read(chunk_size)
455 450 if not chunk:
456 451 break
457 452 yield chunk
458 453
459 454 response = Response(app_iter=get_chunked_data(resp))
460 455 response.content_type = 'application/octet-stream'
461 456
462 457 return response
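# Annotation (not in the original source): the remote method's full result is first
# buffered into an io.BytesIO; chunk_size only controls the size of the WSGI chunks
# emitted to the client, not how the backend produces the data.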
463 458
464 459 def status_view(self, request):
465 460 import vcsserver
466 461 _platform_id = platform.uname()[1] or 'instance'
467 462
468 463 return {
469 464 "status": "OK",
470 465 "vcsserver_version": vcsserver.get_version(),
471 466 "platform": _platform_id,
472 467 "pid": os.getpid(),
473 468 }
474 469
475 470 def service_view(self, request):
476 471 import vcsserver
477 472
478 473 payload = msgpack.unpackb(request.body, use_list=True)
479 474 server_config, app_config = {}, {}
480 475
481 476 try:
482 477 path = self.global_config['__file__']
483 478 config = configparser.RawConfigParser()
484 479
485 480 config.read(path)
486 481
487 482 if config.has_section('server:main'):
488 483 server_config = dict(config.items('server:main'))
489 484 if config.has_section('app:main'):
490 485 app_config = dict(config.items('app:main'))
491 486
492 487 except Exception:
493 488 log.exception('Failed to read .ini file for display')
494 489
495 490 environ = list(os.environ.items())
496 491
497 492 resp = {
498 493 'id': payload.get('id'),
499 494 'result': dict(
500 495 version=vcsserver.get_version(),
501 496 config=server_config,
502 497 app_config=app_config,
503 498 environ=environ,
504 499 payload=payload,
505 500 )
506 501 }
507 502 return resp
508 503
509 504 def _msgpack_renderer_factory(self, info):
510 505
511 506 def _render(value, system):
512 507 bin_type = False
513 508 res = value.get('result')
514 509 if isinstance(res, BytesEnvelope):
515 510 log.debug('Result is wrapped in BytesEnvelope type')
516 511 bin_type = True
517 512 elif isinstance(res, BinaryEnvelope):
518 513 log.debug('Result is wrapped in BinaryEnvelope type')
519 514 value['result'] = res.val
520 515 bin_type = True
521 516
522 517 request = system.get('request')
523 518 if request is not None:
524 519 response = request.response
525 520 ct = response.content_type
526 521 if ct == response.default_content_type:
527 522 response.content_type = 'application/x-msgpack'
528 523 if bin_type:
529 524 response.content_type = 'application/x-msgpack-bin'
530 525
531 526 return msgpack.packb(value, use_bin_type=bin_type)
532 527 return _render
533 528
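# Annotation: `config` below is an iterable of 3-tuples (section, option, value); the
# 'rhodecode' entry carries a JSON blob (e.g. username, ip) that gets mirrored into
# the WSGI environ for the hg/git apps.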
534 529 def set_env_from_config(self, environ, config):
535 530 dict_conf = {}
536 531 try:
537 532 for elem in config:
538 533 if elem[0] == 'rhodecode':
539 534 dict_conf = json.loads(elem[2])
540 535 break
541 536 except Exception:
542 537 log.exception('Failed to fetch SCM CONFIG')
543 538 return
544 539
545 540 username = dict_conf.get('username')
546 541 if username:
547 542 environ['REMOTE_USER'] = username
548 543 # mercurial specific; some extension APIs rely on this
549 544 environ['HGUSER'] = username
550 545
551 546 ip = dict_conf.get('ip')
552 547 if ip:
553 548 environ['REMOTE_HOST'] = ip
554 549
555 550 if _is_request_chunked(environ):
556 551 # set the compatibility flag for webob
557 552 environ['wsgi.input_terminated'] = True
558 553
559 554 def hg_proxy(self):
560 555 @wsgiapp
561 556 def _hg_proxy(environ, start_response):
562 557 app = WsgiProxy(self.remote_wsgi.HgRemoteWsgi())
563 558 return app(environ, start_response)
564 559 return _hg_proxy
565 560
566 561 def git_proxy(self):
567 562 @wsgiapp
568 563 def _git_proxy(environ, start_response):
569 564 app = WsgiProxy(self.remote_wsgi.GitRemoteWsgi())
570 565 return app(environ, start_response)
571 566 return _git_proxy
572 567
573 568 def hg_stream(self):
574 569 if self._use_echo_app:
575 570 @wsgiapp
576 571 def _hg_stream(environ, start_response):
577 572 app = EchoApp('fake_path', 'fake_name', None)
578 573 return app(environ, start_response)
579 574 return _hg_stream
580 575 else:
581 576 @wsgiapp
582 577 def _hg_stream(environ, start_response):
583 578 log.debug('http-app: handling hg stream')
584 579 call_context = get_headers_call_context(environ)
585 580
586 581 repo_path = call_context['repo_path']
587 582 repo_name = call_context['repo_name']
588 583 config = call_context['repo_config']
589 584
590 585 app = scm_app.create_hg_wsgi_app(
591 586 repo_path, repo_name, config)
592 587
593 588 # Consistent path information for hgweb
594 589 environ['PATH_INFO'] = call_context['path_info']
595 590 environ['REPO_NAME'] = repo_name
596 591 self.set_env_from_config(environ, config)
597 592
598 593 log.debug('http-app: starting app handler '
599 594 'with %s and process request', app)
600 595 return app(environ, ResponseFilter(start_response))
601 596 return _hg_stream
602 597
603 598 def git_stream(self):
604 599 if self._use_echo_app:
605 600 @wsgiapp
606 601 def _git_stream(environ, start_response):
607 602 app = EchoApp('fake_path', 'fake_name', None)
608 603 return app(environ, start_response)
609 604 return _git_stream
610 605 else:
611 606 @wsgiapp
612 607 def _git_stream(environ, start_response):
613 608 log.debug('http-app: handling git stream')
614 609
615 610 call_context = get_headers_call_context(environ)
616 611
617 612 repo_path = call_context['repo_path']
618 613 repo_name = call_context['repo_name']
619 614 config = call_context['repo_config']
620 615
621 616 environ['PATH_INFO'] = call_context['path_info']
622 617 self.set_env_from_config(environ, config)
623 618
624 619 content_type = environ.get('CONTENT_TYPE', '')
625 620
626 621 path = environ['PATH_INFO']
627 622 is_lfs_request = GIT_LFS_CONTENT_TYPE in content_type
628 623 log.debug(
629 624 'LFS: Detecting if request `%s` is LFS server path based '
630 625 'on content type:`%s`, is_lfs:%s',
631 626 path, content_type, is_lfs_request)
632 627
633 628 if not is_lfs_request:
634 629 # fallback detection by path
635 630 if GIT_LFS_PROTO_PAT.match(path):
636 631 is_lfs_request = True
637 632 log.debug(
638 633 'LFS: fallback detection by path of: `%s`, is_lfs:%s',
639 634 path, is_lfs_request)
640 635
641 636 if is_lfs_request:
642 637 app = scm_app.create_git_lfs_wsgi_app(
643 638 repo_path, repo_name, config)
644 639 else:
645 640 app = scm_app.create_git_wsgi_app(
646 641 repo_path, repo_name, config)
647 642
648 643 log.debug('http-app: starting app handler '
649 644 'with %s and process request', app)
650 645
651 646 return app(environ, start_response)
652 647
653 648 return _git_stream
654 649
655 650 def handle_vcs_exception(self, exception, request):
656 651 _vcs_kind = getattr(exception, '_vcs_kind', '')
657 652
658 653 if _vcs_kind == 'repo_locked':
659 654 headers_call_context = get_headers_call_context(request.environ)
660 655 status_code = safe_int(headers_call_context['locked_status_code'])
661 656
662 657 return HTTPRepoLocked(
663 658 title=str(exception), status_code=status_code, headers=[('X-Rc-Locked', '1')])
664 659
665 660 elif _vcs_kind == 'repo_branch_protected':
666 661 # Get custom repo-branch-protected status code if present.
667 662 return HTTPRepoBranchProtected(
668 663 title=str(exception), headers=[('X-Rc-Branch-Protection', '1')])
669 664
670 665 exc_info = request.exc_info
671 666 store_exception(id(exc_info), exc_info)
672 667
673 668 traceback_info = 'unavailable'
674 669 if request.exc_info:
675 670 traceback_info = format_exc(request.exc_info)
676 671
677 672 log.error(
678 673 'error occurred handling this request for path: %s, \n%s',
679 674 request.path, traceback_info)
680 675
681 676 statsd = request.registry.statsd
682 677 if statsd:
683 678 exc_type = f"{exception.__class__.__module__}.{exception.__class__.__name__}"
684 679 statsd.incr('vcsserver_exception_total',
685 680 tags=[f"type:{exc_type}"])
686 681 raise exception
687 682
688 683
689 684 class ResponseFilter:
690 685
691 686 def __init__(self, start_response):
692 687 self._start_response = start_response
693 688
694 689 def __call__(self, status, response_headers, exc_info=None):
695 690 headers = tuple(
696 691 (h, v) for h, v in response_headers
697 692 if not wsgiref.util.is_hop_by_hop(h))
698 693 return self._start_response(status, headers, exc_info)
699 694
700 695
701 696 def sanitize_settings_and_apply_defaults(global_config, settings):
702 697 _global_settings_maker = SettingsMaker(global_config)
703 698 settings_maker = SettingsMaker(settings)
704 699
705 700 settings_maker.make_setting('logging.autoconfigure', False, parser='bool')
706 701
707 702 logging_conf = os.path.join(os.path.dirname(global_config.get('__file__')), 'logging.ini')
708 703 settings_maker.enable_logging(logging_conf)
709 704
710 705 # Default includes, possible to change as a user
711 706 pyramid_includes = settings_maker.make_setting('pyramid.includes', [], parser='list:newline')
712 707 log.debug("Using the following pyramid.includes: %s", pyramid_includes)
713 708
714 709 settings_maker.make_setting('__file__', global_config.get('__file__'))
715 710
716 711 settings_maker.make_setting('pyramid.default_locale_name', 'en')
717 712 settings_maker.make_setting('locale', 'en_US.UTF-8')
718 713
719 settings_maker.make_setting('core.binary_dir', '/usr/local/bin/rhodecode_bin/vcs_bin')
714 settings_maker.make_setting(
715 'core.binary_dir', '/usr/local/bin/rhodecode_bin/vcs_bin',
716 default_when_empty=True, parser='string:noquote')
720 717
721 718 temp_store = tempfile.gettempdir()
722 719 default_cache_dir = os.path.join(temp_store, 'rc_cache')
723 720 # save the default cache dir and use it for all backends later.
724 721 default_cache_dir = settings_maker.make_setting(
725 722 'cache_dir',
726 723 default=default_cache_dir, default_when_empty=True,
727 724 parser='dir:ensured')
728 725
729 726 # exception store cache
730 727 settings_maker.make_setting(
731 728 'exception_tracker.store_path',
732 729 default=os.path.join(default_cache_dir, 'exc_store'), default_when_empty=True,
733 730 parser='dir:ensured'
734 731 )
735 732
736 733 # repo_object cache defaults
737 734 settings_maker.make_setting(
738 735 'rc_cache.repo_object.backend',
739 736 default='dogpile.cache.rc.file_namespace',
740 737 parser='string')
741 738 settings_maker.make_setting(
742 739 'rc_cache.repo_object.expiration_time',
743 740 default=30 * 24 * 60 * 60, # 30days
744 741 parser='int')
745 742 settings_maker.make_setting(
746 743 'rc_cache.repo_object.arguments.filename',
747 744 default=os.path.join(default_cache_dir, 'vcsserver_cache_repo_object.db'),
748 745 parser='string')
749 746
750 747 # statsd
751 748 settings_maker.make_setting('statsd.enabled', False, parser='bool')
752 749 settings_maker.make_setting('statsd.statsd_host', 'statsd-exporter', parser='string')
753 750 settings_maker.make_setting('statsd.statsd_port', 9125, parser='int')
754 751 settings_maker.make_setting('statsd.statsd_prefix', '')
755 752 settings_maker.make_setting('statsd.statsd_ipv6', False, parser='bool')
756 753
757 754 settings_maker.env_expand()
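# Illustrative .ini fragment these defaults correspond to (example values, not the
# authoritative configuration):
#   [app:main]
#   core.binary_dir = /usr/local/bin/rhodecode_bin/vcs_bin
#   cache_dir = /tmp/rc_cache
#   rc_cache.repo_object.backend = dogpile.cache.rc.file_namespace
#   rc_cache.repo_object.expiration_time = 2592000
#   statsd.enabled = false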
758 755
759 756
760 757 def main(global_config, **settings):
761 758 start_time = time.time()
762 759 log.info('Pyramid app config starting')
763 760
764 761 if MercurialFactory:
765 762 hgpatches.patch_largefiles_capabilities()
766 763 hgpatches.patch_subrepo_type_mapping()
767 764
768 765 # Fill in and sanitize the defaults & do ENV expansion
769 766 sanitize_settings_and_apply_defaults(global_config, settings)
770 767
771 768 # init and bootstrap StatsdClient
772 769 StatsdClient.setup(settings)
773 770
774 771 pyramid_app = HTTPApplication(settings=settings, global_config=global_config).wsgi_app()
775 772 total_time = time.time() - start_time
776 773 log.info('Pyramid app created and configured in %.2fs', total_time)
777 774 return pyramid_app
@@ -1,1518 +1,1519 b''
1 1 # RhodeCode VCSServer provides access to different vcs backends via network.
2 2 # Copyright (C) 2014-2023 RhodeCode GmbH
3 3 #
4 4 # This program is free software; you can redistribute it and/or modify
5 5 # it under the terms of the GNU General Public License as published by
6 6 # the Free Software Foundation; either version 3 of the License, or
7 7 # (at your option) any later version.
8 8 #
9 9 # This program is distributed in the hope that it will be useful,
10 10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 12 # GNU General Public License for more details.
13 13 #
14 14 # You should have received a copy of the GNU General Public License
15 15 # along with this program; if not, write to the Free Software Foundation,
16 16 # Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
17 17
18 18 import collections
19 19 import logging
20 20 import os
21 21 import re
22 22 import stat
23 23 import traceback
24 24 import urllib.request
25 25 import urllib.parse
26 26 import urllib.error
27 27 from functools import wraps
28 28
29 29 import more_itertools
30 30 import pygit2
31 31 from pygit2 import Repository as LibGit2Repo
32 32 from pygit2 import index as LibGit2Index
33 33 from dulwich import index, objects
34 34 from dulwich.client import HttpGitClient, LocalGitClient, FetchPackResult
35 35 from dulwich.errors import (
36 36 NotGitRepository, ChecksumMismatch, WrongObjectException,
37 37 MissingCommitError, ObjectMissing, HangupException,
38 38 UnexpectedCommandError)
39 39 from dulwich.repo import Repo as DulwichRepo
40 40
41 41 import rhodecode
42 42 from vcsserver import exceptions, settings, subprocessio
43 from vcsserver.str_utils import safe_str, safe_int, safe_bytes, ascii_bytes, convert_to_str
43 from vcsserver.str_utils import safe_str, safe_int, safe_bytes, ascii_bytes, convert_to_str, splitnewlines
44 44 from vcsserver.base import RepoFactory, obfuscate_qs, ArchiveNode, store_archive_in_cache, BytesEnvelope, BinaryEnvelope
45 45 from vcsserver.hgcompat import (
46 46 hg_url as url_parser, httpbasicauthhandler, httpdigestauthhandler)
47 47 from vcsserver.git_lfs.lib import LFSOidStore
48 48 from vcsserver.vcs_base import RemoteBase
49 49
50 50 DIR_STAT = stat.S_IFDIR
51 51 FILE_MODE = stat.S_IFMT
52 52 GIT_LINK = objects.S_IFGITLINK
53 53 PEELED_REF_MARKER = b'^{}'
54 54 HEAD_MARKER = b'HEAD'
55 55
56 56 log = logging.getLogger(__name__)
57 57
58 58
59 59 def reraise_safe_exceptions(func):
60 60 """Converts Dulwich exceptions to something neutral."""
61 61
62 62 @wraps(func)
63 63 def wrapper(*args, **kwargs):
64 64 try:
65 65 return func(*args, **kwargs)
66 66 except (ChecksumMismatch, WrongObjectException, MissingCommitError, ObjectMissing,) as e:
67 67 exc = exceptions.LookupException(org_exc=e)
68 68 raise exc(safe_str(e))
69 69 except (HangupException, UnexpectedCommandError) as e:
70 70 exc = exceptions.VcsException(org_exc=e)
71 71 raise exc(safe_str(e))
72 72 except Exception:
73 73 # NOTE(marcink): because of how dulwich handles some exceptions
74 74 # (KeyError on empty repos), we cannot reliably track and catch all
75 75 # exceptions here; they may be exceptions raised by other handlers
76 76 #if not hasattr(e, '_vcs_kind'):
77 77 #log.exception("Unhandled exception in git remote call")
78 78 #raise_from_original(exceptions.UnhandledException)
79 79 raise
80 80 return wrapper
81 81
82 82
83 83 class Repo(DulwichRepo):
84 84 """
85 85 A wrapper for dulwich Repo class.
86 86
87 87 Since dulwich sometimes keeps .idx file descriptors open, this leads to a
88 88 "Too many open files" error. We need to close all opened file descriptors
89 89 once the repo object is destroyed.
90 90 """
91 91 def __del__(self):
92 92 if hasattr(self, 'object_store'):
93 93 self.close()
94 94
95 95
96 96 class Repository(LibGit2Repo):
97 97
98 98 def __enter__(self):
99 99 return self
100 100
101 101 def __exit__(self, exc_type, exc_val, exc_tb):
102 102 self.free()
103 103
104 104
105 105 class GitFactory(RepoFactory):
106 106 repo_type = 'git'
107 107
108 108 def _create_repo(self, wire, create, use_libgit2=False):
109 109 if use_libgit2:
110 110 repo = Repository(safe_bytes(wire['path']))
111 111 else:
112 112 # dulwich mode
113 113 repo_path = safe_str(wire['path'], to_encoding=settings.WIRE_ENCODING)
114 114 repo = Repo(repo_path)
115 115
116 116 log.debug('repository created: got GIT object: %s', repo)
117 117 return repo
118 118
119 119 def repo(self, wire, create=False, use_libgit2=False):
120 120 """
121 121 Get a repository instance for the given path.
122 122 """
123 123 return self._create_repo(wire, create, use_libgit2)
124 124
125 125 def repo_libgit2(self, wire):
126 126 return self.repo(wire, use_libgit2=True)
127 127
128 128
129 129 def create_signature_from_string(author_str, **kwargs):
130 130 """
131 131 Creates a pygit2.Signature object from a string of the format 'Name <email>'.
132 132
133 133 :param author_str: String of the format 'Name <email>'
134 134 :return: pygit2.Signature object
135 135 """
136 136 match = re.match(r'^(.+) <(.+)>$', author_str)
137 137 if match is None:
138 138 raise ValueError(f"Invalid format: {author_str}")
139 139
140 140 name, email = match.groups()
141 141 return pygit2.Signature(name, email, **kwargs)
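# Example usage (hypothetical values, for illustration only):
#   create_signature_from_string('Jane Doe <jane@example.com>', time=1700000000, offset=60)
#   -> pygit2.Signature('Jane Doe', 'jane@example.com', time=1700000000, offset=60)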
142 142
143 143
144 144 def get_obfuscated_url(url_obj):
145 145 url_obj.passwd = b'*****' if url_obj.passwd else url_obj.passwd
146 146 url_obj.query = obfuscate_qs(url_obj.query)
147 147 obfuscated_uri = str(url_obj)
148 148 return obfuscated_uri
149 149
150 150
151 151 class GitRemote(RemoteBase):
152 152
153 153 def __init__(self, factory):
154 154 self._factory = factory
155 155 self._bulk_methods = {
156 156 "date": self.date,
157 157 "author": self.author,
158 158 "branch": self.branch,
159 159 "message": self.message,
160 160 "parents": self.parents,
161 161 "_commit": self.revision,
162 162 }
163 163 self._bulk_file_methods = {
164 164 "size": self.get_node_size,
165 165 "data": self.get_node_data,
166 166 "flags": self.get_node_flags,
167 167 "is_binary": self.get_node_is_binary,
168 168 "md5": self.md5_hash
169 169 }
170 170
171 171 def _wire_to_config(self, wire):
172 172 if 'config' in wire:
173 173 return {x[0] + '_' + x[1]: x[2] for x in wire['config']}
174 174 return {}
175 175
176 176 def _remote_conf(self, config):
177 177 params = [
178 178 '-c', 'core.askpass=""',
179 179 ]
180 180 config_attrs = {
181 181 'vcs_ssl_dir': 'http.sslCAinfo={}',
182 182 'vcs_git_lfs_store_location': 'lfs.storage={}'
183 183 }
184 184 for key, param in config_attrs.items():
185 185 if value := config.get(key):
186 186 params.extend(['-c', param.format(value)])
187 187 return params
188 188
189 189 @reraise_safe_exceptions
190 190 def discover_git_version(self):
191 191 stdout, _ = self.run_git_command(
192 192 {}, ['--version'], _bare=True, _safe=True)
193 193 prefix = b'git version'
194 194 if stdout.startswith(prefix):
195 195 stdout = stdout[len(prefix):]
196 196 return safe_str(stdout.strip())
197 197
198 198 @reraise_safe_exceptions
199 199 def is_empty(self, wire):
200 200 repo_init = self._factory.repo_libgit2(wire)
201 201 with repo_init as repo:
202 202 try:
203 203 has_head = repo.head.name
204 204 if has_head:
205 205 return False
206 206
207 207 # NOTE(marcink): check again using more expensive method
208 208 return repo.is_empty
209 209 except Exception:
210 210 pass
211 211
212 212 return True
213 213
214 214 @reraise_safe_exceptions
215 215 def assert_correct_path(self, wire):
216 216 cache_on, context_uid, repo_id = self._cache_on(wire)
217 217 region = self._region(wire)
218 218
219 219 @region.conditional_cache_on_arguments(condition=cache_on)
220 220 def _assert_correct_path(_context_uid, _repo_id, fast_check):
221 221 if fast_check:
222 222 path = safe_str(wire['path'])
223 223 if pygit2.discover_repository(path):
224 224 return True
225 225 return False
226 226 else:
227 227 try:
228 228 repo_init = self._factory.repo_libgit2(wire)
229 229 with repo_init:
230 230 pass
231 231 except pygit2.GitError:
232 232 path = wire.get('path')
233 233 tb = traceback.format_exc()
234 234 log.debug("Invalid Git path `%s`, tb: %s", path, tb)
235 235 return False
236 236 return True
237 237
238 238 return _assert_correct_path(context_uid, repo_id, True)
239 239
240 240 @reraise_safe_exceptions
241 241 def bare(self, wire):
242 242 repo_init = self._factory.repo_libgit2(wire)
243 243 with repo_init as repo:
244 244 return repo.is_bare
245 245
246 246 @reraise_safe_exceptions
247 247 def get_node_data(self, wire, commit_id, path):
248 248 repo_init = self._factory.repo_libgit2(wire)
249 249 with repo_init as repo:
250 250 commit = repo[commit_id]
251 251 blob_obj = commit.tree[path]
252 252
253 253 if blob_obj.type != pygit2.GIT_OBJ_BLOB:
254 254 raise exceptions.LookupException()(
255 255 f'Tree for commit_id:{commit_id} is not a blob: {blob_obj.type_str}')
256 256
257 257 return BytesEnvelope(blob_obj.data)
258 258
259 259 @reraise_safe_exceptions
260 260 def get_node_size(self, wire, commit_id, path):
261 261 repo_init = self._factory.repo_libgit2(wire)
262 262 with repo_init as repo:
263 263 commit = repo[commit_id]
264 264 blob_obj = commit.tree[path]
265 265
266 266 if blob_obj.type != pygit2.GIT_OBJ_BLOB:
267 267 raise exceptions.LookupException()(
268 268 f'Tree for commit_id:{commit_id} is not a blob: {blob_obj.type_str}')
269 269
270 270 return blob_obj.size
271 271
272 272 @reraise_safe_exceptions
273 273 def get_node_flags(self, wire, commit_id, path):
274 274 repo_init = self._factory.repo_libgit2(wire)
275 275 with repo_init as repo:
276 276 commit = repo[commit_id]
277 277 blob_obj = commit.tree[path]
278 278
279 279 if blob_obj.type != pygit2.GIT_OBJ_BLOB:
280 280 raise exceptions.LookupException()(
281 281 f'Tree for commit_id:{commit_id} is not a blob: {blob_obj.type_str}')
282 282
283 283 return blob_obj.filemode
284 284
285 285 @reraise_safe_exceptions
286 286 def get_node_is_binary(self, wire, commit_id, path):
287 287 repo_init = self._factory.repo_libgit2(wire)
288 288 with repo_init as repo:
289 289 commit = repo[commit_id]
290 290 blob_obj = commit.tree[path]
291 291
292 292 if blob_obj.type != pygit2.GIT_OBJ_BLOB:
293 293 raise exceptions.LookupException()(
294 294 f'Tree for commit_id:{commit_id} is not a blob: {blob_obj.type_str}')
295 295
296 296 return blob_obj.is_binary
297 297
298 298 @reraise_safe_exceptions
299 299 def blob_as_pretty_string(self, wire, sha):
300 300 repo_init = self._factory.repo_libgit2(wire)
301 301 with repo_init as repo:
302 302 blob_obj = repo[sha]
303 303 return BytesEnvelope(blob_obj.data)
304 304
305 305 @reraise_safe_exceptions
306 306 def blob_raw_length(self, wire, sha):
307 307 cache_on, context_uid, repo_id = self._cache_on(wire)
308 308 region = self._region(wire)
309 309
310 310 @region.conditional_cache_on_arguments(condition=cache_on)
311 311 def _blob_raw_length(_repo_id, _sha):
312 312
313 313 repo_init = self._factory.repo_libgit2(wire)
314 314 with repo_init as repo:
315 315 blob = repo[sha]
316 316 return blob.size
317 317
318 318 return _blob_raw_length(repo_id, sha)
319 319
320 320 def _parse_lfs_pointer(self, raw_content):
321 321 spec_string = b'version https://git-lfs.github.com/spec'
322 322 if raw_content and raw_content.startswith(spec_string):
323 323
324 324 pattern = re.compile(rb"""
325 325 (?:\n)?
326 326 ^version[ ]https://git-lfs\.github\.com/spec/(?P<spec_ver>v\d+)\n
327 327 ^oid[ ] sha256:(?P<oid_hash>[0-9a-f]{64})\n
328 328 ^size[ ](?P<oid_size>[0-9]+)\n
329 329 (?:\n)?
330 330 """, re.VERBOSE | re.MULTILINE)
331 331 match = pattern.match(raw_content)
332 332 if match:
333 333 return match.groupdict()
334 334
335 335 return {}
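# Illustrative pointer file (not from the source) that the regex above matches:
#   version https://git-lfs.github.com/spec/v1
#   oid sha256:4d7a214614ab2935c943f9e0ff69d22eadbb8f32b1258daaa5e2ca24d17e2393
#   size 12345
# which parses via groupdict() into {'spec_ver': b'v1', 'oid_hash': b'4d7a...', 'oid_size': b'12345'}.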
336 336
337 337 @reraise_safe_exceptions
338 338 def is_large_file(self, wire, commit_id):
339 339 cache_on, context_uid, repo_id = self._cache_on(wire)
340 340 region = self._region(wire)
341 341
342 342 @region.conditional_cache_on_arguments(condition=cache_on)
343 343 def _is_large_file(_repo_id, _sha):
344 344 repo_init = self._factory.repo_libgit2(wire)
345 345 with repo_init as repo:
346 346 blob = repo[commit_id]
347 347 if blob.is_binary:
348 348 return {}
349 349
350 350 return self._parse_lfs_pointer(blob.data)
351 351
352 352 return _is_large_file(repo_id, commit_id)
353 353
354 354 @reraise_safe_exceptions
355 355 def is_binary(self, wire, tree_id):
356 356 cache_on, context_uid, repo_id = self._cache_on(wire)
357 357 region = self._region(wire)
358 358
359 359 @region.conditional_cache_on_arguments(condition=cache_on)
360 360 def _is_binary(_repo_id, _tree_id):
361 361 repo_init = self._factory.repo_libgit2(wire)
362 362 with repo_init as repo:
363 363 blob_obj = repo[tree_id]
364 364 return blob_obj.is_binary
365 365
366 366 return _is_binary(repo_id, tree_id)
367 367
368 368 @reraise_safe_exceptions
369 369 def md5_hash(self, wire, commit_id, path):
370 370 cache_on, context_uid, repo_id = self._cache_on(wire)
371 371 region = self._region(wire)
372 372
373 373 @region.conditional_cache_on_arguments(condition=cache_on)
374 374 def _md5_hash(_repo_id, _commit_id, _path):
375 375 repo_init = self._factory.repo_libgit2(wire)
376 376 with repo_init as repo:
377 377 commit = repo[_commit_id]
378 378 blob_obj = commit.tree[_path]
379 379
380 380 if blob_obj.type != pygit2.GIT_OBJ_BLOB:
381 381 raise exceptions.LookupException()(
382 382 f'Tree for commit_id:{_commit_id} is not a blob: {blob_obj.type_str}')
383 383
384 384 return ''
385 385
386 386 return _md5_hash(repo_id, commit_id, path)
387 387
388 388 @reraise_safe_exceptions
389 389 def in_largefiles_store(self, wire, oid):
390 390 conf = self._wire_to_config(wire)
391 391 repo_init = self._factory.repo_libgit2(wire)
392 392 with repo_init as repo:
393 393 repo_name = repo.path
394 394
395 395 store_location = conf.get('vcs_git_lfs_store_location')
396 396 if store_location:
397 397
398 398 store = LFSOidStore(
399 399 oid=oid, repo=repo_name, store_location=store_location)
400 400 return store.has_oid()
401 401
402 402 return False
403 403
404 404 @reraise_safe_exceptions
405 405 def store_path(self, wire, oid):
406 406 conf = self._wire_to_config(wire)
407 407 repo_init = self._factory.repo_libgit2(wire)
408 408 with repo_init as repo:
409 409 repo_name = repo.path
410 410
411 411 store_location = conf.get('vcs_git_lfs_store_location')
412 412 if store_location:
413 413 store = LFSOidStore(
414 414 oid=oid, repo=repo_name, store_location=store_location)
415 415 return store.oid_path
416 416 raise ValueError(f'Unable to fetch oid with path {oid}')
417 417
418 418 @reraise_safe_exceptions
419 419 def bulk_request(self, wire, rev, pre_load):
420 420 cache_on, context_uid, repo_id = self._cache_on(wire)
421 421 region = self._region(wire)
422 422
423 423 @region.conditional_cache_on_arguments(condition=cache_on)
424 424 def _bulk_request(_repo_id, _rev, _pre_load):
425 425 result = {}
426 426 for attr in pre_load:
427 427 try:
428 428 method = self._bulk_methods[attr]
429 429 wire.update({'cache': False}) # disable cache for bulk calls so we don't double cache
430 430 args = [wire, rev]
431 431 result[attr] = method(*args)
432 432 except KeyError as e:
433 433 raise exceptions.VcsException(e)(f"Unknown bulk attribute: {attr}")
434 434 return result
435 435
436 436 return _bulk_request(repo_id, rev, sorted(pre_load))
437 437
438 438 @reraise_safe_exceptions
439 439 def bulk_file_request(self, wire, commit_id, path, pre_load):
440 440 cache_on, context_uid, repo_id = self._cache_on(wire)
441 441 region = self._region(wire)
442 442
443 443 @region.conditional_cache_on_arguments(condition=cache_on)
444 444 def _bulk_file_request(_repo_id, _commit_id, _path, _pre_load):
445 445 result = {}
446 446 for attr in pre_load:
447 447 try:
448 448 method = self._bulk_file_methods[attr]
449 449 wire.update({'cache': False}) # disable cache for bulk calls so we don't double cache
450 450 result[attr] = method(wire, _commit_id, _path)
451 451 except KeyError as e:
452 452 raise exceptions.VcsException(e)(f'Unknown bulk attribute: "{attr}"')
453 453 return result
454 454
455 455 return BinaryEnvelope(_bulk_file_request(repo_id, commit_id, path, sorted(pre_load)))
456 456
457 457 def _build_opener(self, url: str):
458 458 handlers = []
459 459 url_obj = url_parser(safe_bytes(url))
460 460 authinfo = url_obj.authinfo()[1]
461 461
462 462 if authinfo:
463 463 # create a password manager
464 464 passmgr = urllib.request.HTTPPasswordMgrWithDefaultRealm()
465 465 passmgr.add_password(*convert_to_str(authinfo))
466 466
467 467 handlers.extend((httpbasicauthhandler(passmgr),
468 468 httpdigestauthhandler(passmgr)))
469 469
470 470 return urllib.request.build_opener(*handlers)
471 471
472 472 @reraise_safe_exceptions
473 473 def check_url(self, url, config):
474 474 url_obj = url_parser(safe_bytes(url))
475 475
476 476 test_uri = safe_str(url_obj.authinfo()[0])
477 477 obfuscated_uri = get_obfuscated_url(url_obj)
478 478
479 479 log.info("Checking URL for remote cloning/import: %s", obfuscated_uri)
480 480
481 481 if not test_uri.endswith('info/refs'):
482 482 test_uri = test_uri.rstrip('/') + '/info/refs'
483 483
484 484 o = self._build_opener(url=url)
485 485 o.addheaders = [('User-Agent', 'git/1.7.8.0')] # fake some git
486 486
487 487 q = {"service": 'git-upload-pack'}
488 488 qs = f'?{urllib.parse.urlencode(q)}'
489 489 cu = f"{test_uri}{qs}"
490 490
491 491 try:
492 492 req = urllib.request.Request(cu, None, {})
493 493 log.debug("Trying to open URL %s", obfuscated_uri)
494 494 resp = o.open(req)
495 495 if resp.code != 200:
496 496 raise exceptions.URLError()('Return Code is not 200')
497 497 except Exception as e:
498 498 log.warning("URL cannot be opened: %s", obfuscated_uri, exc_info=True)
499 499 # means it cannot be cloned
500 500 raise exceptions.URLError(e)(f"[{obfuscated_uri}] org_exc: {e}")
501 501
502 502 # now detect if it's proper git repo
503 503 gitdata: bytes = resp.read()
504 504
505 505 if b'service=git-upload-pack' in gitdata:
506 506 pass
507 507 elif re.findall(br'[0-9a-fA-F]{40}\s+refs', gitdata):
508 508 # old style git can return some other format!
509 509 pass
510 510 else:
511 511 e = None
512 512 raise exceptions.URLError(e)(
513 513 f"url [{obfuscated_uri}] does not look like an hg repo org_exc: {e}")
514 514
515 515 return True
516 516
517 517 @reraise_safe_exceptions
518 518 def clone(self, wire, url, deferred, valid_refs, update_after_clone):
519 519 # TODO(marcink): deprecate this method. Last I checked we don't use it anymore
520 520 remote_refs = self.pull(wire, url, apply_refs=False)
521 521 repo = self._factory.repo(wire)
522 522 if isinstance(valid_refs, list):
523 523 valid_refs = tuple(valid_refs)
524 524
525 525 for k in remote_refs:
526 526 # only parse heads/tags and skip the so-called deferred tags
527 527 if k.startswith(valid_refs) and not k.endswith(deferred):
528 528 repo[k] = remote_refs[k]
529 529
530 530 if update_after_clone:
531 531 # we want to checkout HEAD
532 532 repo["HEAD"] = remote_refs["HEAD"]
533 533 index.build_index_from_tree(repo.path, repo.index_path(),
534 534 repo.object_store, repo["HEAD"].tree)
535 535
536 536 @reraise_safe_exceptions
537 537 def branch(self, wire, commit_id):
538 538 cache_on, context_uid, repo_id = self._cache_on(wire)
539 539 region = self._region(wire)
540 540
541 541 @region.conditional_cache_on_arguments(condition=cache_on)
542 542 def _branch(_context_uid, _repo_id, _commit_id):
543 543 regex = re.compile('^refs/heads')
544 544
545 545 def filter_with(ref):
546 546 return regex.match(ref[0]) and ref[1] == _commit_id
547 547
548 548 branches = list(filter(filter_with, list(self.get_refs(wire).items())))
549 549 return [x[0].split('refs/heads/')[-1] for x in branches]
550 550
551 551 return _branch(context_uid, repo_id, commit_id)
552 552
553 553 @reraise_safe_exceptions
554 554 def commit_branches(self, wire, commit_id):
555 555 cache_on, context_uid, repo_id = self._cache_on(wire)
556 556 region = self._region(wire)
557 557
558 558 @region.conditional_cache_on_arguments(condition=cache_on)
559 559 def _commit_branches(_context_uid, _repo_id, _commit_id):
560 560 repo_init = self._factory.repo_libgit2(wire)
561 561 with repo_init as repo:
562 562 branches = [x for x in repo.branches.with_commit(_commit_id)]
563 563 return branches
564 564
565 565 return _commit_branches(context_uid, repo_id, commit_id)
566 566
567 567 @reraise_safe_exceptions
568 568 def add_object(self, wire, content):
569 569 repo_init = self._factory.repo_libgit2(wire)
570 570 with repo_init as repo:
571 571 blob = objects.Blob()
572 572 blob.set_raw_string(content)
573 573 repo.object_store.add_object(blob)
574 574 return blob.id
575 575
576 576 @reraise_safe_exceptions
577 577 def create_commit(self, wire, author, committer, message, branch, new_tree_id,
578 578 date_args: list[int, int] = None,
579 579 parents: list | None = None):
580 580
581 581 repo_init = self._factory.repo_libgit2(wire)
582 582 with repo_init as repo:
583 583
584 584 if date_args:
585 585 current_time, offset = date_args
586 586
587 587 kw = {
588 588 'time': current_time,
589 589 'offset': offset
590 590 }
591 591 author = create_signature_from_string(author, **kw)
592 592 committer = create_signature_from_string(committer, **kw)
593 593
594 594 tree = new_tree_id
595 595 if isinstance(tree, (bytes, str)):
596 596 # validate this tree is in the repo...
597 597 tree = repo[safe_str(tree)].id
598 598
599 599 if parents:
600 600 # run via sha's and validate them in repo
601 601 parents = [repo[c].id for c in parents]
602 602 else:
603 603 parents = []
604 604 # ensure we COMMIT on top of given branch head
605 605 # check if this repo has ANY branches, otherwise it's a new-branch case we need to handle
606 606 if branch in repo.branches.local:
607 607 parents += [repo.branches[branch].target]
608 608 elif [x for x in repo.branches.local]:
609 609 parents += [repo.head.target]
610 610 #else:
611 611 # in case we want to commit on new branch we create it on top of HEAD
612 612 #repo.branches.local.create(branch, repo.revparse_single('HEAD'))
613 613
614 614 # Create a new commit
615 615 commit_oid = repo.create_commit(
616 616 f'refs/heads/{branch}', # the name of the reference to update
617 617 author, # the author of the commit
618 618 committer, # the committer of the commit
619 619 message, # the commit message
620 620 tree, # the tree produced by the index
621 621 parents # list of parents for the new commit, usually just one,
622 622 )
623 623
624 624 new_commit_id = safe_str(commit_oid)
625 625
626 626 return new_commit_id
627 627
628 628 @reraise_safe_exceptions
629 629 def commit(self, wire, commit_data, branch, commit_tree, updated, removed):
630 630
631 631 def mode2pygit(mode):
632 632 """
633 633 git only supports two filemode 644 and 755
634 634
635 635 0o100755 -> 33261
636 636 0o100644 -> 33188
637 637 """
638 638 return {
639 639 0o100644: pygit2.GIT_FILEMODE_BLOB,
640 640 0o100755: pygit2.GIT_FILEMODE_BLOB_EXECUTABLE,
641 641 0o120000: pygit2.GIT_FILEMODE_LINK
642 642 }.get(mode) or pygit2.GIT_FILEMODE_BLOB
643 643
644 644 repo_init = self._factory.repo_libgit2(wire)
645 645 with repo_init as repo:
646 646 repo_index = repo.index
647 647
648 648 commit_parents = None
649 649 if commit_tree and commit_data['parents']:
650 650 commit_parents = commit_data['parents']
651 651 parent_commit = repo[commit_parents[0]]
652 652 repo_index.read_tree(parent_commit.tree)
653 653
654 654 for pathspec in updated:
655 655 blob_id = repo.create_blob(pathspec['content'])
656 656 ie = pygit2.IndexEntry(pathspec['path'], blob_id, mode2pygit(pathspec['mode']))
657 657 repo_index.add(ie)
658 658
659 659 for pathspec in removed:
660 660 repo_index.remove(pathspec)
661 661
662 662 # Write changes to the index
663 663 repo_index.write()
664 664
665 665 # Create a tree from the updated index
666 666 written_commit_tree = repo_index.write_tree()
667 667
668 668 new_tree_id = written_commit_tree
669 669
670 670 author = commit_data['author']
671 671 committer = commit_data['committer']
672 672 message = commit_data['message']
673 673
674 674 date_args = [int(commit_data['commit_time']), int(commit_data['commit_timezone'])]
675 675
676 676 new_commit_id = self.create_commit(wire, author, committer, message, branch,
677 677 new_tree_id, date_args=date_args, parents=commit_parents)
678 678
679 679 # libgit2, ensure the branch is there and exists
680 680 self.create_branch(wire, branch, new_commit_id)
681 681
682 682 # libgit2, set new ref to this created commit
683 683 self.set_refs(wire, f'refs/heads/{branch}', new_commit_id)
684 684
685 685 return new_commit_id
686 686
687 687 @reraise_safe_exceptions
688 688 def pull(self, wire, url, apply_refs=True, refs=None, update_after=False):
689 689 if url != 'default' and '://' not in url:
690 690 client = LocalGitClient(url)
691 691 else:
692 692 url_obj = url_parser(safe_bytes(url))
693 693 o = self._build_opener(url)
694 694 url = url_obj.authinfo()[0]
695 695 client = HttpGitClient(base_url=url, opener=o)
696 696 repo = self._factory.repo(wire)
697 697
698 698 determine_wants = repo.object_store.determine_wants_all
699 699
700 700 if refs:
701 701 refs: list[bytes] = [ascii_bytes(x) for x in refs]
702 702
703 703 def determine_wants_requested(_remote_refs):
704 704 determined = []
705 705 for ref_name, ref_hash in _remote_refs.items():
706 706 bytes_ref_name = safe_bytes(ref_name)
707 707
708 708 if bytes_ref_name in refs:
709 709 bytes_ref_hash = safe_bytes(ref_hash)
710 710 determined.append(bytes_ref_hash)
711 711 return determined
712 712
713 713 # swap with our custom requested wants
714 714 determine_wants = determine_wants_requested
715 715
716 716 try:
717 717 remote_refs = client.fetch(
718 718 path=url, target=repo, determine_wants=determine_wants)
719 719
720 720 except NotGitRepository as e:
721 721 log.warning(
722 722 'Trying to fetch from "%s" failed, not a Git repository.', url)
723 723 # Exception can contain unicode which we convert
724 724 raise exceptions.AbortException(e)(repr(e))
725 725
726 726 # mikhail: client.fetch() returns all the remote refs, but fetches only
727 727 # refs filtered by `determine_wants` function. We need to filter result
728 728 # as well
729 729 if refs:
730 730 remote_refs = {k: remote_refs[k] for k in remote_refs if k in refs}
731 731
732 732 if apply_refs:
733 733 # TODO: johbo: Needs proper test coverage with a git repository
734 734 # that contains a tag object, so that we would end up with
735 735 # a peeled ref at this point.
736 736 for k in remote_refs:
737 737 if k.endswith(PEELED_REF_MARKER):
738 738 log.debug("Skipping peeled reference %s", k)
739 739 continue
740 740 repo[k] = remote_refs[k]
741 741
742 742 if refs and not update_after:
743 743 # update to ref
744 744 # mikhail: explicitly set the head to the last ref.
745 745 update_to_ref = refs[-1]
746 746 if isinstance(update_after, str):
747 747 update_to_ref = update_after
748 748
749 749 repo[HEAD_MARKER] = remote_refs[update_to_ref]
750 750
751 751 if update_after:
752 752 # we want to check out HEAD
753 753 repo[HEAD_MARKER] = remote_refs[HEAD_MARKER]
754 754 index.build_index_from_tree(repo.path, repo.index_path(),
755 755 repo.object_store, repo[HEAD_MARKER].tree)
756 756
757 757 if isinstance(remote_refs, FetchPackResult):
758 758 return remote_refs.refs
759 759 return remote_refs
760 760
761 761 @reraise_safe_exceptions
762 762 def sync_fetch(self, wire, url, refs=None, all_refs=False, **kwargs):
763 763 self._factory.repo(wire)
764 764 if refs and not isinstance(refs, (list, tuple)):
765 765 refs = [refs]
766 766
767 767 config = self._wire_to_config(wire)
768 768 # get all remote refs we'll use to fetch later
769 769 cmd = ['ls-remote']
770 770 if not all_refs:
771 771 cmd += ['--heads', '--tags']
772 772 cmd += [url]
773 773 output, __ = self.run_git_command(
774 774 wire, cmd, fail_on_stderr=False,
775 775 _copts=self._remote_conf(config),
776 776 extra_env={'GIT_TERMINAL_PROMPT': '0'})
777 777
778 778 remote_refs = collections.OrderedDict()
779 779 fetch_refs = []
780 780
781 781 for ref_line in output.splitlines():
782 782 sha, ref = ref_line.split(b'\t')
783 783 sha = sha.strip()
784 784 if ref in remote_refs:
785 785 # duplicate, skip
786 786 continue
787 787 if ref.endswith(PEELED_REF_MARKER):
788 788 log.debug("Skipping peeled reference %s", ref)
789 789 continue
790 790 # don't sync HEAD
791 791 if ref in [HEAD_MARKER]:
792 792 continue
793 793
794 794 remote_refs[ref] = sha
795 795
796 796 if refs and sha in refs:
797 797 # we filter fetch using our specified refs
798 798 fetch_refs.append(f'{safe_str(ref)}:{safe_str(ref)}')
799 799 elif not refs:
800 800 fetch_refs.append(f'{safe_str(ref)}:{safe_str(ref)}')
801 801 log.debug('Finished obtaining fetch refs, total: %s', len(fetch_refs))
802 802
803 803 if fetch_refs:
804 804 for chunk in more_itertools.chunked(fetch_refs, 128):
805 805 fetch_refs_chunks = list(chunk)
806 806 log.debug('Fetching %s refs from import url', len(fetch_refs_chunks))
807 807 self.run_git_command(
808 808 wire, ['fetch', url, '--force', '--prune', '--'] + fetch_refs_chunks,
809 809 fail_on_stderr=False,
810 810 _copts=self._remote_conf(config),
811 811 extra_env={'GIT_TERMINAL_PROMPT': '0'})
812 812 if kwargs.get('sync_large_objects'):
813 813 self.run_git_command(
814 814 wire, ['lfs', 'fetch', url, '--all'],
815 815 fail_on_stderr=False,
816 816 _copts=self._remote_conf(config),
817 817 )
818 818
819 819 return remote_refs
820 820
821 821 @reraise_safe_exceptions
822 822 def sync_push(self, wire, url, refs=None, **kwargs):
823 823 if not self.check_url(url, wire):
824 824 return
825 825 config = self._wire_to_config(wire)
826 826 self._factory.repo(wire)
827 827 self.run_git_command(
828 828 wire, ['push', url, '--mirror'], fail_on_stderr=False,
829 829 _copts=self._remote_conf(config),
830 830 extra_env={'GIT_TERMINAL_PROMPT': '0'})
831 831 if kwargs.get('sync_large_objects'):
832 832 self.run_git_command(
833 833 wire, ['lfs', 'push', url, '--all'],
834 834 fail_on_stderr=False,
835 835 _copts=self._remote_conf(config),
836 836 )
837 837
838 838 @reraise_safe_exceptions
839 839 def get_remote_refs(self, wire, url):
840 840 repo = Repo(url)
841 841 return repo.get_refs()
842 842
843 843 @reraise_safe_exceptions
844 844 def get_description(self, wire):
845 845 repo = self._factory.repo(wire)
846 846 return repo.get_description()
847 847
848 848 @reraise_safe_exceptions
849 849 def get_missing_revs(self, wire, rev1, rev2, other_repo_path):
850 850 origin_repo_path = wire['path']
851 851 repo = self._factory.repo(wire)
852 852 # fetch from other_repo_path to our origin repo
853 853 LocalGitClient(thin_packs=False).fetch(other_repo_path, repo)
854 854
855 855 wire_remote = wire.copy()
856 856 wire_remote['path'] = other_repo_path
857 857 repo_remote = self._factory.repo(wire_remote)
858 858
859 859 # fetch from origin_repo_path to our remote repo
860 860 LocalGitClient(thin_packs=False).fetch(origin_repo_path, repo_remote)
861 861
862 862 revs = [
863 863 x.commit.id
864 864 for x in repo_remote.get_walker(include=[safe_bytes(rev2)], exclude=[safe_bytes(rev1)])]
865 865 return revs
866 866
867 867 @reraise_safe_exceptions
868 868 def get_object(self, wire, sha, maybe_unreachable=False):
869 869 cache_on, context_uid, repo_id = self._cache_on(wire)
870 870 region = self._region(wire)
871 871
872 872 @region.conditional_cache_on_arguments(condition=cache_on)
873 873 def _get_object(_context_uid, _repo_id, _sha):
874 874 repo_init = self._factory.repo_libgit2(wire)
875 875 with repo_init as repo:
876 876
877 877 missing_commit_err = 'Commit {} does not exist for `{}`'.format(sha, wire['path'])
878 878 try:
879 879 commit = repo.revparse_single(sha)
880 880 except KeyError:
881 881 # NOTE(marcink): KeyError doesn't give us any meaningful information
882 882 # here, we instead give something more explicit
883 883 e = exceptions.RefNotFoundException('SHA: %s not found', sha)
884 884 raise exceptions.LookupException(e)(missing_commit_err)
885 885 except ValueError as e:
886 886 raise exceptions.LookupException(e)(missing_commit_err)
887 887
888 888 is_tag = False
889 889 if isinstance(commit, pygit2.Tag):
890 890 commit = repo.get(commit.target)
891 891 is_tag = True
892 892
893 893 check_dangling = True
894 894 if is_tag:
895 895 check_dangling = False
896 896
897 897 if check_dangling and maybe_unreachable:
898 898 check_dangling = False
899 899
900 900 # we used a reference and it resolved, which means we don't have a dangling commit
901 901 if sha != commit.hex:
902 902 check_dangling = False
903 903
904 904 if check_dangling:
905 905 # check for dangling commit
906 906 for branch in repo.branches.with_commit(commit.hex):
907 907 if branch:
908 908 break
909 909 else:
910 910 # NOTE(marcink): Empty error doesn't give us any meaningful information
911 911 # here, we instead give something more explicit
912 912 e = exceptions.RefNotFoundException('SHA: %s not found in branches', sha)
913 913 raise exceptions.LookupException(e)(missing_commit_err)
914 914
915 915 commit_id = commit.hex
916 916 type_str = commit.type_str
917 917
918 918 return {
919 919 'id': commit_id,
920 920 'type': type_str,
921 921 'commit_id': commit_id,
922 922 'idx': 0
923 923 }
924 924
925 925 return _get_object(context_uid, repo_id, sha)
926 926
927 927 @reraise_safe_exceptions
928 928 def get_refs(self, wire):
929 929 cache_on, context_uid, repo_id = self._cache_on(wire)
930 930 region = self._region(wire)
931 931
932 932 @region.conditional_cache_on_arguments(condition=cache_on)
933 933 def _get_refs(_context_uid, _repo_id):
934 934
935 935 repo_init = self._factory.repo_libgit2(wire)
936 936 with repo_init as repo:
937 937 regex = re.compile('^refs/(heads|tags)/')
938 938 return {x.name: x.target.hex for x in
939 939 [ref for ref in repo.listall_reference_objects() if regex.match(ref.name)]}
940 940
941 941 return _get_refs(context_uid, repo_id)
942 942
943 943 @reraise_safe_exceptions
944 944 def get_branch_pointers(self, wire):
945 945 cache_on, context_uid, repo_id = self._cache_on(wire)
946 946 region = self._region(wire)
947 947
948 948 @region.conditional_cache_on_arguments(condition=cache_on)
949 949 def _get_branch_pointers(_context_uid, _repo_id):
950 950
951 951 repo_init = self._factory.repo_libgit2(wire)
952 952 regex = re.compile('^refs/heads')
953 953 with repo_init as repo:
954 954 branches = [ref for ref in repo.listall_reference_objects() if regex.match(ref.name)]
955 955 return {x.target.hex: x.shorthand for x in branches}
956 956
957 957 return _get_branch_pointers(context_uid, repo_id)
958 958
959 959 @reraise_safe_exceptions
960 960 def head(self, wire, show_exc=True):
961 961 cache_on, context_uid, repo_id = self._cache_on(wire)
962 962 region = self._region(wire)
963 963
964 964 @region.conditional_cache_on_arguments(condition=cache_on)
965 965 def _head(_context_uid, _repo_id, _show_exc):
966 966 repo_init = self._factory.repo_libgit2(wire)
967 967 with repo_init as repo:
968 968 try:
969 969 return repo.head.peel().hex
970 970 except Exception:
971 971 if show_exc:
972 972 raise
973 973 return _head(context_uid, repo_id, show_exc)
974 974
975 975 @reraise_safe_exceptions
976 976 def init(self, wire):
977 977 repo_path = safe_str(wire['path'])
978 978 os.makedirs(repo_path, mode=0o755)
979 979 pygit2.init_repository(repo_path, bare=False)
980 980
981 981 @reraise_safe_exceptions
982 982 def init_bare(self, wire):
983 983 repo_path = safe_str(wire['path'])
984 984 os.makedirs(repo_path, mode=0o755)
985 985 pygit2.init_repository(repo_path, bare=True)
986 986
987 987 @reraise_safe_exceptions
988 988 def revision(self, wire, rev):
989 989
990 990 cache_on, context_uid, repo_id = self._cache_on(wire)
991 991 region = self._region(wire)
992 992
993 993 @region.conditional_cache_on_arguments(condition=cache_on)
994 994 def _revision(_context_uid, _repo_id, _rev):
995 995 repo_init = self._factory.repo_libgit2(wire)
996 996 with repo_init as repo:
997 997 commit = repo[rev]
998 998 obj_data = {
999 999 'id': commit.id.hex,
1000 1000 }
1001 1001 # tree objects themselves don't have a tree_id attribute
1002 1002 if hasattr(commit, 'tree_id'):
1003 1003 obj_data['tree'] = commit.tree_id.hex
1004 1004
1005 1005 return obj_data
1006 1006 return _revision(context_uid, repo_id, rev)
1007 1007
1008 1008 @reraise_safe_exceptions
1009 1009 def date(self, wire, commit_id):
1010 1010 cache_on, context_uid, repo_id = self._cache_on(wire)
1011 1011 region = self._region(wire)
1012 1012
1013 1013 @region.conditional_cache_on_arguments(condition=cache_on)
1014 1014 def _date(_repo_id, _commit_id):
1015 1015 repo_init = self._factory.repo_libgit2(wire)
1016 1016 with repo_init as repo:
1017 1017 commit = repo[commit_id]
1018 1018
1019 1019 if hasattr(commit, 'commit_time'):
1020 1020 commit_time, commit_time_offset = commit.commit_time, commit.commit_time_offset
1021 1021 else:
1022 1022 commit = commit.get_object()
1023 1023 commit_time, commit_time_offset = commit.commit_time, commit.commit_time_offset
1024 1024
1025 1025 # TODO(marcink): check dulwich difference of offset vs timezone
1026 1026 return [commit_time, commit_time_offset]
1027 1027 return _date(repo_id, commit_id)
1028 1028
1029 1029 @reraise_safe_exceptions
1030 1030 def author(self, wire, commit_id):
1031 1031 cache_on, context_uid, repo_id = self._cache_on(wire)
1032 1032 region = self._region(wire)
1033 1033
1034 1034 @region.conditional_cache_on_arguments(condition=cache_on)
1035 1035 def _author(_repo_id, _commit_id):
1036 1036 repo_init = self._factory.repo_libgit2(wire)
1037 1037 with repo_init as repo:
1038 1038 commit = repo[commit_id]
1039 1039
1040 1040 if hasattr(commit, 'author'):
1041 1041 author = commit.author
1042 1042 else:
1043 1043 author = commit.get_object().author
1044 1044
1045 1045 if author.email:
1046 1046 return f"{author.name} <{author.email}>"
1047 1047
1048 1048 try:
1049 1049 return f"{author.name}"
1050 1050 except Exception:
1051 1051 return f"{safe_str(author.raw_name)}"
1052 1052
1053 1053 return _author(repo_id, commit_id)
1054 1054
1055 1055 @reraise_safe_exceptions
1056 1056 def message(self, wire, commit_id):
1057 1057 cache_on, context_uid, repo_id = self._cache_on(wire)
1058 1058 region = self._region(wire)
1059 1059
1060 1060 @region.conditional_cache_on_arguments(condition=cache_on)
1061 1061 def _message(_repo_id, _commit_id):
1062 1062 repo_init = self._factory.repo_libgit2(wire)
1063 1063 with repo_init as repo:
1064 1064 commit = repo[commit_id]
1065 1065 return commit.message
1066 1066 return _message(repo_id, commit_id)
1067 1067
1068 1068 @reraise_safe_exceptions
1069 1069 def parents(self, wire, commit_id):
1070 1070 cache_on, context_uid, repo_id = self._cache_on(wire)
1071 1071 region = self._region(wire)
1072 1072
1073 1073 @region.conditional_cache_on_arguments(condition=cache_on)
1074 1074 def _parents(_repo_id, _commit_id):
1075 1075 repo_init = self._factory.repo_libgit2(wire)
1076 1076 with repo_init as repo:
1077 1077 commit = repo[commit_id]
1078 1078 if hasattr(commit, 'parent_ids'):
1079 1079 parent_ids = commit.parent_ids
1080 1080 else:
1081 1081 parent_ids = commit.get_object().parent_ids
1082 1082
1083 1083 return [x.hex for x in parent_ids]
1084 1084 return _parents(repo_id, commit_id)
1085 1085
1086 1086 @reraise_safe_exceptions
1087 1087 def children(self, wire, commit_id):
1088 1088 cache_on, context_uid, repo_id = self._cache_on(wire)
1089 1089 region = self._region(wire)
1090 1090
1091 1091 head = self.head(wire)
1092 1092
1093 1093 @region.conditional_cache_on_arguments(condition=cache_on)
1094 1094 def _children(_repo_id, _commit_id):
1095 1095
1096 1096 output, __ = self.run_git_command(
1097 1097 wire, ['rev-list', '--all', '--children', f'{commit_id}^..{head}'])
1098 1098
1099 1099 child_ids = []
1100 1100 pat = re.compile(fr'^{commit_id}')
1101 1101 for line in output.splitlines():
1102 1102 line = safe_str(line)
1103 1103 if pat.match(line):
1104 1104 found_ids = line.split(' ')[1:]
1105 1105 child_ids.extend(found_ids)
1106 1106 break
1107 1107
1108 1108 return child_ids
1109 1109 return _children(repo_id, commit_id)
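# Annotation: `git rev-list --children` emits one line per commit in the form
# "<commit_id> <child_1> <child_2> ...", so the parsing above keeps everything after
# the first field of the line that starts with our commit_id.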
1110 1110
1111 1111 @reraise_safe_exceptions
1112 1112 def set_refs(self, wire, key, value):
1113 1113 repo_init = self._factory.repo_libgit2(wire)
1114 1114 with repo_init as repo:
1115 1115 repo.references.create(key, value, force=True)
1116 1116
1117 1117 @reraise_safe_exceptions
1118 1118 def update_refs(self, wire, key, value):
1119 1119 repo_init = self._factory.repo_libgit2(wire)
1120 1120 with repo_init as repo:
1121 1121 if key not in repo.references:
1122 1122 raise ValueError(f'Reference {key} not found in the repository')
1123 1123 repo.references.create(key, value, force=True)
1124 1124
1125 1125 @reraise_safe_exceptions
1126 1126 def create_branch(self, wire, branch_name, commit_id, force=False):
1127 1127 repo_init = self._factory.repo_libgit2(wire)
1128 1128 with repo_init as repo:
1129 1129 if commit_id:
1130 1130 commit = repo[commit_id]
1131 1131 else:
1132 1132 # if commit is not given just use the HEAD
1133 1133 commit = repo.head()
1134 1134
1135 1135 if force:
1136 1136 repo.branches.local.create(branch_name, commit, force=force)
1137 1137 elif not repo.branches.get(branch_name):
1138 1138 # create only if that branch doesn't exist yet
1139 1139 repo.branches.local.create(branch_name, commit, force=force)
1140 1140
1141 1141 @reraise_safe_exceptions
1142 1142 def remove_ref(self, wire, key):
1143 1143 repo_init = self._factory.repo_libgit2(wire)
1144 1144 with repo_init as repo:
1145 1145 repo.references.delete(key)
1146 1146
1147 1147 @reraise_safe_exceptions
1148 1148 def tag_remove(self, wire, tag_name):
1149 1149 repo_init = self._factory.repo_libgit2(wire)
1150 1150 with repo_init as repo:
1151 1151 key = f'refs/tags/{tag_name}'
1152 1152 repo.references.delete(key)
1153 1153
1154 1154 @reraise_safe_exceptions
1155 1155 def tree_changes(self, wire, source_id, target_id):
1156 1156 repo = self._factory.repo(wire)
1157 1157 # source can be empty
1158 1158 source_id = safe_bytes(source_id if source_id else b'')
1159 1159 target_id = safe_bytes(target_id)
1160 1160
1161 1161 source = repo[source_id].tree if source_id else None
1162 1162 target = repo[target_id].tree
1163 1163 result = repo.object_store.tree_changes(source, target)
1164 1164
1165 1165 added = set()
1166 1166 modified = set()
1167 1167 deleted = set()
1168 1168 for (old_path, new_path), (_, _), (_, _) in list(result):
1169 1169 if new_path and old_path:
1170 1170 modified.add(new_path)
1171 1171 elif new_path and not old_path:
1172 1172 added.add(new_path)
1173 1173 elif not new_path and old_path:
1174 1174 deleted.add(old_path)
1175 1175
1176 1176 return list(added), list(modified), list(deleted)
1177 1177
1178 1178 @reraise_safe_exceptions
1179 1179 def tree_and_type_for_path(self, wire, commit_id, path):
1180 1180
1181 1181 cache_on, context_uid, repo_id = self._cache_on(wire)
1182 1182 region = self._region(wire)
1183 1183
1184 1184 @region.conditional_cache_on_arguments(condition=cache_on)
1185 1185 def _tree_and_type_for_path(_context_uid, _repo_id, _commit_id, _path):
1186 1186 repo_init = self._factory.repo_libgit2(wire)
1187 1187
1188 1188 with repo_init as repo:
1189 1189 commit = repo[commit_id]
1190 1190 try:
1191 1191 tree = commit.tree[path]
1192 1192 except KeyError:
1193 1193 return None, None, None
1194 1194
1195 1195 return tree.id.hex, tree.type_str, tree.filemode
1196 1196 return _tree_and_type_for_path(context_uid, repo_id, commit_id, path)
1197 1197
1198 1198 @reraise_safe_exceptions
1199 1199 def tree_items(self, wire, tree_id):
1200 1200 cache_on, context_uid, repo_id = self._cache_on(wire)
1201 1201 region = self._region(wire)
1202 1202
1203 1203 @region.conditional_cache_on_arguments(condition=cache_on)
1204 1204 def _tree_items(_repo_id, _tree_id):
1205 1205
1206 1206 repo_init = self._factory.repo_libgit2(wire)
1207 1207 with repo_init as repo:
1208 1208 try:
1209 1209 tree = repo[tree_id]
1210 1210 except KeyError:
1211 1211 raise ObjectMissing(f'No tree with id: {tree_id}')
1212 1212
1213 1213 result = []
1214 1214 for item in tree:
1215 1215 item_sha = item.hex
1216 1216 item_mode = item.filemode
1217 1217 item_type = item.type_str
1218 1218
1219 1219 if item_type == 'commit':
1220 1220 # NOTE(marcink): we translate submodules to 'link' for backward compat
1221 1221 item_type = 'link'
1222 1222
1223 1223 result.append((item.name, item_mode, item_sha, item_type))
1224 1224 return result
1225 1225 return _tree_items(repo_id, tree_id)
1226 1226
1227 1227 @reraise_safe_exceptions
1228 1228 def diff_2(self, wire, commit_id_1, commit_id_2, file_filter, opt_ignorews, context):
1229 1229 """
1230 1230 Old version that uses subprocess to call diff
1231 1231 """
1232 1232
1233 1233 flags = [
1234 1234 f'-U{context}', '--patch',
1235 1235 '--binary',
1236 1236 '--find-renames',
1237 1237 '--no-indent-heuristic',
1238 1238 # '--indent-heuristic',
1239 1239 #'--full-index',
1240 1240 #'--abbrev=40'
1241 1241 ]
1242 1242
1243 1243 if opt_ignorews:
1244 1244 flags.append('--ignore-all-space')
1245 1245
1246 1246 if commit_id_1 == self.EMPTY_COMMIT:
1247 1247 cmd = ['show'] + flags + [commit_id_2]
1248 1248 else:
1249 1249 cmd = ['diff'] + flags + [commit_id_1, commit_id_2]
1250 1250
1251 1251 if file_filter:
1252 1252 cmd.extend(['--', file_filter])
1253 1253
1254 1254 diff, __ = self.run_git_command(wire, cmd)
1255 1255 # If we used 'show' command, strip first few lines (until actual diff
1256 1256 # starts)
1257 1257 if commit_id_1 == self.EMPTY_COMMIT:
1258 1258 lines = diff.splitlines()
1259 1259 x = 0
1260 1260 for line in lines:
1261 1261 if line.startswith(b'diff'):
1262 1262 break
1263 1263 x += 1
1264 1264 # Append a newline just like the 'diff' command does
1265 1265 diff = b'\n'.join(lines[x:]) + b'\n'
1266 1266 return diff
1267 1267
1268 1268 @reraise_safe_exceptions
1269 1269 def diff(self, wire, commit_id_1, commit_id_2, file_filter, opt_ignorews, context):
1270 1270 repo_init = self._factory.repo_libgit2(wire)
1271 1271
1272 1272 with repo_init as repo:
1273 1273 swap = True
1274 1274 flags = 0
1275 1275 flags |= pygit2.GIT_DIFF_SHOW_BINARY
1276 1276
1277 1277 if opt_ignorews:
1278 1278 flags |= pygit2.GIT_DIFF_IGNORE_WHITESPACE
1279 1279
1280 1280 if commit_id_1 == self.EMPTY_COMMIT:
1281 1281 comm1 = repo[commit_id_2]
1282 1282 diff_obj = comm1.tree.diff_to_tree(
1283 1283 flags=flags, context_lines=context, swap=swap)
1284 1284
1285 1285 else:
1286 1286 comm1 = repo[commit_id_2]
1287 1287 comm2 = repo[commit_id_1]
1288 1288 diff_obj = comm1.tree.diff_to_tree(
1289 1289 comm2.tree, flags=flags, context_lines=context, swap=swap)
1290 1290 similar_flags = 0
1291 1291 similar_flags |= pygit2.GIT_DIFF_FIND_RENAMES
1292 1292 diff_obj.find_similar(flags=similar_flags)
1293 1293
1294 1294 if file_filter:
1295 1295 for p in diff_obj:
1296 1296 if p.delta.old_file.path == file_filter:
1297 1297 return BytesEnvelope(p.data) or BytesEnvelope(b'')
1298 1298 # no matching path == no diff
1299 1299 return BytesEnvelope(b'')
1300 1300
1301 1301 return BytesEnvelope(safe_bytes(diff_obj.patch)) or BytesEnvelope(b'')
1302 1302
1303 1303 @reraise_safe_exceptions
1304 1304 def node_history(self, wire, commit_id, path, limit):
1305 1305 cache_on, context_uid, repo_id = self._cache_on(wire)
1306 1306 region = self._region(wire)
1307 1307
1308 1308 @region.conditional_cache_on_arguments(condition=cache_on)
1309 1309 def _node_history(_context_uid, _repo_id, _commit_id, _path, _limit):
1310 1310 # optimize for n==1, rev-list is much faster for that use-case
1311 1311 if limit == 1:
1312 1312 cmd = ['rev-list', '-1', commit_id, '--', path]
1313 1313 else:
1314 1314 cmd = ['log']
1315 1315 if limit:
1316 1316 cmd.extend(['-n', str(safe_int(limit, 0))])
1317 1317 cmd.extend(['--pretty=format: %H', '-s', commit_id, '--', path])
1318 1318
1319 1319 output, __ = self.run_git_command(wire, cmd)
1320 1320 commit_ids = re.findall(rb'[0-9a-fA-F]{40}', output)
1321 1321
1322 1322 return [x for x in commit_ids]
1323 1323 return _node_history(context_uid, repo_id, commit_id, path, limit)
1324 1324
1325 1325 @reraise_safe_exceptions
1326 1326 def node_annotate_legacy(self, wire, commit_id, path):
1327 1327 # note: replaced by pygit2 implementation
1328 1328 cmd = ['blame', '-l', '--root', '-r', commit_id, '--', path]
1329 1329 # -l ==> outputs long shas (and we need all 40 characters)
1330 1330 # --root ==> doesn't put '^' character for boundaries
1331 1331 # -r commit_id ==> blames for the given commit
1332 1332 output, __ = self.run_git_command(wire, cmd)
1333 1333
1334 1334 result = []
1335 1335 for i, blame_line in enumerate(output.splitlines()[:-1]):
1336 1336 line_no = i + 1
1337 1337 blame_commit_id, line = re.split(rb' ', blame_line, 1)
1338 1338 result.append((line_no, blame_commit_id, line))
1339 1339
1340 1340 return result
1341 1341
1342 1342 @reraise_safe_exceptions
1343 1343 def node_annotate(self, wire, commit_id, path):
1344 1344
1345 1345 result_libgit = []
1346 1346 repo_init = self._factory.repo_libgit2(wire)
1347 1347 with repo_init as repo:
1348 1348 commit = repo[commit_id]
1349 1349 blame_obj = repo.blame(path, newest_commit=commit_id)
1350 for i, line in enumerate(commit.tree[path].data.splitlines()):
1350 file_content = commit.tree[path].data
1351 for i, line in enumerate(splitnewlines(file_content)):
1351 1352 line_no = i + 1
1352 1353 hunk = blame_obj.for_line(line_no)
1353 1354 blame_commit_id = hunk.final_commit_id.hex
1354 1355
1355 1356 result_libgit.append((line_no, blame_commit_id, line))
1356 1357
1357 1358 return BinaryEnvelope(result_libgit)
1358 1359
1359 1360 @reraise_safe_exceptions
1360 1361 def update_server_info(self, wire, force=False):
1361 1362 cmd = ['update-server-info']
1362 1363 if force:
1363 1364 cmd += ['--force']
1364 1365 output, __ = self.run_git_command(wire, cmd)
1365 1366 return output.splitlines()
1366 1367
1367 1368 @reraise_safe_exceptions
1368 1369 def get_all_commit_ids(self, wire):
1369 1370
1370 1371 cache_on, context_uid, repo_id = self._cache_on(wire)
1371 1372 region = self._region(wire)
1372 1373
1373 1374 @region.conditional_cache_on_arguments(condition=cache_on)
1374 1375 def _get_all_commit_ids(_context_uid, _repo_id):
1375 1376
1376 1377 cmd = ['rev-list', '--reverse', '--date-order', '--branches', '--tags']
1377 1378 try:
1378 1379 output, __ = self.run_git_command(wire, cmd)
1379 1380 return output.splitlines()
1380 1381 except Exception:
1381 1382 # Can be raised for empty repositories
1382 1383 return []
1383 1384
1384 1385 @region.conditional_cache_on_arguments(condition=cache_on)
1385 1386 def _get_all_commit_ids_pygit2(_context_uid, _repo_id):
1386 1387 repo_init = self._factory.repo_libgit2(wire)
1387 1388 from pygit2 import GIT_SORT_REVERSE, GIT_SORT_TIME, GIT_BRANCH_ALL
1388 1389 results = []
1389 1390 with repo_init as repo:
1390 1391 for commit in repo.walk(repo.head.target, GIT_SORT_TIME | GIT_BRANCH_ALL | GIT_SORT_REVERSE):
1391 1392 results.append(commit.id.hex)
1392 1393
1393 1394 return _get_all_commit_ids(context_uid, repo_id)
1394 1395
1395 1396 @reraise_safe_exceptions
1396 1397 def run_git_command(self, wire, cmd, **opts):
1397 1398 path = wire.get('path', None)
1398 1399 debug_mode = rhodecode.ConfigGet().get_bool('debug')
1399 1400
1400 1401 if path and os.path.isdir(path):
1401 1402 opts['cwd'] = path
1402 1403
1403 1404 if '_bare' in opts:
1404 1405 _copts = []
1405 1406 del opts['_bare']
1406 1407 else:
1407 1408 _copts = ['-c', 'core.quotepath=false', '-c', 'advice.diverging=false']
1408 1409 safe_call = False
1409 1410 if '_safe' in opts:
1410 1411 # no exc on failure
1411 1412 del opts['_safe']
1412 1413 safe_call = True
1413 1414
1414 1415 if '_copts' in opts:
1415 1416 _copts.extend(opts['_copts'] or [])
1416 1417 del opts['_copts']
1417 1418
1418 1419 gitenv = os.environ.copy()
1419 1420 gitenv.update(opts.pop('extra_env', {}))
1421 1422 # need to clean up GIT_DIR !
1421 1422 if 'GIT_DIR' in gitenv:
1422 1423 del gitenv['GIT_DIR']
1423 1424 gitenv['GIT_CONFIG_NOGLOBAL'] = '1'
1424 1425 gitenv['GIT_DISCOVERY_ACROSS_FILESYSTEM'] = '1'
1425 1426
1426 cmd = [settings.GIT_EXECUTABLE] + _copts + cmd
1427 cmd = [settings.GIT_EXECUTABLE()] + _copts + cmd
1427 1428 _opts = {'env': gitenv, 'shell': False}
1428 1429
1429 1430 proc = None
1430 1431 try:
1431 1432 _opts.update(opts)
1432 1433 proc = subprocessio.SubprocessIOChunker(cmd, **_opts)
1433 1434
1434 1435 return b''.join(proc), b''.join(proc.stderr)
1435 1436 except OSError as err:
1436 1437 cmd = ' '.join(map(safe_str, cmd)) # human friendly CMD
1437 1438 call_opts = {}
1438 1439 if debug_mode:
1439 1440 call_opts = _opts
1440 1441
1441 1442 tb_err = ("Couldn't run git command ({}).\n"
1442 1443 "Original error was:{}\n"
1443 1444 "Call options:{}\n"
1444 1445 .format(cmd, err, call_opts))
1445 1446 log.exception(tb_err)
1446 1447 if safe_call:
1447 1448 return '', err
1448 1449 else:
1449 1450 raise exceptions.VcsException()(tb_err)
1450 1451 finally:
1451 1452 if proc:
1452 1453 proc.close()
1453 1454
1454 1455 @reraise_safe_exceptions
1455 1456 def install_hooks(self, wire, force=False):
1456 1457 from vcsserver.hook_utils import install_git_hooks
1457 1458 bare = self.bare(wire)
1458 1459 path = wire['path']
1459 1460 binary_dir = settings.BINARY_DIR
1460 1461 if binary_dir:
1461 1462 os.path.join(binary_dir, 'python3')
1462 1463 return install_git_hooks(path, bare, force_create=force)
1463 1464
1464 1465 @reraise_safe_exceptions
1465 1466 def get_hooks_info(self, wire):
1466 1467 from vcsserver.hook_utils import (
1467 1468 get_git_pre_hook_version, get_git_post_hook_version)
1468 1469 bare = self.bare(wire)
1469 1470 path = wire['path']
1470 1471 return {
1471 1472 'pre_version': get_git_pre_hook_version(path, bare),
1472 1473 'post_version': get_git_post_hook_version(path, bare),
1473 1474 }
1474 1475
1475 1476 @reraise_safe_exceptions
1476 1477 def set_head_ref(self, wire, head_name):
1477 1478 log.debug('Setting refs/head to `%s`', head_name)
1478 1479 repo_init = self._factory.repo_libgit2(wire)
1479 1480 with repo_init as repo:
1480 1481 repo.set_head(f'refs/heads/{head_name}')
1481 1482
1482 1483 return [head_name] + [f'set HEAD to refs/heads/{head_name}']
1483 1484
1484 1485 @reraise_safe_exceptions
1485 1486 def archive_repo(self, wire, archive_name_key, kind, mtime, archive_at_path,
1486 1487 archive_dir_name, commit_id, cache_config):
1487 1488
1488 1489 def file_walker(_commit_id, path):
1489 1490 repo_init = self._factory.repo_libgit2(wire)
1490 1491
1491 1492 with repo_init as repo:
1492 1493 commit = repo[commit_id]
1493 1494
1494 1495 if path in ['', '/']:
1495 1496 tree = commit.tree
1496 1497 else:
1497 1498 tree = commit.tree[path.rstrip('/')]
1498 1499 tree_id = tree.id.hex
1499 1500 try:
1500 1501 tree = repo[tree_id]
1501 1502 except KeyError:
1502 1503 raise ObjectMissing(f'No tree with id: {tree_id}')
1503 1504
1504 1505 index = LibGit2Index.Index()
1505 1506 index.read_tree(tree)
1506 1507 file_iter = index
1507 1508
1508 1509 for file_node in file_iter:
1509 1510 file_path = file_node.path
1510 1511 mode = file_node.mode
1511 1512 is_link = stat.S_ISLNK(mode)
1512 1513 if mode == pygit2.GIT_FILEMODE_COMMIT:
1513 1514 log.debug('Skipping path %s as a commit node', file_path)
1514 1515 continue
1515 1516 yield ArchiveNode(file_path, mode, is_link, repo[file_node.hex].read_raw)
1516 1517
1517 1518 return store_archive_in_cache(
1518 1519 file_walker, archive_name_key, kind, mtime, archive_at_path, archive_dir_name, commit_id, cache_config=cache_config)
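
The `node_annotate` change above swaps `commit.tree[path].data.splitlines()` for the new `splitnewlines()` helper, so every blame entry carries the original line bytes, terminator included, and the line count matches git's newline-based numbering. Below is a minimal sketch of the difference, using hypothetical sample bytes; it assumes the helper added to vcsserver.str_utils in this changeset.

# Assumes the helper added to vcsserver.str_utils in this changeset.
from vcsserver.str_utils import splitnewlines

# Hypothetical sample bytes: the second line contains a bare carriage return.
content = b"first\nsecond\rwith a bare CR\nlast"

# bytes.splitlines() also breaks on the lone \r and drops the terminators,
# so the entry count can drift from blame's newline-based line numbers.
print(content.splitlines())
# [b'first', b'second', b'with a bare CR', b'last']  -> 4 entries

# splitnewlines() splits only on b'\n' and keeps the newline on every
# terminated line, so entries align 1:1 with blame_obj.for_line(line_no).
print(splitnewlines(content))
# [b'first\n', b'second\rwith a bare CR\n', b'last']  -> 3 entries
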
@@ -1,255 +1,255 b''
1 1 # RhodeCode VCSServer provides access to different vcs backends via network.
2 2 # Copyright (C) 2014-2023 RhodeCode GmbH
3 3 #
4 4 # This program is free software; you can redistribute it and/or modify
5 5 # it under the terms of the GNU General Public License as published by
6 6 # the Free Software Foundation; either version 3 of the License, or
7 7 # (at your option) any later version.
8 8 #
9 9 # This program is distributed in the hope that it will be useful,
10 10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 12 # GNU General Public License for more details.
13 13 #
14 14 # You should have received a copy of the GNU General Public License
15 15 # along with this program; if not, write to the Free Software Foundation,
16 16 # Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
17 17
18 18 import os
19 19 import logging
20 20 import itertools
21 21
22 22 import mercurial
23 23 import mercurial.error
24 24 import mercurial.wireprotoserver
25 25 import mercurial.hgweb.common
26 26 import mercurial.hgweb.hgweb_mod
27 27 import webob.exc
28 28
29 29 from vcsserver import pygrack, exceptions, settings, git_lfs
30 30 from vcsserver.str_utils import ascii_bytes, safe_bytes
31 31
32 32 log = logging.getLogger(__name__)
33 33
34 34
35 35 # propagated from mercurial documentation
36 36 HG_UI_SECTIONS = [
37 37 'alias', 'auth', 'decode/encode', 'defaults', 'diff', 'email', 'extensions',
38 38 'format', 'merge-patterns', 'merge-tools', 'hooks', 'http_proxy', 'smtp',
39 39 'patch', 'paths', 'profiling', 'server', 'trusted', 'ui', 'web',
40 40 ]
41 41
42 42
43 43 class HgWeb(mercurial.hgweb.hgweb_mod.hgweb):
44 44 """Extension of hgweb that simplifies some functions."""
45 45
46 46 def _get_view(self, repo):
47 47 """Views are not supported."""
48 48 return repo
49 49
50 50 def loadsubweb(self):
51 51 """The result is only used in the templater method which is not used."""
52 52 return None
53 53
54 54 def run(self):
55 55 """Unused function so raise an exception if accidentally called."""
56 56 raise NotImplementedError
57 57
58 58 def templater(self, req):
59 59 """Function used in an unreachable code path.
60 60
61 61 This code is unreachable because we guarantee that the HTTP request
62 62 corresponds to a Mercurial command; see the is_hg method. So we are
63 63 never going to get a user-visible url.
64 64 """
65 65 raise NotImplementedError
66 66
67 67 def archivelist(self, nodeid):
68 68 """Unused function so raise an exception if accidentally called."""
69 69 raise NotImplementedError
70 70
71 71 def __call__(self, environ, start_response):
72 72 """Run the WSGI application.
73 73
74 74 This may be called by multiple threads.
75 75 """
76 76 from mercurial.hgweb import request as requestmod
77 77 req = requestmod.parserequestfromenv(environ)
78 78 res = requestmod.wsgiresponse(req, start_response)
79 79 gen = self.run_wsgi(req, res)
80 80
81 81 first_chunk = None
82 82
83 83 try:
84 84 data = next(gen)
85 85
86 86 def first_chunk():
87 87 yield data
88 88 except StopIteration:
89 89 pass
90 90
91 91 if first_chunk:
92 92 return itertools.chain(first_chunk(), gen)
93 93 return gen
94 94
95 95 def _runwsgi(self, req, res, repo):
96 96
97 97 cmd = req.qsparams.get(b'cmd', '')
98 98 if not mercurial.wireprotoserver.iscmd(cmd):
99 99 # NOTE(marcink): for unsupported commands, we return bad request
100 100 # internally from HG
101 101 log.warning('cmd: `%s` is not supported by the mercurial wireprotocol v1', cmd)
102 102 from mercurial.hgweb.common import statusmessage
103 103 res.status = statusmessage(mercurial.hgweb.common.HTTP_BAD_REQUEST)
104 104 res.setbodybytes(b'')
105 105 return res.sendresponse()
106 106
107 107 return super()._runwsgi(req, res, repo)
108 108
109 109
110 110 def sanitize_hg_ui(baseui):
111 111 # NOTE(marcink): since python3, hgsubversion is deprecated.
112 112 # Old installations might still have this extension enabled;
113 113 # we explicitly remove it here to make sure it won't propagate further
114 114
115 115 if baseui.config(b'extensions', b'hgsubversion') is not None:
116 116 for cfg in (baseui._ocfg, baseui._tcfg, baseui._ucfg):
117 117 if b'extensions' in cfg:
118 118 if b'hgsubversion' in cfg[b'extensions']:
119 119 del cfg[b'extensions'][b'hgsubversion']
120 120
121 121
122 122 def make_hg_ui_from_config(repo_config):
123 123 baseui = mercurial.ui.ui()
124 124
125 125 # clean the baseui object
126 126 baseui._ocfg = mercurial.config.config()
127 127 baseui._ucfg = mercurial.config.config()
128 128 baseui._tcfg = mercurial.config.config()
129 129
130 130 for section, option, value in repo_config:
131 131 baseui.setconfig(
132 132 ascii_bytes(section, allow_bytes=True),
133 133 ascii_bytes(option, allow_bytes=True),
134 134 ascii_bytes(value, allow_bytes=True))
135 135
136 136 # make our hgweb quiet so it doesn't print output
137 137 baseui.setconfig(b'ui', b'quiet', b'true')
138 138
139 139 return baseui
140 140
141 141
142 142 def update_hg_ui_from_hgrc(baseui, repo_path):
143 143 path = os.path.join(repo_path, '.hg', 'hgrc')
144 144
145 145 if not os.path.isfile(path):
146 146 log.debug('hgrc file is not present at %s, skipping...', path)
147 147 return
148 148 log.debug('reading hgrc from %s', path)
149 149 cfg = mercurial.config.config()
150 150 cfg.read(ascii_bytes(path))
151 151 for section in HG_UI_SECTIONS:
152 152 for k, v in cfg.items(section):
153 153 log.debug('setting ui from file: [%s] %s=%s', section, k, v)
154 154 baseui.setconfig(
155 155 ascii_bytes(section, allow_bytes=True),
156 156 ascii_bytes(k, allow_bytes=True),
157 157 ascii_bytes(v, allow_bytes=True))
158 158
159 159
160 160 def create_hg_wsgi_app(repo_path, repo_name, config):
161 161 """
162 162 Prepares a WSGI application to handle Mercurial requests.
163 163
164 164 :param config: is a list of 3-item tuples representing a ConfigObject
165 165 (it is the serialized version of the config object).
166 166 """
167 167 log.debug("Creating Mercurial WSGI application")
168 168
169 169 baseui = make_hg_ui_from_config(config)
170 170 update_hg_ui_from_hgrc(baseui, repo_path)
171 171 sanitize_hg_ui(baseui)
172 172
173 173 try:
174 174 return HgWeb(safe_bytes(repo_path), name=safe_bytes(repo_name), baseui=baseui)
175 175 except mercurial.error.RequirementError as e:
176 176 raise exceptions.RequirementException(e)(e)
177 177
178 178
179 179 class GitHandler:
180 180 """
181 181 Handler for Git operations like push/pull etc
182 182 """
183 183 def __init__(self, repo_location, repo_name, git_path, update_server_info,
184 184 extras):
185 185 if not os.path.isdir(repo_location):
186 186 raise OSError(repo_location)
187 187 self.content_path = repo_location
188 188 self.repo_name = repo_name
189 189 self.repo_location = repo_location
190 190 self.extras = extras
191 191 self.git_path = git_path
192 192 self.update_server_info = update_server_info
193 193
194 194 def __call__(self, environ, start_response):
195 195 app = webob.exc.HTTPNotFound()
196 196 candidate_paths = (
197 197 self.content_path, os.path.join(self.content_path, '.git'))
198 198
199 199 for content_path in candidate_paths:
200 200 try:
201 201 app = pygrack.GitRepository(
202 202 self.repo_name, content_path, self.git_path,
203 203 self.update_server_info, self.extras)
204 204 break
205 205 except OSError:
206 206 continue
207 207
208 208 return app(environ, start_response)
209 209
210 210
211 211 def create_git_wsgi_app(repo_path, repo_name, config):
212 212 """
213 213 Creates a WSGI application to handle Git requests.
214 214
215 215 :param config: is a dictionary holding the extras.
216 216 """
217 git_path = settings.GIT_EXECUTABLE
217 git_path = settings.GIT_EXECUTABLE()
218 218 update_server_info = config.pop('git_update_server_info')
219 219 app = GitHandler(
220 220 repo_path, repo_name, git_path, update_server_info, config)
221 221
222 222 return app
223 223
224 224
225 225 class GitLFSHandler:
226 226 """
227 227 Handler for Git LFS operations
228 228 """
229 229
230 230 def __init__(self, repo_location, repo_name, git_path, update_server_info,
231 231 extras):
232 232 if not os.path.isdir(repo_location):
233 233 raise OSError(repo_location)
234 234 self.content_path = repo_location
235 235 self.repo_name = repo_name
236 236 self.repo_location = repo_location
237 237 self.extras = extras
238 238 self.git_path = git_path
239 239 self.update_server_info = update_server_info
240 240
241 241 def get_app(self, git_lfs_enabled, git_lfs_store_path, git_lfs_http_scheme):
242 242 app = git_lfs.create_app(git_lfs_enabled, git_lfs_store_path, git_lfs_http_scheme)
243 243 return app
244 244
245 245
246 246 def create_git_lfs_wsgi_app(repo_path, repo_name, config):
247 git_path = settings.GIT_EXECUTABLE
247 git_path = settings.GIT_EXECUTABLE()
248 248 update_server_info = config.pop('git_update_server_info')
249 249 git_lfs_enabled = config.pop('git_lfs_enabled')
250 250 git_lfs_store_path = config.pop('git_lfs_store_path')
251 251 git_lfs_http_scheme = config.pop('git_lfs_http_scheme', 'http')
252 252 app = GitLFSHandler(
253 253 repo_path, repo_name, git_path, update_server_info, config)
254 254
255 255 return app.get_app(git_lfs_enabled, git_lfs_store_path, git_lfs_http_scheme)
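
Both WSGI factories above now call `settings.GIT_EXECUTABLE()` instead of reading a module constant, so the git path is resolved when the app is built rather than fixed at import time. A small, hypothetical illustration of why the parentheses matter after this change:

# Hedged sketch; assumes the settings module as modified in this changeset.
from vcsserver import settings

git_path = settings.GIT_EXECUTABLE()   # a path string, e.g. '<BINARY_DIR>/git'
not_a_path = settings.GIT_EXECUTABLE   # the function object itself

assert isinstance(git_path, str)
assert callable(not_a_path)            # passing this to subprocess would fail
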
@@ -1,22 +1,31 b''
1 1 # RhodeCode VCSServer provides access to different vcs backends via network.
2 2 # Copyright (C) 2014-2023 RhodeCode GmbH
3 3 #
4 4 # This program is free software; you can redistribute it and/or modify
5 5 # it under the terms of the GNU General Public License as published by
6 6 # the Free Software Foundation; either version 3 of the License, or
7 7 # (at your option) any later version.
8 8 #
9 9 # This program is distributed in the hope that it will be useful,
10 10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 12 # GNU General Public License for more details.
13 13 #
14 14 # You should have received a copy of the GNU General Public License
15 15 # along with this program; if not, write to the Free Software Foundation,
16 16 # Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
17 import os
17 18
18 19 WIRE_ENCODING = 'UTF-8'
19 GIT_EXECUTABLE = 'git'
20 SVN_EXECUTABLE = 'svn'
21 SVNLOOK_EXECUTABLE = 'svnlook'
20
21 # Path to the directory that holds the vcs binaries
22 22 BINARY_DIR = ''
23
24 def GIT_EXECUTABLE() -> str:
25 return os.environ.get('RC_GIT_EXECUTABLE') or os.path.join(BINARY_DIR, 'git')
26
27 def SVN_EXECUTABLE() -> str:
28 return os.environ.get('RC_SVN_EXECUTABLE') or os.path.join(BINARY_DIR, 'svn')
29
30 def SVNLOOK_EXECUTABLE() -> str:
31 return os.environ.get('RC_SVNLOOK_EXECUTABLE') or os.path.join(BINARY_DIR, 'svnlook')
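
The three helpers above share one lookup order: an explicit `RC_*_EXECUTABLE` environment override wins, otherwise the binary name is joined onto `BINARY_DIR`. A standalone sketch of that resolution, with a hypothetical `BINARY_DIR` value (the real value is configured elsewhere):

import os

BINARY_DIR = '/opt/rhodecode/bin'  # hypothetical; defaults to '' until configured

def resolve_vcs_binary(env_var: str, name: str) -> str:
    # 1) explicit override via environment, e.g. RC_GIT_EXECUTABLE=/usr/local/bin/git
    # 2) otherwise fall back to the configured binary dir
    return os.environ.get(env_var) or os.path.join(BINARY_DIR, name)

print(resolve_vcs_binary('RC_GIT_EXECUTABLE', 'git'))          # '/opt/rhodecode/bin/git' unless overridden
print(resolve_vcs_binary('RC_SVNLOOK_EXECUTABLE', 'svnlook'))  # '/opt/rhodecode/bin/svnlook'
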
@@ -1,144 +1,158 b''
1 1 # RhodeCode VCSServer provides access to different vcs backends via network.
2 2 # Copyright (C) 2014-2023 RhodeCode GmbH
3 3 #
4 4 # This program is free software; you can redistribute it and/or modify
5 5 # it under the terms of the GNU General Public License as published by
6 6 # the Free Software Foundation; either version 3 of the License, or
7 7 # (at your option) any later version.
8 8 #
9 9 # This program is distributed in the hope that it will be useful,
10 10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 12 # GNU General Public License for more details.
13 13 #
14 14 # You should have received a copy of the GNU General Public License
15 15 # along with this program; if not, write to the Free Software Foundation,
16 16 # Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
17 17
18 18 import typing
19 19 import base64
20 20 import logging
21 21
22 22
23 23 log = logging.getLogger(__name__)
24 24
25 25
26 26 def safe_int(val, default=None) -> int:
27 27 """
28 28 Returns int() of val; if val is not convertible to int, use default
29 29 instead
30 30
31 31 :param val:
32 32 :param default:
33 33 """
34 34
35 35 try:
36 36 val = int(val)
37 37 except (ValueError, TypeError):
38 38 val = default
39 39
40 40 return val
41 41
42 42
43 43 def base64_to_str(text) -> str:
44 44 return safe_str(base64.encodebytes(safe_bytes(text))).strip()
45 45
46 46
47 47 def get_default_encodings() -> list[str]:
48 48 return ['utf8']
49 49
50 50
51 51 def safe_str(str_, to_encoding=None) -> str:
52 52 """
53 53 safe str function. Does a few tricks to turn the input into a str
54 54
55 55 :param str_: value to convert to str
56 56 :param to_encoding: decode bytes using this encoding, UTF8 by default
57 57 """
58 58 if isinstance(str_, str):
59 59 return str_
60 60
61 61 # if it's bytes cast to str
62 62 if not isinstance(str_, bytes):
63 63 return str(str_)
64 64
65 65 to_encoding = to_encoding or get_default_encodings()
66 66 if not isinstance(to_encoding, (list, tuple)):
67 67 to_encoding = [to_encoding]
68 68
69 69 for enc in to_encoding:
70 70 try:
71 71 return str(str_, enc)
72 72 except UnicodeDecodeError:
73 73 pass
74 74
75 75 return str(str_, to_encoding[0], 'replace')
76 76
77 77
78 78 def safe_bytes(str_, from_encoding=None) -> bytes:
79 79 """
80 80 safe bytes function. Does a few tricks to turn str_ into a bytes string:
81 81
82 82 :param str_: string to encode
83 83 :param from_encoding: encode using this encoding, UTF8 by default
84 84 """
85 85 if isinstance(str_, bytes):
86 86 return str_
87 87
88 88 if not isinstance(str_, str):
89 89 raise ValueError(f'safe_bytes cannot convert other types than str: got: {type(str_)}')
90 90
91 91 from_encoding = from_encoding or get_default_encodings()
92 92 if not isinstance(from_encoding, (list, tuple)):
93 93 from_encoding = [from_encoding]
94 94
95 95 for enc in from_encoding:
96 96 try:
97 97 return str_.encode(enc)
98 98 except UnicodeEncodeError:
99 99 pass
100 100
101 101 return str_.encode(from_encoding[0], 'replace')
102 102
103 103
104 104 def ascii_bytes(str_, allow_bytes=False) -> bytes:
105 105 """
106 106 Simple conversion from str to bytes, with assumption that str_ is pure ASCII.
107 107 Fails with UnicodeError on invalid input.
108 108 This should be used where encoding and "safe" ambiguity should be avoided.
109 109 Where strings already have been encoded in other ways but still are unicode
110 110 string - for example to hex, base64, json, urlencoding, or are known to be
111 111 identifiers.
112 112 """
113 113 if allow_bytes and isinstance(str_, bytes):
114 114 return str_
115 115
116 116 if not isinstance(str_, str):
117 117 raise ValueError(f'ascii_bytes cannot convert other types than str: got: {type(str_)}')
118 118 return str_.encode('ascii')
119 119
120 120
121 121 def ascii_str(str_) -> str:
122 122 """
123 123 Simple conversion from bytes to str, with assumption that str_ is pure ASCII.
124 124 Fails with UnicodeError on invalid input.
125 125 This should be used where encoding and "safe" ambiguity should be avoided.
126 126 Where strings are encoded but also in other ways are known to be ASCII, and
127 127 where a unicode string is wanted without caring about encoding. For example
128 128 to hex, base64, urlencoding, or are known to be identifiers.
129 129 """
130 130
131 131 if not isinstance(str_, bytes):
132 132 raise ValueError(f'ascii_str cannot convert other types than bytes: got: {type(str_)}')
133 133 return str_.decode('ascii')
134 134
135 135
136 136 def convert_to_str(data):
137 137 if isinstance(data, bytes):
138 138 return safe_str(data)
139 139 elif isinstance(data, tuple):
140 140 return tuple(convert_to_str(item) for item in data)
141 141 elif isinstance(data, list):
142 142 return list(convert_to_str(item) for item in data)
143 143 else:
144 144 return data
145
146
147 def splitnewlines(text: bytes):
148 """
149 like splitlines, but only split on newlines.
150 """
151
152 lines = [_l + b'\n' for _l in text.split(b'\n')]
153 if lines:
154 if lines[-1] == b'\n':
155 lines.pop()
156 else:
157 lines[-1] = lines[-1][:-1]
158 return lines
\ No newline at end of file
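
One property worth noting about the new helper: the split is lossless, so joining the pieces reproduces the original bytes exactly, which `splitlines()` without `keepends` cannot do. A quick self-contained check (the helper body is repeated here so the snippet runs standalone; the sample inputs are hypothetical):

def splitnewlines(text: bytes):
    # same logic as the helper added above, repeated so this snippet runs standalone
    lines = [_l + b'\n' for _l in text.split(b'\n')]
    if lines:
        if lines[-1] == b'\n':
            lines.pop()
        else:
            lines[-1] = lines[-1][:-1]
    return lines

# hypothetical samples covering empty input, missing and trailing newlines
for sample in [b'', b'no trailing newline', b'a\nb\n', b'a\nb', b'\n\n']:
    assert b''.join(splitnewlines(sample)) == sample
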